Bug Summary

File: llvm/include/llvm/CodeGen/TargetLowering.h
Warning: line 1240, column 31
Called C++ object pointer is null
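
In short, the path the analyzer reports is: AArch64TTIImpl::getCmpSelInstrCost is entered with a scalar ValTy (Steps 1-3 in the listing below), forwards 'CondTy' to BaseT::getCmpSelInstrCost (Steps 5-6), and is later re-entered with a vector ValTy, ISD equal to SELECT, and a null 'CondTy' (Steps 20-25); the call TLI->getValueType(DL, CondTy) then passes that null Type* into TargetLowering.h, where it is dereferenced at line 1240. The sketch below reproduces only this defect pattern; it is not the LLVM code, the type and function names (FakeType, valueTypeLike, cmpSelCostLike) are illustrative, and the null check it shows is just one possible guard, not necessarily the fix adopted upstream.

// Standalone sketch of the reported pattern (illustrative names, not LLVM code):
// a helper dereferences its pointer parameter unconditionally, while one call
// path can legitimately reach it with a null pointer.
#include <cassert>

struct FakeType {
  bool IsVector;
  bool isVectorTy() const { return IsVector; }   // stands in for Type::isVectorTy()
};

// Stands in for TargetLoweringBase::getValueType(): assumes T is non-null.
int valueTypeLike(const FakeType *T) {
  return T->isVectorTy() ? 2 : 1;                // a null T would crash here
}

// Stands in for getCmpSelInstrCost(): CondTy may be null on some paths.
int cmpSelCostLike(const FakeType *ValTy, const FakeType *CondTy) {
  if (ValTy->isVectorTy() && CondTy)             // the CondTy check is the sketched guard
    return valueTypeLike(CondTy);
  return 1;                                      // fall back to a generic cost
}

int main() {
  FakeType Vec{true};
  assert(cmpSelCostLike(&Vec, nullptr) == 1);    // safe with the guard in place
  return 0;
}

Dropping the CondTy check in the sketch should make the same family of null-dereference checkers flag the T->isVectorTy() call, mirroring the diagnostic above.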

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -fuse-init-array -target-cpu x86-64 -dwarf-column-info -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/include -I /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2019-12-07-102640-14763-1 -x c++ /build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/Support/Debug.h"
19#include <algorithm>
20using namespace llvm;
21
22#define DEBUG_TYPE "aarch64tti"
23
24static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
25 cl::init(true), cl::Hidden);
26
27bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
28 const Function *Callee) const {
29 const TargetMachine &TM = getTLI()->getTargetMachine();
30
31 const FeatureBitset &CallerBits =
32 TM.getSubtargetImpl(*Caller)->getFeatureBits();
33 const FeatureBitset &CalleeBits =
34 TM.getSubtargetImpl(*Callee)->getFeatureBits();
35
36 // Inline a callee if its target-features are a subset of the caller's
37 // target-features.
38 return (CallerBits & CalleeBits) == CalleeBits;
39}
40
41/// Calculate the cost of materializing a 64-bit value. This helper
42/// method might only calculate a fraction of a larger immediate. Therefore it
43/// is valid to return a cost of ZERO.
44int AArch64TTIImpl::getIntImmCost(int64_t Val) {
45 // Check if the immediate can be encoded within an instruction.
46 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
47 return 0;
48
49 if (Val < 0)
50 Val = ~Val;
51
52 // Calculate how many moves we will need to materialize this constant.
53 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
54 AArch64_IMM::expandMOVImm(Val, 64, Insn);
55 return Insn.size();
56}
57
58/// Calculate the cost of materializing the given constant.
59int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
60 assert(Ty->isIntegerTy());
61
62 unsigned BitSize = Ty->getPrimitiveSizeInBits();
63 if (BitSize == 0)
64 return ~0U;
65
66 // Sign-extend all constants to a multiple of 64-bit.
67 APInt ImmVal = Imm;
68 if (BitSize & 0x3f)
69 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
70
71 // Split the constant into 64-bit chunks and calculate the cost for each
72 // chunk.
73 int Cost = 0;
74 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
75 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
76 int64_t Val = Tmp.getSExtValue();
77 Cost += getIntImmCost(Val);
78 }
79 // We need at least one instruction to materialize the constant.
80 return std::max(1, Cost);
81}
82
83int AArch64TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
84 const APInt &Imm, Type *Ty) {
85 assert(Ty->isIntegerTy());
86
87 unsigned BitSize = Ty->getPrimitiveSizeInBits();
88 // There is no cost model for constants with a bit size of 0. Return TCC_Free
89 // here, so that constant hoisting will ignore this constant.
90 if (BitSize == 0)
91 return TTI::TCC_Free;
92
93 unsigned ImmIdx = ~0U;
94 switch (Opcode) {
95 default:
96 return TTI::TCC_Free;
97 case Instruction::GetElementPtr:
98 // Always hoist the base address of a GetElementPtr.
99 if (Idx == 0)
100 return 2 * TTI::TCC_Basic;
101 return TTI::TCC_Free;
102 case Instruction::Store:
103 ImmIdx = 0;
104 break;
105 case Instruction::Add:
106 case Instruction::Sub:
107 case Instruction::Mul:
108 case Instruction::UDiv:
109 case Instruction::SDiv:
110 case Instruction::URem:
111 case Instruction::SRem:
112 case Instruction::And:
113 case Instruction::Or:
114 case Instruction::Xor:
115 case Instruction::ICmp:
116 ImmIdx = 1;
117 break;
118 // Always return TCC_Free for the shift value of a shift instruction.
119 case Instruction::Shl:
120 case Instruction::LShr:
121 case Instruction::AShr:
122 if (Idx == 1)
123 return TTI::TCC_Free;
124 break;
125 case Instruction::Trunc:
126 case Instruction::ZExt:
127 case Instruction::SExt:
128 case Instruction::IntToPtr:
129 case Instruction::PtrToInt:
130 case Instruction::BitCast:
131 case Instruction::PHI:
132 case Instruction::Call:
133 case Instruction::Select:
134 case Instruction::Ret:
135 case Instruction::Load:
136 break;
137 }
138
139 if (Idx == ImmIdx) {
140 int NumConstants = (BitSize + 63) / 64;
141 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
142 return (Cost <= NumConstants * TTI::TCC_Basic)
143 ? static_cast<int>(TTI::TCC_Free)
144 : Cost;
145 }
146 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
147}
148
149int AArch64TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
150 const APInt &Imm, Type *Ty) {
151 assert(Ty->isIntegerTy());
152
153 unsigned BitSize = Ty->getPrimitiveSizeInBits();
154 // There is no cost model for constants with a bit size of 0. Return TCC_Free
155 // here, so that constant hoisting will ignore this constant.
156 if (BitSize == 0)
157 return TTI::TCC_Free;
158
159 switch (IID) {
160 default:
161 return TTI::TCC_Free;
162 case Intrinsic::sadd_with_overflow:
163 case Intrinsic::uadd_with_overflow:
164 case Intrinsic::ssub_with_overflow:
165 case Intrinsic::usub_with_overflow:
166 case Intrinsic::smul_with_overflow:
167 case Intrinsic::umul_with_overflow:
168 if (Idx == 1) {
169 int NumConstants = (BitSize + 63) / 64;
170 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
171 return (Cost <= NumConstants * TTI::TCC_Basic)
172 ? static_cast<int>(TTI::TCC_Free)
173 : Cost;
174 }
175 break;
176 case Intrinsic::experimental_stackmap:
177 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
178 return TTI::TCC_Free;
179 break;
180 case Intrinsic::experimental_patchpoint_void:
181 case Intrinsic::experimental_patchpoint_i64:
182 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
183 return TTI::TCC_Free;
184 break;
185 }
186 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
187}
188
189TargetTransformInfo::PopcntSupportKind
190AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
191 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
192 if (TyWidth == 32 || TyWidth == 64)
193 return TTI::PSK_FastHardware;
194 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
195 return TTI::PSK_Software;
196}
197
198bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
199 ArrayRef<const Value *> Args) {
200
201 // A helper that returns a vector type from the given type. The number of
202 // elements in type Ty determines the vector width.
203 auto toVectorTy = [&](Type *ArgTy) {
204 return VectorType::get(ArgTy->getScalarType(),
205 DstTy->getVectorNumElements());
206 };
207
208 // Exit early if DstTy is not a vector type whose elements are at least
209 // 16-bits wide.
210 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
211 return false;
212
213 // Determine if the operation has a widening variant. We consider both the
214 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
215 // instructions.
216 //
217 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
218 // verify that their extending operands are eliminated during code
219 // generation.
220 switch (Opcode) {
221 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
222 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
223 break;
224 default:
225 return false;
226 }
227
228 // To be a widening instruction (either the "wide" or "long" versions), the
229 // second operand must be a sign- or zero extend having a single user. We
230 // only consider extends having a single user because they may otherwise not
231 // be eliminated.
232 if (Args.size() != 2 ||
233 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
234 !Args[1]->hasOneUse())
235 return false;
236 auto *Extend = cast<CastInst>(Args[1]);
237
238 // Legalize the destination type and ensure it can be used in a widening
239 // operation.
240 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
241 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
242 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
243 return false;
244
245 // Legalize the source type and ensure it can be used in a widening
246 // operation.
247 Type *SrcTy = toVectorTy(Extend->getSrcTy());
248 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
249 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
250 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
251 return false;
252
253 // Get the total number of vector elements in the legalized types.
254 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
255 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
256
257 // Return true if the legalized types have the same number of vector elements
258 // and the destination element type size is twice that of the source type.
259 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
260}
261
262int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
263 const Instruction *I) {
264 int ISD = TLI->InstructionOpcodeToISD(Opcode);
265 assert(ISD && "Invalid opcode");
266
267 // If the cast is observable, and it is used by a widening instruction (e.g.,
268 // uaddl, saddw, etc.), it may be free.
269 if (I && I->hasOneUse()) {
270 auto *SingleUser = cast<Instruction>(*I->user_begin());
271 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
272 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
273 // If the cast is the second operand, it is free. We will generate either
274 // a "wide" or "long" version of the widening instruction.
275 if (I == SingleUser->getOperand(1))
276 return 0;
277 // If the cast is not the second operand, it will be free if it looks the
278 // same as the second operand. In this case, we will generate a "long"
279 // version of the widening instruction.
280 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
281 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
282 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
283 return 0;
284 }
285 }
286
287 EVT SrcTy = TLI->getValueType(DL, Src);
288 EVT DstTy = TLI->getValueType(DL, Dst);
289
290 if (!SrcTy.isSimple() || !DstTy.isSimple())
291 return BaseT::getCastInstrCost(Opcode, Dst, Src);
292
293 static const TypeConversionCostTblEntry
294 ConversionTbl[] = {
295 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
296 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
297 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
298 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
299
300 // The number of shll instructions for the extension.
301 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
302 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
303 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
304 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
305 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
306 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
307 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
308 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
309 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
310 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
311 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
312 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
313 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
314 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
315 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
316 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
317
318 // LowerVectorINT_TO_FP:
319 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
320 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
321 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
322 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
323 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
324 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
325
326 // Complex: to v2f32
327 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
328 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
329 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
330 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
331 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
332 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
333
334 // Complex: to v4f32
335 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
336 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
337 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
339
340 // Complex: to v8f32
341 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
342 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
343 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
344 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
345
346 // Complex: to v16f32
347 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
348 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
349
350 // Complex: to v2f64
351 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
352 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
353 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
354 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
355 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
356 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
357
358
359 // LowerVectorFP_TO_INT
360 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
361 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
362 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
363 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
364 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
365 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
366
367 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
368 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
369 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
370 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
372 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
373 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
374
375 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
376 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
377 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
378 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
380
381 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
382 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
383 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
384 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
385 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
386 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
387 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
388 };
389
390 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
391 DstTy.getSimpleVT(),
392 SrcTy.getSimpleVT()))
393 return Entry->Cost;
394
395 return BaseT::getCastInstrCost(Opcode, Dst, Src);
396}
397
398int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
399 VectorType *VecTy,
400 unsigned Index) {
401
402 // Make sure we were given a valid extend opcode.
403 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
404 "Invalid opcode");
405
406 // We are extending an element we extract from a vector, so the source type
407 // of the extend is the element type of the vector.
408 auto *Src = VecTy->getElementType();
409
410 // Sign- and zero-extends are for integer types only.
411 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
412
413 // Get the cost for the extract. We compute the cost (if any) for the extend
414 // below.
415 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
416
417 // Legalize the types.
418 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
419 auto DstVT = TLI->getValueType(DL, Dst);
420 auto SrcVT = TLI->getValueType(DL, Src);
421
422 // If the resulting type is still a vector and the destination type is legal,
423 // we may get the extension for free. If not, get the default cost for the
424 // extend.
425 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
426 return Cost + getCastInstrCost(Opcode, Dst, Src);
427
428 // The destination type should be larger than the element type. If not, get
429 // the default cost for the extend.
430 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
431 return Cost + getCastInstrCost(Opcode, Dst, Src);
432
433 switch (Opcode) {
434 default:
435 llvm_unreachable("Opcode should be either SExt or ZExt");
436
437 // For sign-extends, we only need a smov, which performs the extension
438 // automatically.
439 case Instruction::SExt:
440 return Cost;
441
442 // For zero-extends, the extend is performed automatically by a umov unless
443 // the destination type is i64 and the element type is i8 or i16.
444 case Instruction::ZExt:
445 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
446 return Cost;
447 }
448
449 // If we are unable to perform the extend for free, get the default cost.
450 return Cost + getCastInstrCost(Opcode, Dst, Src);
451}
452
453int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
454 unsigned Index) {
455 assert(Val->isVectorTy() && "This must be a vector type");
456
457 if (Index != -1U) {
458 // Legalize the type.
459 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
460
461 // This type is legalized to a scalar type.
462 if (!LT.second.isVector())
463 return 0;
464
465 // The type may be split. Normalize the index to the new type.
466 unsigned Width = LT.second.getVectorNumElements();
467 Index = Index % Width;
468
469 // The element at index zero is already inside the vector.
470 if (Index == 0)
471 return 0;
472 }
473
474 // All other insert/extracts cost this much.
475 return ST->getVectorInsertExtractBaseCost();
476}
477
478int AArch64TTIImpl::getArithmeticInstrCost(
479 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
480 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
481 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args) {
482 // Legalize the type.
483 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
484
485 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
486 // add in the widening overhead specified by the sub-target. Since the
487 // extends feeding widening instructions are performed automatically, they
488 // aren't present in the generated code and have a zero cost. By adding a
489 // widening overhead here, we attach the total cost of the combined operation
490 // to the widening instruction.
491 int Cost = 0;
492 if (isWideningInstruction(Ty, Opcode, Args))
493 Cost += ST->getWideningBaseCost();
494
495 int ISD = TLI->InstructionOpcodeToISD(Opcode);
496
497 switch (ISD) {
498 default:
499 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
500 Opd1PropInfo, Opd2PropInfo);
501 case ISD::SDIV:
502 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
503 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
504 // On AArch64, scalar signed division by a power-of-two constant is
505 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
506 // The OperandValue properties may not be the same as those of the
507 // previous operation; conservatively assume OP_None.
508 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
509 TargetTransformInfo::OP_None,
510 TargetTransformInfo::OP_None);
511 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
512 TargetTransformInfo::OP_None,
513 TargetTransformInfo::OP_None);
514 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
515 TargetTransformInfo::OP_None,
516 TargetTransformInfo::OP_None);
517 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
518 TargetTransformInfo::OP_None,
519 TargetTransformInfo::OP_None);
520 return Cost;
521 }
522 LLVM_FALLTHROUGH;
523 case ISD::UDIV:
524 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
525 auto VT = TLI->getValueType(DL, Ty);
526 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
527 // Vector signed division by a constant is expanded to the
528 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
529 // to MULHS + SUB + SRL + ADD + SRL.
530 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
531 Opd2Info,
532 TargetTransformInfo::OP_None,
533 TargetTransformInfo::OP_None);
534 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
535 Opd2Info,
536 TargetTransformInfo::OP_None,
537 TargetTransformInfo::OP_None);
538 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
543 }
544 }
545
546 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
547 Opd1PropInfo, Opd2PropInfo);
548 if (Ty->isVectorTy()) {
549 // On AArch64, vector divisions are not supported natively and are
550 // expanded into scalar divisions of each pair of elements.
551 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
552 Opd2Info, Opd1PropInfo, Opd2PropInfo);
553 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
554 Opd2Info, Opd1PropInfo, Opd2PropInfo);
555 // TODO: if one of the arguments is scalar, then it's not necessary to
556 // double the cost of handling the vector elements.
557 Cost += Cost;
558 }
559 return Cost;
560
561 case ISD::ADD:
562 case ISD::MUL:
563 case ISD::XOR:
564 case ISD::OR:
565 case ISD::AND:
566 // These nodes are marked as 'custom' for combining purposes only.
567 // We know that they are legal. See LowerAdd in ISelLowering.
568 return (Cost + 1) * LT.first;
569 }
570}
571
572int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
573 const SCEV *Ptr) {
574 // Address computations in vectorized code with non-consecutive addresses will
575 // likely result in more instructions compared to scalar code where the
576 // computation can more often be merged into the index mode. The resulting
577 // extra micro-ops can significantly decrease throughput.
578 unsigned NumVectorInstToHideOverhead = 10;
579 int MaxMergeDistance = 64;
580
581 if (Ty->isVectorTy() && SE &&
582 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
583 return NumVectorInstToHideOverhead;
584
585 // In many cases the address computation is not merged into the instruction
586 // addressing mode.
587 return 1;
588}
589
590int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
591 Type *CondTy, const Instruction *I) {
592
593 int ISD = TLI->InstructionOpcodeToISD(Opcode);
594 // Some vector selects that are wider than the register width are not
595 // lowered well.
596 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
Step 1: Calling 'Type::isVectorTy'
Step 4: Returning from 'Type::isVectorTy'
Step 20: Calling 'Type::isVectorTy'
Step 23: Returning from 'Type::isVectorTy'
Step 24: Assuming 'ISD' is equal to SELECT
Step 25: Taking true branch
597 // We would need this many instructions to hide the scalarization happening.
598 const int AmortizationCost = 20;
599 static const TypeConversionCostTblEntry
600 VectorSelectTbl[] = {
601 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
602 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
603 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
604 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
605 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
606 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
607 };
608
609 EVT SelCondTy = TLI->getValueType(DL, CondTy);
Step 26: Passing null pointer value via 2nd parameter 'Ty'
Step 27: Calling 'TargetLoweringBase::getValueType'
610 EVT SelValTy = TLI->getValueType(DL, ValTy);
611 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
612 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
613 SelCondTy.getSimpleVT(),
614 SelValTy.getSimpleVT()))
615 return Entry->Cost;
616 }
617 }
618 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
Step 5: Passing value via 3rd parameter 'CondTy'
Step 6: Calling 'BasicTTIImplBase::getCmpSelInstrCost'
619}
620
621AArch64TTIImpl::TTI::MemCmpExpansionOptions
622AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
623 TTI::MemCmpExpansionOptions Options;
624 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
625 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
626 Options.NumLoadsPerBlock = Options.MaxNumLoads;
627 // TODO: Though vector loads usually perform well on AArch64, in some targets
628 // they may wake up the FP unit, which raises the power consumption. Perhaps
629 // they could be used with no holds barred (-O3).
630 Options.LoadSizes = {8, 4, 2, 1};
631 return Options;
632}
633
634int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
635 MaybeAlign Alignment, unsigned AddressSpace,
636 const Instruction *I) {
637 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
638
639 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
640 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
641 // Unaligned stores are extremely inefficient. We don't split all
642 // unaligned 128-bit stores because of the negative impact that has been
643 // shown in practice on inlined block copy code.
644 // We make such stores expensive so that we will only vectorize if there
645 // are 6 other instructions getting vectorized.
646 const int AmortizationCost = 6;
647
648 return LT.first * 2 * AmortizationCost;
649 }
650
651 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
652 unsigned ProfitableNumElements;
653 if (Opcode == Instruction::Store)
654 // We use a custom trunc store lowering so v.4b should be profitable.
655 ProfitableNumElements = 4;
656 else
657 // We scalarize the loads because there is no v.4b register and we
658 // have to promote the elements to v.2.
659 ProfitableNumElements = 8;
660
661 if (Ty->getVectorNumElements() < ProfitableNumElements) {
662 unsigned NumVecElts = Ty->getVectorNumElements();
663 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
664 // We generate 2 instructions per vector element.
665 return NumVectorizableInstsToAmortize * NumVecElts * 2;
666 }
667 }
668
669 return LT.first;
670}
671
672int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
673 unsigned Factor,
674 ArrayRef<unsigned> Indices,
675 unsigned Alignment,
676 unsigned AddressSpace,
677 bool UseMaskForCond,
678 bool UseMaskForGaps) {
679 assert(Factor >= 2 && "Invalid interleave factor");
680 assert(isa<VectorType>(VecTy) && "Expect a vector type");
681
682 if (!UseMaskForCond && !UseMaskForGaps &&
683 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
684 unsigned NumElts = VecTy->getVectorNumElements();
685 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
686
687 // ldN/stN only support legal vector types of size 64 or 128 in bits.
688 // Accesses having vector types that are a multiple of 128 bits can be
689 // matched to more than one ldN/stN instruction.
690 if (NumElts % Factor == 0 &&
691 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
692 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
693 }
694
695 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
696 Alignment, AddressSpace,
697 UseMaskForCond, UseMaskForGaps);
698}
699
700int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
701 int Cost = 0;
702 for (auto *I : Tys) {
703 if (!I->isVectorTy())
704 continue;
705 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
706 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
707 getMemoryOpCost(Instruction::Load, I, Align(128), 0);
708 }
709 return Cost;
710}
711
712unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
713 return ST->getMaxInterleaveFactor();
714}
715
716// For Falkor, we want to avoid having too many strided loads in a loop since
717// that can exhaust the HW prefetcher resources. We adjust the unroller
718// MaxCount preference below to attempt to ensure unrolling doesn't create too
719// many strided loads.
720static void
721getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
722 TargetTransformInfo::UnrollingPreferences &UP) {
723 enum { MaxStridedLoads = 7 };
724 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
725 int StridedLoads = 0;
726 // FIXME? We could make this more precise by looking at the CFG and
727 // e.g. not counting loads in each side of an if-then-else diamond.
728 for (const auto BB : L->blocks()) {
729 for (auto &I : *BB) {
730 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
731 if (!LMemI)
732 continue;
733
734 Value *PtrValue = LMemI->getPointerOperand();
735 if (L->isLoopInvariant(PtrValue))
736 continue;
737
738 const SCEV *LSCEV = SE.getSCEV(PtrValue);
739 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
740 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
741 continue;
742
743 // FIXME? We could take pairing of unrolled load copies into account
744 // by looking at the AddRec, but we would probably have to limit this
745 // to loops with no stores or other memory optimization barriers.
746 ++StridedLoads;
747 // We've seen enough strided loads that seeing more won't make a
748 // difference.
749 if (StridedLoads > MaxStridedLoads / 2)
750 return StridedLoads;
751 }
752 }
753 return StridedLoads;
754 };
755
756 int StridedLoads = countStridedLoads(L, SE);
757 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
758 << " strided loads\n");
759 // Pick the largest power of 2 unroll count that won't result in too many
760 // strided loads.
761 if (StridedLoads) {
762 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
763 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
764 << UP.MaxCount << '\n');
765 }
766}
767
768void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
769 TTI::UnrollingPreferences &UP) {
770 // Enable partial unrolling and runtime unrolling.
771 BaseT::getUnrollingPreferences(L, SE, UP);
772
773 // An inner loop is more likely to be hot, and its runtime check can be
774 // hoisted out by the LICM pass, so the overhead is lower; try a larger
775 // threshold to unroll more loops.
776 if (L->getLoopDepth() > 1)
777 UP.PartialThreshold *= 2;
778
779 // Disable partial & runtime unrolling on -Os.
780 UP.PartialOptSizeThreshold = 0;
781
782 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
783 EnableFalkorHWPFUnrollFix)
784 getFalkorUnrollingPreferences(L, SE, UP);
785}
786
787Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
788 Type *ExpectedType) {
789 switch (Inst->getIntrinsicID()) {
790 default:
791 return nullptr;
792 case Intrinsic::aarch64_neon_st2:
793 case Intrinsic::aarch64_neon_st3:
794 case Intrinsic::aarch64_neon_st4: {
795 // Create a struct type
796 StructType *ST = dyn_cast<StructType>(ExpectedType);
797 if (!ST)
798 return nullptr;
799 unsigned NumElts = Inst->getNumArgOperands() - 1;
800 if (ST->getNumElements() != NumElts)
801 return nullptr;
802 for (unsigned i = 0, e = NumElts; i != e; ++i) {
803 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
804 return nullptr;
805 }
806 Value *Res = UndefValue::get(ExpectedType);
807 IRBuilder<> Builder(Inst);
808 for (unsigned i = 0, e = NumElts; i != e; ++i) {
809 Value *L = Inst->getArgOperand(i);
810 Res = Builder.CreateInsertValue(Res, L, i);
811 }
812 return Res;
813 }
814 case Intrinsic::aarch64_neon_ld2:
815 case Intrinsic::aarch64_neon_ld3:
816 case Intrinsic::aarch64_neon_ld4:
817 if (Inst->getType() == ExpectedType)
818 return Inst;
819 return nullptr;
820 }
821}
822
823bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
824 MemIntrinsicInfo &Info) {
825 switch (Inst->getIntrinsicID()) {
826 default:
827 break;
828 case Intrinsic::aarch64_neon_ld2:
829 case Intrinsic::aarch64_neon_ld3:
830 case Intrinsic::aarch64_neon_ld4:
831 Info.ReadMem = true;
832 Info.WriteMem = false;
833 Info.PtrVal = Inst->getArgOperand(0);
834 break;
835 case Intrinsic::aarch64_neon_st2:
836 case Intrinsic::aarch64_neon_st3:
837 case Intrinsic::aarch64_neon_st4:
838 Info.ReadMem = false;
839 Info.WriteMem = true;
840 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
841 break;
842 }
843
844 switch (Inst->getIntrinsicID()) {
845 default:
846 return false;
847 case Intrinsic::aarch64_neon_ld2:
848 case Intrinsic::aarch64_neon_st2:
849 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
850 break;
851 case Intrinsic::aarch64_neon_ld3:
852 case Intrinsic::aarch64_neon_st3:
853 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
854 break;
855 case Intrinsic::aarch64_neon_ld4:
856 case Intrinsic::aarch64_neon_st4:
857 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
858 break;
859 }
860 return true;
861}
862
863/// See if \p I should be considered for address type promotion. We check if \p
864/// I is a sext with right type and used in memory accesses. If it used in a
865/// "complex" getelementptr, we allow it to be promoted without finding other
866/// sext instructions that sign extended the same initial value. A getelementptr
867/// is considered as "complex" if it has more than 2 operands.
868bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
869 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
870 bool Considerable = false;
871 AllowPromotionWithoutCommonHeader = false;
872 if (!isa<SExtInst>(&I))
873 return false;
874 Type *ConsideredSExtType =
875 Type::getInt64Ty(I.getParent()->getParent()->getContext());
876 if (I.getType() != ConsideredSExtType)
877 return false;
878 // See if the sext is the one with the right type and used in at least one
879 // GetElementPtrInst.
880 for (const User *U : I.users()) {
881 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
882 Considerable = true;
883 // A getelementptr is considered as "complex" if it has more than 2
884 // operands. We will promote a SExt used in such complex GEP as we
885 // expect some computation to be merged if they are done on 64 bits.
886 if (GEPInst->getNumOperands() > 2) {
887 AllowPromotionWithoutCommonHeader = true;
888 break;
889 }
890 }
891 }
892 return Considerable;
893}
894
895bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
896 TTI::ReductionFlags Flags) const {
897 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
898 unsigned ScalarBits = Ty->getScalarSizeInBits();
899 switch (Opcode) {
900 case Instruction::FAdd:
901 case Instruction::FMul:
902 case Instruction::And:
903 case Instruction::Or:
904 case Instruction::Xor:
905 case Instruction::Mul:
906 return false;
907 case Instruction::Add:
908 return ScalarBits * Ty->getVectorNumElements() >= 128;
909 case Instruction::ICmp:
910 return (ScalarBits < 64) &&
911 (ScalarBits * Ty->getVectorNumElements() >= 128);
912 case Instruction::FCmp:
913 return Flags.NoNaN;
914 default:
915 llvm_unreachable("Unhandled reduction opcode");
916 }
917 return false;
918}
919
920int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
921 bool IsPairwiseForm) {
922
923 if (IsPairwiseForm)
924 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
925
926 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
927 MVT MTy = LT.second;
928 int ISD = TLI->InstructionOpcodeToISD(Opcode);
929 assert(ISD && "Invalid opcode");
930
931 // Horizontal adds can use the 'addv' instruction. We model the cost of these
932 // instructions as normal vector adds. This is the only arithmetic vector
933 // reduction operation for which we have an instruction.
934 static const CostTblEntry CostTblNoPairwise[]{
935 {ISD::ADD, MVT::v8i8, 1},
936 {ISD::ADD, MVT::v16i8, 1},
937 {ISD::ADD, MVT::v4i16, 1},
938 {ISD::ADD, MVT::v8i16, 1},
939 {ISD::ADD, MVT::v4i32, 1},
940 };
941
942 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
943 return LT.first * Entry->Cost;
944
945 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
946}
947
948int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
949 Type *SubTp) {
950 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
951 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
952 static const CostTblEntry ShuffleTbl[] = {
953 // Broadcast shuffle kinds can be performed with 'dup'.
954 { TTI::SK_Broadcast, MVT::v8i8, 1 },
955 { TTI::SK_Broadcast, MVT::v16i8, 1 },
956 { TTI::SK_Broadcast, MVT::v4i16, 1 },
957 { TTI::SK_Broadcast, MVT::v8i16, 1 },
958 { TTI::SK_Broadcast, MVT::v2i32, 1 },
959 { TTI::SK_Broadcast, MVT::v4i32, 1 },
960 { TTI::SK_Broadcast, MVT::v2i64, 1 },
961 { TTI::SK_Broadcast, MVT::v2f32, 1 },
962 { TTI::SK_Broadcast, MVT::v4f32, 1 },
963 { TTI::SK_Broadcast, MVT::v2f64, 1 },
964 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
965 // 'zip1/zip2' instructions.
966 { TTI::SK_Transpose, MVT::v8i8, 1 },
967 { TTI::SK_Transpose, MVT::v16i8, 1 },
968 { TTI::SK_Transpose, MVT::v4i16, 1 },
969 { TTI::SK_Transpose, MVT::v8i16, 1 },
970 { TTI::SK_Transpose, MVT::v2i32, 1 },
971 { TTI::SK_Transpose, MVT::v4i32, 1 },
972 { TTI::SK_Transpose, MVT::v2i64, 1 },
973 { TTI::SK_Transpose, MVT::v2f32, 1 },
974 { TTI::SK_Transpose, MVT::v4f32, 1 },
975 { TTI::SK_Transpose, MVT::v2f64, 1 },
976 // Select shuffle kinds.
977 // TODO: handle vXi8/vXi16.
978 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
979 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
980 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
981 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
982 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
983 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
984 // PermuteSingleSrc shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
988 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
991 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
992 };
993 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
994 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
995 return LT.first * Entry->Cost;
996 }
997
998 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
999}
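
A note before the Type.h excerpt: BaseT here is BasicTTIImplBase<AArch64TTIImpl>, so the BaseT::getCmpSelInstrCost call on line 618 above (Step 6) dispatches into the generic implementation in BasicTTIImpl.h, which is not part of this excerpt, so most of the intermediate steps are not shown; the path notes indicate that it eventually calls back into the target hook with a vector ValTy and a null 'CondTy' (Steps 20-27). The CRTP round trip that makes such a callback possible is sketched generically below; the class and member names (BaseTTIImpl, MyTargetTTIImpl, getCmpSelCost, getCmpSelCostGeneric) are illustrative assumptions, not the BasicTTIImpl.h code, and the arguments the real generic path passes back may differ.

// Generic CRTP sketch (illustrative names): the base class's generic
// implementation re-dispatches to the derived override, so an argument the base
// chooses to pass, here a null CondTy, reaches the derived hook even though the
// original caller supplied a valid pointer.
#include <cassert>

template <typename Derived> struct BaseTTIImpl {
  int getCmpSelCostGeneric(int Opcode, const char *ValTy, const char *CondTy) {
    (void)ValTy;
    (void)CondTy;
    // Mirrors the shape of the reported path: re-enter the derived override
    // with a vector type and a null CondTy.
    return static_cast<Derived *>(this)->getCmpSelCost(Opcode, "v4i32",
                                                       /*CondTy=*/nullptr) + 1;
  }
};

struct MyTargetTTIImpl : BaseTTIImpl<MyTargetTTIImpl> {
  int getCmpSelCost(int Opcode, const char *ValTy, const char *CondTy) {
    // Mirrors AArch64TTIImpl::getCmpSelInstrCost: handle a vector special case,
    // otherwise defer to the generic implementation (like line 618 above).
    if (ValTy[0] == 'v')               // pretend vector type names start with 'v'
      return CondTy ? 2 : 1;           // the CondTy null check is the sketched guard
    return getCmpSelCostGeneric(Opcode, ValTy, CondTy);
  }
};

int main() {
  MyTargetTTIImpl TTI;
  // First entry: scalar ValTy with a valid CondTy; second entry (via the base):
  // vector ValTy with a null CondTy, which the guard handles safely.
  assert(TTI.getCmpSelCost(/*Opcode=*/0, "i32", "i1") == 2);
  return 0;
}

The remainder of the report excerpts llvm/include/llvm/IR/Type.h because the constraints the analyzer tracks (Steps 2-3, 13, and 21-22) come from Type::isVectorTy().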

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/IR/Type.h

1//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the Type class. For more "Type"
10// stuff, look in DerivedTypes.h.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_TYPE_H
15#define LLVM_IR_TYPE_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/CBindingWrapping.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TypeSize.h"
25#include <cassert>
26#include <cstdint>
27#include <iterator>
28
29namespace llvm {
30
31template<class GraphType> struct GraphTraits;
32class IntegerType;
33class LLVMContext;
34class PointerType;
35class raw_ostream;
36class StringRef;
37
38/// The instances of the Type class are immutable: once they are created,
39/// they are never changed. Also note that only one instance of a particular
40/// type is ever created. Thus seeing if two types are equal is a matter of
41/// doing a trivial pointer comparison. To enforce that no two equal instances
42/// are created, Type instances can only be created via static factory methods
43/// in class Type and in derived classes. Once allocated, Types are never
44/// free'd.
45///
46class Type {
47public:
48 //===--------------------------------------------------------------------===//
49 /// Definitions of all of the base types for the Type system. Based on this
50 /// value, you can cast to a class defined in DerivedTypes.h.
51 /// Note: If you add an element to this, you need to add an element to the
52 /// Type::getPrimitiveType function, or else things will break!
53 /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
54 ///
55 enum TypeID {
56 // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
57 VoidTyID = 0, ///< 0: type with no size
58 HalfTyID, ///< 1: 16-bit floating point type
59 FloatTyID, ///< 2: 32-bit floating point type
60 DoubleTyID, ///< 3: 64-bit floating point type
61 X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
62 FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
63 PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
64 LabelTyID, ///< 7: Labels
65 MetadataTyID, ///< 8: Metadata
66 X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
67 TokenTyID, ///< 10: Tokens
68
69 // Derived types... see DerivedTypes.h file.
70 // Make sure FirstDerivedTyID stays up to date!
71 IntegerTyID, ///< 11: Arbitrary bit width integers
72 FunctionTyID, ///< 12: Functions
73 StructTyID, ///< 13: Structures
74 ArrayTyID, ///< 14: Arrays
75 PointerTyID, ///< 15: Pointers
76 VectorTyID ///< 16: SIMD 'packed' format, or other vector type
77 };
78
79private:
80 /// This refers to the LLVMContext in which this type was uniqued.
81 LLVMContext &Context;
82
83 TypeID ID : 8; // The current base type of this type.
84 unsigned SubclassData : 24; // Space for subclasses to store data.
85 // Note that this should be synchronized with
86 // MAX_INT_BITS value in IntegerType class.
87
88protected:
89 friend class LLVMContextImpl;
90
91 explicit Type(LLVMContext &C, TypeID tid)
92 : Context(C), ID(tid), SubclassData(0) {}
93 ~Type() = default;
94
95 unsigned getSubclassData() const { return SubclassData; }
96
97 void setSubclassData(unsigned val) {
98 SubclassData = val;
99 // Ensure we don't have any accidental truncation.
100 assert(getSubclassData() == val && "Subclass data too large for field");
101 }
102
103 /// Keeps track of how many Type*'s there are in the ContainedTys list.
104 unsigned NumContainedTys = 0;
105
106 /// A pointer to the array of Types contained by this Type. For example, this
107 /// includes the arguments of a function type, the elements of a structure,
108 /// the pointee of a pointer, the element type of an array, etc. This pointer
109 /// may be 0 for types that don't contain other types (Integer, Double,
110 /// Float).
111 Type * const *ContainedTys = nullptr;
112
113 static bool isSequentialType(TypeID TyID) {
114 return TyID == ArrayTyID || TyID == VectorTyID;
115 }
116
117public:
118 /// Print the current type.
119 /// Omit the type details if \p NoDetails == true.
120 /// E.g., let %st = type { i32, i16 }
121 /// When \p NoDetails is true, we only print %st.
122 /// Put differently, \p NoDetails prints the type as if
123 /// inlined with the operands when printing an instruction.
124 void print(raw_ostream &O, bool IsForDebug = false,
125 bool NoDetails = false) const;
126
127 void dump() const;
128
129 /// Return the LLVMContext in which this type was uniqued.
130 LLVMContext &getContext() const { return Context; }
131
132 //===--------------------------------------------------------------------===//
133 // Accessors for working with types.
134 //
135
136 /// Return the type id for the type. This will return one of the TypeID enum
137 /// elements defined above.
138 TypeID getTypeID() const { return ID; }
139
140 /// Return true if this is 'void'.
141 bool isVoidTy() const { return getTypeID() == VoidTyID; }
142
143 /// Return true if this is 'half', a 16-bit IEEE fp type.
144 bool isHalfTy() const { return getTypeID() == HalfTyID; }
145
146 /// Return true if this is 'float', a 32-bit IEEE fp type.
147 bool isFloatTy() const { return getTypeID() == FloatTyID; }
148
149 /// Return true if this is 'double', a 64-bit IEEE fp type.
150 bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
151
152 /// Return true if this is x86 long double.
153 bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
154
155 /// Return true if this is 'fp128'.
156 bool isFP128Ty() const { return getTypeID() == FP128TyID; }
157
158 /// Return true if this is powerpc long double.
159 bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
160
161 /// Return true if this is one of the six floating-point types
162 bool isFloatingPointTy() const {
163 return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
164 getTypeID() == DoubleTyID ||
165 getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
166 getTypeID() == PPC_FP128TyID;
167 }
168
169 const fltSemantics &getFltSemantics() const {
170 switch (getTypeID()) {
171 case HalfTyID: return APFloat::IEEEhalf();
172 case FloatTyID: return APFloat::IEEEsingle();
173 case DoubleTyID: return APFloat::IEEEdouble();
174 case X86_FP80TyID: return APFloat::x87DoubleExtended();
175 case FP128TyID: return APFloat::IEEEquad();
176 case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
177 default: llvm_unreachable("Invalid floating type");
178 }
179 }
180
181 /// Return true if this is X86 MMX.
182 bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
183
184 /// Return true if this is a FP type or a vector of FP.
185 bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
186
187 /// Return true if this is 'label'.
188 bool isLabelTy() const { return getTypeID() == LabelTyID; }
189
190 /// Return true if this is 'metadata'.
191 bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
192
193 /// Return true if this is 'token'.
194 bool isTokenTy() const { return getTypeID() == TokenTyID; }
195
196 /// True if this is an instance of IntegerType.
197 bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
198
199 /// Return true if this is an IntegerType of the given width.
200 bool isIntegerTy(unsigned Bitwidth) const;
201
202 /// Return true if this is an integer type or a vector of integer types.
203 bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
204
205 /// Return true if this is an integer type or a vector of integer types of
206 /// the given width.
207 bool isIntOrIntVectorTy(unsigned BitWidth) const {
208 return getScalarType()->isIntegerTy(BitWidth);
209 }
210
211 /// Return true if this is an integer type or a pointer type.
212 bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
213
214 /// True if this is an instance of FunctionType.
215 bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
216
217 /// True if this is an instance of StructType.
218 bool isStructTy() const { return getTypeID() == StructTyID; }
219
220 /// True if this is an instance of ArrayType.
221 bool isArrayTy() const { return getTypeID() == ArrayTyID; }
222
223 /// True if this is an instance of PointerType.
224 bool isPointerTy() const { return getTypeID() == PointerTyID; }
225
226 /// Return true if this is a pointer type or a vector of pointer types.
227 bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
228
229 /// True if this is an instance of VectorType.
230 bool isVectorTy() const { return getTypeID() == VectorTyID; }
Step 2: Assuming the condition is false
Step 3: Returning zero, which participates in a condition later
Step 13: Returning the value 1, which participates in a condition later
Step 21: Assuming the condition is true
Step 22: Returning the value 1, which participates in a condition later
231
232 /// Return true if this type could be converted to type 'Ty' with a lossless
233 /// BitCast; for example, i8* to i32*. BitCasts are valid only for types of
234 /// the same size, and only where no re-interpretation of the bits is done.
235 /// In other words: determine whether this type could be losslessly bitcast to Ty.
236 bool canLosslesslyBitCastTo(Type *Ty) const;
237
238 /// Return true if this type is empty, that is, it has no elements or all of
239 /// its elements are empty.
240 bool isEmptyTy() const;
241
242 /// Return true if the type is "first class", meaning it is a valid type for a
243 /// Value.
244 bool isFirstClassType() const {
245 return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
246 }
247
248 /// Return true if the type is a valid type for a register in codegen. This
249 /// includes all first-class types except struct and array types.
250 bool isSingleValueType() const {
251 return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
252 isPointerTy() || isVectorTy();
253 }
254
255 /// Return true if the type is an aggregate type. This means it is valid as
256 /// the first operand of an insertvalue or extractvalue instruction. This
257 /// includes struct and array types, but does not include vector types.
258 bool isAggregateType() const {
259 return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
260 }
261
262 /// Return true if it makes sense to take the size of this type. To get the
263 /// actual size for a particular target, it is reasonable to use the
264 /// DataLayout subsystem to do this.
265 bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
266 // If it's a primitive, it is always sized.
267 if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
268 getTypeID() == PointerTyID ||
269 getTypeID() == X86_MMXTyID)
270 return true;
271 // If it is not something that can have a size (e.g. a function or label),
272 // it doesn't have a size.
273 if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
274 getTypeID() != VectorTyID)
275 return false;
276 // Otherwise we have to try harder to decide.
277 return isSizedDerivedType(Visited);
278 }
279
280 /// Return the basic size of this type if it is a primitive type. These are
281 /// fixed by LLVM and are not target-dependent.
282 /// This will return zero if the type does not have a size or is not a
283 /// primitive type.
284 ///
285 /// If this is a scalable vector type, the scalable property will be set and
286 /// the runtime size will be a positive integer multiple of the base size.
287 ///
288 /// Note that this may not reflect the size of memory allocated for an
289 /// instance of the type or the number of bytes that are written when an
290 /// instance of the type is stored to memory. The DataLayout class provides
291 /// additional query functions to provide this information.
292 ///
293 TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
294
295 /// If this is a vector type, return the getPrimitiveSizeInBits value for the
296 /// element type. Otherwise return the getPrimitiveSizeInBits value for this
297 /// type.
298 unsigned getScalarSizeInBits() const LLVM_READONLY;
299
300 /// Return the width of the mantissa of this type. This is only valid on
301 /// floating-point types. If the FP type does not have a stable mantissa (e.g.
302 /// ppc long double), this method returns -1.
303 int getFPMantissaWidth() const;
304
305 /// If this is a vector type, return the element type, otherwise return
306 /// 'this'.
307 Type *getScalarType() const {
308 if (isVectorTy())
309 return getVectorElementType();
310 return const_cast<Type*>(this);
311 }
312
313 //===--------------------------------------------------------------------===//
314 // Type Iteration support.
315 //
316 using subtype_iterator = Type * const *;
317
318 subtype_iterator subtype_begin() const { return ContainedTys; }
319 subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
320 ArrayRef<Type*> subtypes() const {
321 return makeArrayRef(subtype_begin(), subtype_end());
322 }
323
324 using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>;
325
326 subtype_reverse_iterator subtype_rbegin() const {
327 return subtype_reverse_iterator(subtype_end());
328 }
329 subtype_reverse_iterator subtype_rend() const {
330 return subtype_reverse_iterator(subtype_begin());
331 }
332
333 /// This method is used to implement the type iterator (defined at the end of
334 /// the file). For derived types, this returns the types 'contained' in the
335 /// derived type.
336 Type *getContainedType(unsigned i) const {
337 assert(i < NumContainedTys && "Index out of range!");
338 return ContainedTys[i];
339 }
340
341 /// Return the number of types in the derived type.
342 unsigned getNumContainedTypes() const { return NumContainedTys; }
343
344 //===--------------------------------------------------------------------===//
345 // Helper methods corresponding to subclass methods. This forces a cast to
346 // the specified subclass and calls its accessor. "getVectorNumElements" (for
347 // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
348 // only intended to cover the core methods that are frequently used, helper
349 // methods should not be added here.
350
351 inline unsigned getIntegerBitWidth() const;
352
353 inline Type *getFunctionParamType(unsigned i) const;
354 inline unsigned getFunctionNumParams() const;
355 inline bool isFunctionVarArg() const;
356
357 inline StringRef getStructName() const;
358 inline unsigned getStructNumElements() const;
359 inline Type *getStructElementType(unsigned N) const;
360
361 inline Type *getSequentialElementType() const {
362 assert(isSequentialType(getTypeID()) && "Not a sequential type!");
363 return ContainedTys[0];
364 }
365
366 inline uint64_t getArrayNumElements() const;
367
368 Type *getArrayElementType() const {
369 assert(getTypeID() == ArrayTyID);
370 return ContainedTys[0];
371 }
372
373 inline bool getVectorIsScalable() const;
374 inline unsigned getVectorNumElements() const;
375 inline ElementCount getVectorElementCount() const;
376 Type *getVectorElementType() const {
377 assert(getTypeID() == VectorTyID);
378 return ContainedTys[0];
379 }
380
381 Type *getPointerElementType() const {
382 assert(getTypeID() == PointerTyID);
383 return ContainedTys[0];
384 }
385
386 /// Given an integer or vector type, change the lane bitwidth to NewBitwidth,
387 /// whilst keeping the old number of lanes.
388 inline Type *getWithNewBitWidth(unsigned NewBitWidth) const;
389
390 /// Given scalar/vector integer type, returns a type with elements twice as
391 /// wide as in the original type. For vectors, preserves element count.
392 inline Type *getExtendedType() const;
393
394 /// Get the address space of this pointer or pointer vector type.
395 inline unsigned getPointerAddressSpace() const;
396
397 //===--------------------------------------------------------------------===//
398 // Static members exported by the Type class itself. Useful for getting
399 // instances of Type.
400 //
401
402 /// Return a type based on an identifier.
403 static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
404
405 //===--------------------------------------------------------------------===//
406 // These are the builtin types that are always available.
407 //
408 static Type *getVoidTy(LLVMContext &C);
409 static Type *getLabelTy(LLVMContext &C);
410 static Type *getHalfTy(LLVMContext &C);
411 static Type *getFloatTy(LLVMContext &C);
412 static Type *getDoubleTy(LLVMContext &C);
413 static Type *getMetadataTy(LLVMContext &C);
414 static Type *getX86_FP80Ty(LLVMContext &C);
415 static Type *getFP128Ty(LLVMContext &C);
416 static Type *getPPC_FP128Ty(LLVMContext &C);
417 static Type *getX86_MMXTy(LLVMContext &C);
418 static Type *getTokenTy(LLVMContext &C);
419 static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
420 static IntegerType *getInt1Ty(LLVMContext &C);
421 static IntegerType *getInt8Ty(LLVMContext &C);
422 static IntegerType *getInt16Ty(LLVMContext &C);
423 static IntegerType *getInt32Ty(LLVMContext &C);
424 static IntegerType *getInt64Ty(LLVMContext &C);
425 static IntegerType *getInt128Ty(LLVMContext &C);
426 template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
427 int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
428 if (std::is_integral<ScalarTy>::value) {
429 return (Type*) Type::getIntNTy(C, noOfBits);
430 } else if (std::is_floating_point<ScalarTy>::value) {
431 switch (noOfBits) {
432 case 32:
433 return Type::getFloatTy(C);
434 case 64:
435 return Type::getDoubleTy(C);
436 }
437 }
438 llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/IR/Type.h"
, 438)
;
439 }
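A minimal usage sketch of the getScalarTy helper above, assuming an existing LLVMContext named Ctx (the variable names are illustrative, not part of the source):

  LLVMContext Ctx;
  Type *F32 = Type::getScalarTy<float>(Ctx);    // 32-bit FP maps to Type::getFloatTy(Ctx)
  Type *I64 = Type::getScalarTy<uint64_t>(Ctx); // integral maps to Type::getIntNTy(Ctx, 64)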
440
441 //===--------------------------------------------------------------------===//
442 // Convenience methods for getting pointer types with one of the above builtin
443 // types as pointee.
444 //
445 static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
446 static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
447 static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
448 static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
449 static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
450 static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
451 static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
452 static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
453 static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
454 static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
455 static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
456 static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
457 static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
458
459 /// Return a pointer to the current type. This is equivalent to
460 /// PointerType::get(Foo, AddrSpace).
461 PointerType *getPointerTo(unsigned AddrSpace = 0) const;
462
463private:
464 /// Derived types like structures and arrays are sized iff all of the members
465 /// of the type are sized as well. Since asking for their size is relatively
466 /// uncommon, move this operation out-of-line.
467 bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
468};
469
470// Printing of types.
471inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
472 T.print(OS);
473 return OS;
474}
475
476// allow isa<PointerType>(x) to work without DerivedTypes.h included.
477template <> struct isa_impl<PointerType, Type> {
478 static inline bool doit(const Type &Ty) {
479 return Ty.getTypeID() == Type::PointerTyID;
480 }
481};
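A one-line illustration of what the specialization above enables; SomeTy is an assumed Type* obtained elsewhere:

  // Works with only Type.h included; no DerivedTypes.h needed.
  bool IsPtr = isa<PointerType>(SomeTy);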
482
483// Create wrappers for C Binding types (see CBindingWrapping.h).
484DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
485
486/* Specialized opaque type conversions.
487 */
488inline Type **unwrap(LLVMTypeRef* Tys) {
489 return reinterpret_cast<Type**>(Tys);
490}
491
492inline LLVMTypeRef *wrap(Type **Tys) {
493 return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
494}
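A short round trip through the C-binding helpers defined above; Ty is an assumed Type* obtained elsewhere:

  LLVMTypeRef Ref = wrap(Ty);          // hand the type out through the C API
  Type *Same = unwrap(Ref);            // recover the C++ object (Same == Ty)
  auto *PT = unwrap<PointerType>(Ref); // typed unwrap; cast<> asserts if Ref is not a PointerType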
495
496} // end namespace llvm
497
498#endif // LLVM_IR_TYPE_H

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85 assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
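A worked instance of the formula above, assuming a hypothetical <4 x i32> type and a unit getVectorInstrCost for every element operation:

  // Cost = 1 (extract lane 0) + 4 (insert into each result lane) = 5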
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102 assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104 // Shuffle cost is equal to the cost of extracting the elements from its
105 // arguments plus the cost of inserting them into the result vector.
106
107 // e.g. a <4 x float> shuffle with mask <0,5,2,7> extracts from index 0 of
108 // the first vector, index 1 of the second vector, index 2 of the first
109 // vector and index 3 of the second vector, and inserts them at indices
110 // <0,1,2,3> of the result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124 "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127 "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting elements from
131 // the source type plus the cost of inserting them into the result vector
132 // type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145 assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146 "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148 assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149 "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting elements from
153 // the subvector type plus the cost of inserting them into the result vector
154 // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187 llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 187)
;
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool isSourceOfDivergence(const Value *V) { return false; }
211
212 bool isAlwaysUniform(const Value *V) { return false; }
213
214 unsigned getFlatAddressSpace() {
215 // Return an invalid address space.
216 return -1;
217 }
218
219 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220 Intrinsic::ID IID) const {
221 return false;
222 }
223
224 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225 Value *OldV, Value *NewV) const {
226 return false;
227 }
228
229 bool isLegalAddImmediate(int64_t imm) {
230 return getTLI()->isLegalAddImmediate(imm);
231 }
232
233 bool isLegalICmpImmediate(int64_t imm) {
234 return getTLI()->isLegalICmpImmediate(imm);
235 }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 TargetLoweringBase::AddrMode AM;
241 AM.BaseGV = BaseGV;
242 AM.BaseOffs = BaseOffset;
243 AM.HasBaseReg = HasBaseReg;
244 AM.Scale = Scale;
245 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246 }
247
248 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249 const DataLayout &DL) const {
250 EVT VT = getTLI()->getValueType(DL, Ty);
251 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252 }
253
254 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255 const DataLayout &DL) const {
256 EVT VT = getTLI()->getValueType(DL, Ty);
257 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258 }
259
260 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262 }
263
264 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266 TargetLoweringBase::AddrMode AM;
267 AM.BaseGV = BaseGV;
268 AM.BaseOffs = BaseOffset;
269 AM.HasBaseReg = HasBaseReg;
270 AM.Scale = Scale;
271 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272 }
273
274 bool isTruncateFree(Type *Ty1, Type *Ty2) {
275 return getTLI()->isTruncateFree(Ty1, Ty2);
276 }
277
278 bool isProfitableToHoist(Instruction *I) {
279 return getTLI()->isProfitableToHoist(I);
280 }
281
282 bool useAA() const { return getST()->useAA(); }
283
284 bool isTypeLegal(Type *Ty) {
285 EVT VT = getTLI()->getValueType(DL, Ty);
286 return getTLI()->isTypeLegal(VT);
287 }
288
289 int getGEPCost(Type *PointeeType, const Value *Ptr,
290 ArrayRef<const Value *> Operands) {
291 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292 }
293
294 int getExtCost(const Instruction *I, const Value *Src) {
295 if (getTLI()->isExtFree(I))
296 return TargetTransformInfo::TCC_Free;
297
298 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300 if (getTLI()->isExtLoad(LI, I, DL))
301 return TargetTransformInfo::TCC_Free;
302
303 return TargetTransformInfo::TCC_Basic;
304 }
305
306 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307 ArrayRef<const Value *> Arguments, const User *U) {
308 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309 }
310
311 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312 ArrayRef<Type *> ParamTys, const User *U) {
313 if (IID == Intrinsic::cttz) {
314 if (getTLI()->isCheapToSpeculateCttz())
315 return TargetTransformInfo::TCC_Basic;
316 return TargetTransformInfo::TCC_Expensive;
317 }
318
319 if (IID == Intrinsic::ctlz) {
320 if (getTLI()->isCheapToSpeculateCtlz())
321 return TargetTransformInfo::TCC_Basic;
322 return TargetTransformInfo::TCC_Expensive;
323 }
324
325 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326 }
327
328 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329 unsigned &JumpTableSize,
330 ProfileSummaryInfo *PSI,
331 BlockFrequencyInfo *BFI) {
332 /// Try to find the estimated number of clusters. Note that the number of
333 /// clusters identified in this function could be different from the actual
334 /// numbers found in lowering. This function ignores switches that are
335 /// lowered with a mix of jump table / bit test / BTree. This function was
336 /// initially intended to be used when estimating the cost of a switch in
337 /// the inline cost heuristic, but it is a generic cost model to be used in
338 /// other places (e.g., in loop unrolling).
339 unsigned N = SI.getNumCases();
340 const TargetLoweringBase *TLI = getTLI();
341 const DataLayout &DL = this->getDataLayout();
342
343 JumpTableSize = 0;
344 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
345
346 // Early exit if both a jump table and bit test are not allowed.
347 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
348 return N;
349
350 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
351 APInt MinCaseVal = MaxCaseVal;
352 for (auto CI : SI.cases()) {
353 const APInt &CaseVal = CI.getCaseValue()->getValue();
354 if (CaseVal.sgt(MaxCaseVal))
355 MaxCaseVal = CaseVal;
356 if (CaseVal.slt(MinCaseVal))
357 MinCaseVal = CaseVal;
358 }
359
360 // Check if suitable for a bit test
361 if (N <= DL.getIndexSizeInBits(0u)) {
362 SmallPtrSet<const BasicBlock *, 4> Dests;
363 for (auto I : SI.cases())
364 Dests.insert(I.getCaseSuccessor());
365
366 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
367 DL))
368 return 1;
369 }
370
371 // Check if suitable for a jump table.
372 if (IsJTAllowed) {
373 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
374 return N;
375 uint64_t Range =
376 (MaxCaseVal - MinCaseVal)
377 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
378 // Check whether a range of clusters is dense enough for a jump table
379 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
380 JumpTableSize = Range;
381 return 1;
382 }
383 }
384 return N;
385 }
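A worked example of the jump-table path above, under assumed target behaviour (jump tables allowed, getMinimumJumpTableEntries() == 4, and isSuitableForJumpTable accepting the range as dense):

  // switch over case values {10, 11, 12, 14, 15}: N = 5
  // Range = (15 - 10) + 1 = 6, so JumpTableSize is set to 6 and the
  // function returns 1, i.e. a single jump-table cluster.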
386
387 bool shouldBuildLookupTables() {
388 const TargetLoweringBase *TLI = getTLI();
389 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
390 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
391 }
392
393 bool haveFastSqrt(Type *Ty) {
394 const TargetLoweringBase *TLI = getTLI();
395 EVT VT = TLI->getValueType(DL, Ty);
396 return TLI->isTypeLegal(VT) &&
397 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
398 }
399
400 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
401 return true;
402 }
403
404 unsigned getFPOpCost(Type *Ty) {
405 // Check whether FADD is available, as a proxy for floating-point in
406 // general.
407 const TargetLoweringBase *TLI = getTLI();
408 EVT VT = TLI->getValueType(DL, Ty);
409 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
410 return TargetTransformInfo::TCC_Basic;
411 return TargetTransformInfo::TCC_Expensive;
412 }
413
414 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
415 const TargetLoweringBase *TLI = getTLI();
416 switch (Opcode) {
417 default: break;
418 case Instruction::Trunc:
419 if (TLI->isTruncateFree(OpTy, Ty))
420 return TargetTransformInfo::TCC_Free;
421 return TargetTransformInfo::TCC_Basic;
422 case Instruction::ZExt:
423 if (TLI->isZExtFree(OpTy, Ty))
424 return TargetTransformInfo::TCC_Free;
425 return TargetTransformInfo::TCC_Basic;
426
427 case Instruction::AddrSpaceCast:
428 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
429 Ty->getPointerAddressSpace()))
430 return TargetTransformInfo::TCC_Free;
431 return TargetTransformInfo::TCC_Basic;
432 }
433
434 return BaseT::getOperationCost(Opcode, Ty, OpTy);
435 }
436
437 unsigned getInliningThresholdMultiplier() { return 1; }
438
439 int getInlinerVectorBonusPercent() { return 150; }
440
441 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
442 TTI::UnrollingPreferences &UP) {
443 // This unrolling functionality is target independent, but to provide some
444 // motivation for its intended use, for x86:
445
446 // According to the Intel 64 and IA-32 Architectures Optimization Reference
447 // Manual, Intel Core models and later have a loop stream detector (and
448 // associated uop queue) that can benefit from partial unrolling.
449 // The relevant requirements are:
450 // - The loop must have no more than 4 (8 for Nehalem and later) branches
451 // taken, and none of them may be calls.
452 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
453
454 // According to the Software Optimization Guide for AMD Family 15h
455 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
456 // and loop buffer which can benefit from partial unrolling.
457 // The relevant requirements are:
458 // - The loop must have fewer than 16 branches
459 // - The loop must have less than 40 uops in all executed loop branches
460
461 // The number of taken branches in a loop is hard to estimate here, and
462 // benchmarking has revealed that it is better not to be conservative when
463 // estimating the branch count. As a result, we'll ignore the branch limits
464 // until someone finds a case where it matters in practice.
465
466 unsigned MaxOps;
467 const TargetSubtargetInfo *ST = getST();
468 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
469 MaxOps = PartialUnrollingThreshold;
470 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
471 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
472 else
473 return;
474
475 // Scan the loop: don't unroll loops with calls.
476 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
477 ++I) {
478 BasicBlock *BB = *I;
479
480 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
481 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
482 ImmutableCallSite CS(&*J);
483 if (const Function *F = CS.getCalledFunction()) {
484 if (!static_cast<T *>(this)->isLoweredToCall(F))
485 continue;
486 }
487
488 return;
489 }
490 }
491
492 // Enable runtime and partial unrolling up to the specified size.
493 // Enable using trip count upper bound to unroll loops.
494 UP.Partial = UP.Runtime = UP.UpperBound = true;
495 UP.PartialThreshold = MaxOps;
496
497 // Avoid unrolling when optimizing for size.
498 UP.OptSizeThreshold = 0;
499 UP.PartialOptSizeThreshold = 0;
500
501 // Set the number of instructions optimized when a "back edge"
502 // becomes a "fall through" to the default value of 2.
503 UP.BEInsns = 2;
504 }
505
506 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
507 AssumptionCache &AC,
508 TargetLibraryInfo *LibInfo,
509 HardwareLoopInfo &HWLoopInfo) {
510 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
511 }
512
513 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
514 AssumptionCache &AC, TargetLibraryInfo *TLI,
515 DominatorTree *DT,
516 const LoopAccessInfo *LAI) {
517 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
518 }
519
520 int getInstructionLatency(const Instruction *I) {
521 if (isa<LoadInst>(I))
522 return getST()->getSchedModel().DefaultLoadLatency;
523
524 return BaseT::getInstructionLatency(I);
525 }
526
527 virtual Optional<unsigned>
528 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
529 return Optional<unsigned>(
530 getST()->getCacheSize(static_cast<unsigned>(Level)));
531 }
532
533 virtual Optional<unsigned>
534 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
535 Optional<unsigned> TargetResult =
536 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
537
538 if (TargetResult)
539 return TargetResult;
540
541 return BaseT::getCacheAssociativity(Level);
542 }
543
544 virtual unsigned getCacheLineSize() const {
545 return getST()->getCacheLineSize();
546 }
547
548 virtual unsigned getPrefetchDistance() const {
549 return getST()->getPrefetchDistance();
550 }
551
552 virtual unsigned getMinPrefetchStride() const {
553 return getST()->getMinPrefetchStride();
554 }
555
556 virtual unsigned getMaxPrefetchIterationsAhead() const {
557 return getST()->getMaxPrefetchIterationsAhead();
558 }
559
560 /// @}
561
562 /// \name Vector TTI Implementations
563 /// @{
564
565 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
566
567 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
568 /// are set if the result needs to be inserted and/or extracted from vectors.
569 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
570 assert(Ty->isVectorTy() && "Can only scalarize vectors");
571 unsigned Cost = 0;
572
573 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
574 if (Insert)
575 Cost += static_cast<T *>(this)
576 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
577 if (Extract)
578 Cost += static_cast<T *>(this)
579 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
580 }
581
582 return Cost;
583 }
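A worked instance of the loop above, assuming a hypothetical <4 x float> type with both Insert and Extract set and unit per-element costs:

  // Cost = 4 inserts + 4 extracts = 8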
584
585 /// Estimate the overhead of scalarizing an instruction's unique
586 /// non-constant operands. The types of the arguments are ordinarily
587 /// scalar, in which case the costs are multiplied by VF.
588 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
589 unsigned VF) {
590 unsigned Cost = 0;
591 SmallPtrSet<const Value*, 4> UniqueOperands;
592 for (const Value *A : Args) {
593 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
594 Type *VecTy = nullptr;
595 if (A->getType()->isVectorTy()) {
596 VecTy = A->getType();
597 // If A is a vector operand, VF should be 1 or correspond to A.
598 assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
599 "Vector argument does not match VF");
600 }
601 else
602 VecTy = VectorType::get(A->getType(), VF);
603
604 Cost += getScalarizationOverhead(VecTy, false, true);
605 }
606 }
607
608 return Cost;
609 }
610
611 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
612 assert(VecTy->isVectorTy());
613
614 unsigned Cost = 0;
615
616 Cost += getScalarizationOverhead(VecTy, true, false);
617 if (!Args.empty())
618 Cost += getOperandsScalarizationOverhead(Args,
619 VecTy->getVectorNumElements());
620 else
621 // When no information on arguments is provided, we add the cost
622 // associated with one argument as a heuristic.
623 Cost += getScalarizationOverhead(VecTy, false, true);
624
625 return Cost;
626 }
627
628 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
629
630 unsigned getArithmeticInstrCost(
631 unsigned Opcode, Type *Ty,
632 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
633 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
634 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
635 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
636 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
637 // Check if any of the operands are vector operands.
638 const TargetLoweringBase *TLI = getTLI();
639 int ISD = TLI->InstructionOpcodeToISD(Opcode);
640 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 640, __PRETTY_FUNCTION__))
;
641
642 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
643
644 bool IsFloat = Ty->isFPOrFPVectorTy();
645 // Assume that floating point arithmetic operations cost twice as much as
646 // integer operations.
647 unsigned OpCost = (IsFloat ? 2 : 1);
648
649 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
650 // The operation is legal. Assume it costs 1.
651 // TODO: Once we have extract/insert subvector cost we need to use them.
652 return LT.first * OpCost;
653 }
654
655 if (!TLI->isOperationExpand(ISD, LT.second)) {
656 // If the operation is custom lowered, then assume that the code is twice
657 // as expensive.
658 return LT.first * 2 * OpCost;
659 }
660
661 // Else, assume that we need to scalarize this op.
662 // TODO: If one of the types get legalized by splitting, handle this
663 // similarly to what getCastInstrCost() does.
664 if (Ty->isVectorTy()) {
665 unsigned Num = Ty->getVectorNumElements();
666 unsigned Cost = static_cast<T *>(this)
667 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
668 // Return the cost of multiple scalar invocation plus the cost of
669 // inserting and extracting the values.
670 return getScalarizationOverhead(Ty, Args) + Num * Cost;
671 }
672
673 // We don't know anything about this scalar instruction.
674 return OpCost;
675 }
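A rough summary of the three outcomes above, stated as assumptions about legalization rather than facts about any particular target:

  // Legal (or promoted) op:  cost = LT.first * OpCost
  // Custom-lowered op:       cost = LT.first * 2 * OpCost (assumed twice as expensive)
  // Expanded op on an 8-element vector: scalarized, so
  //   cost = getScalarizationOverhead(Ty, Args) + 8 * scalar getArithmeticInstrCost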
676
677 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
678 Type *SubTp) {
679 switch (Kind) {
680 case TTI::SK_Broadcast:
681 return getBroadcastShuffleOverhead(Tp);
682 case TTI::SK_Select:
683 case TTI::SK_Reverse:
684 case TTI::SK_Transpose:
685 case TTI::SK_PermuteSingleSrc:
686 case TTI::SK_PermuteTwoSrc:
687 return getPermuteShuffleOverhead(Tp);
688 case TTI::SK_ExtractSubvector:
689 return getExtractSubvectorOverhead(Tp, Index, SubTp);
690 case TTI::SK_InsertSubvector:
691 return getInsertSubvectorOverhead(Tp, Index, SubTp);
692 }
693 llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind",
"/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 693)
;
694 }
695
696 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
697 const Instruction *I = nullptr) {
698 const TargetLoweringBase *TLI = getTLI();
699 int ISD = TLI->InstructionOpcodeToISD(Opcode);
700 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 700, __PRETTY_FUNCTION__))
;
701 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
702 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
703
704 // Check for NOOP conversions.
705 if (SrcLT.first == DstLT.first &&
706 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
707
708 // Bitcast between types that are legalized to the same type are free.
709 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
710 return 0;
711 }
712
713 if (Opcode == Instruction::Trunc &&
714 TLI->isTruncateFree(SrcLT.second, DstLT.second))
715 return 0;
716
717 if (Opcode == Instruction::ZExt &&
718 TLI->isZExtFree(SrcLT.second, DstLT.second))
719 return 0;
720
721 if (Opcode == Instruction::AddrSpaceCast &&
722 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
723 Dst->getPointerAddressSpace()))
724 return 0;
725
726 // If this is a zext/sext of a load, return 0 if the corresponding
727 // extending load exists on target.
728 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
729 I && isa<LoadInst>(I->getOperand(0))) {
730 EVT ExtVT = EVT::getEVT(Dst);
731 EVT LoadVT = EVT::getEVT(Src);
732 unsigned LType =
733 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
734 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
735 return 0;
736 }
737
738 // If the cast is marked as legal (or promote) then assume low cost.
739 if (SrcLT.first == DstLT.first &&
740 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
741 return 1;
742
743 // Handle scalar conversions.
744 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
745 // Scalar bitcasts are usually free.
746 if (Opcode == Instruction::BitCast)
747 return 0;
748
749 // Just check the op cost. If the operation is legal then assume it costs
750 // 1.
751 if (!TLI->isOperationExpand(ISD, DstLT.second))
752 return 1;
753
754 // Assume that illegal scalar instruction are expensive.
755 return 4;
756 }
757
758 // Check vector-to-vector casts.
759 if (Dst->isVectorTy() && Src->isVectorTy()) {
760 // If the cast is between same-sized registers, then the check is simple.
761 if (SrcLT.first == DstLT.first &&
762 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
763
764 // Assume that Zext is done using AND.
765 if (Opcode == Instruction::ZExt)
766 return 1;
767
768 // Assume that sext is done using SHL and SRA.
769 if (Opcode == Instruction::SExt)
770 return 2;
771
772 // Just check the op cost. If the operation is legal then assume it
773 // costs 1 and multiply by the type-legalization overhead
774 // (SrcLT.first).
775 if (!TLI->isOperationExpand(ISD, DstLT.second))
776 return SrcLT.first * 1;
777 }
778
779 // If we are legalizing by splitting, query the concrete TTI for the cost
780 // of casting the original vector twice. We also need to factor in the
781 // cost of the split itself. Count that as 1, to be consistent with
782 // TLI->getTypeLegalizationCost().
783 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
784 TargetLowering::TypeSplitVector) ||
785 (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
786 TargetLowering::TypeSplitVector)) {
787 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
788 Dst->getVectorNumElements() / 2);
789 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
790 Src->getVectorNumElements() / 2);
791 T *TTI = static_cast<T *>(this);
792 return TTI->getVectorSplitCost() +
793 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
794 }
795
796 // In other cases where the source or destination are illegal, assume
797 // the operation will get scalarized.
798 unsigned Num = Dst->getVectorNumElements();
799 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
800 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
801
802 // Return the cost of multiple scalar invocation plus the cost of
803 // inserting and extracting the values.
804 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
805 }
806
807 // We already handled vector-to-vector and scalar-to-scalar conversions.
808 // This is where we handle bitcasts between vectors and scalars.
809 // We need to assume that the conversion is scalarized in one way or
810 // another.
811 if (Opcode == Instruction::BitCast)
812 // Illegal bitcasts are done by storing and loading from a stack slot.
813 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
814 : 0) +
815 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
816 : 0);
817
818 llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 818)
;
819 }
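A few concrete consequences of the rules above, mirroring the comments in the code rather than measurements from a real target:

  // bitcast between types that legalize to the same type:          cost 0
  // vector zext between same-sized legal registers (done as AND):  cost 1
  // vector sext in the same situation (done as SHL and SRA):       cost 2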
820
821 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
822 VectorType *VecTy, unsigned Index) {
823 return static_cast<T *>(this)->getVectorInstrCost(
824 Instruction::ExtractElement, VecTy, Index) +
825 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
826 VecTy->getElementType());
827 }
828
829 unsigned getCFInstrCost(unsigned Opcode) {
830 // Branches are assumed to be predicted.
831 return 0;
832 }
833
834 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
835 const Instruction *I) {
836 const TargetLoweringBase *TLI = getTLI();
837 int ISD = TLI->InstructionOpcodeToISD(Opcode);
838 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 838, __PRETTY_FUNCTION__))
;
7
Assuming 'ISD' is not equal to 0
8
'?' condition is true
839
840 // Selects on vectors are actually vector selects.
841 if (ISD == ISD::SELECT) {
9
Assuming 'ISD' is not equal to SELECT
10
Taking false branch
842 assert(CondTy && "CondTy must exist")((CondTy && "CondTy must exist") ? static_cast<void
> (0) : __assert_fail ("CondTy && \"CondTy must exist\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 842, __PRETTY_FUNCTION__))
;
843 if (CondTy->isVectorTy())
844 ISD = ISD::VSELECT;
845 }
846 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
847
848 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
11
Taking false branch
849 !TLI->isOperationExpand(ISD, LT.second)) {
850 // The operation is legal. Assume it costs 1. Multiply
851 // by the type-legalization overhead.
852 return LT.first * 1;
853 }
854
855 // Otherwise, assume that the cast is scalarized.
856 // TODO: If one of the types get legalized by splitting, handle this
857 // similarly to what getCastInstrCost() does.
858 if (ValTy->isVectorTy()) {
12
Calling 'Type::isVectorTy'
14
Returning from 'Type::isVectorTy'
15
Taking true branch
859 unsigned Num = ValTy->getVectorNumElements();
860 if (CondTy)
16
Assuming 'CondTy' is null
17
Taking false branch
861 CondTy = CondTy->getScalarType();
862 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
19
Calling 'AArch64TTIImpl::getCmpSelInstrCost'
863 Opcode, ValTy->getScalarType(), CondTy, I);
18
Passing null pointer value via 3rd parameter 'CondTy'
864
865 // Return the cost of multiple scalar invocation plus the cost of
866 // inserting and extracting the values.
867 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
868 }
869
870 // Unknown scalar opcode.
871 return 1;
872 }
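This recursion is the path the analyzer follows: with a vector ValTy and a null CondTy, the scalarized recursive call forwards the null CondTy into AArch64TTIImpl::getCmpSelInstrCost and from there into the null-pointer use this report flags in TargetLowering.h. A hypothetical caller that reaches the same path (the TTI object, Ctx, and the variable names are assumptions for illustration) could look like:

  // A <4 x i32> integer compare costed without a condition type; the
  // nullptr CondTy survives into the scalarized per-element call.
  Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4);
  int Cost = TTI.getCmpSelInstrCost(Instruction::ICmp, VecTy,
                                    /*CondTy=*/nullptr, /*I=*/nullptr);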
873
874 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
875 std::pair<unsigned, MVT> LT =
876 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
877
878 return LT.first;
879 }
880
881 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
882 unsigned AddressSpace,
883 const Instruction *I = nullptr) {
884 assert(!Src->isVoidTy() && "Invalid type");
885 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
886
887 // Assuming that all loads of legal types cost 1.
888 unsigned Cost = LT.first;
889
890 if (Src->isVectorTy() &&
891 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
892 // This is a vector load that legalizes to a larger type than the vector
893 // itself. Unless the corresponding extending load or truncating store is
894 // legal, this will scalarize.
895 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
896 EVT MemVT = getTLI()->getValueType(DL, Src);
897 if (Opcode == Instruction::Store)
898 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
899 else
900 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
901
902 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
903 // This is a vector load/store for some illegal type that is scalarized.
904 // We must account for the cost of building or decomposing the vector.
905 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
906 Opcode == Instruction::Store);
907 }
908 }
909
910 return Cost;
911 }
912
913 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
914 unsigned Factor,
915 ArrayRef<unsigned> Indices,
916 unsigned Alignment, unsigned AddressSpace,
917 bool UseMaskForCond = false,
918 bool UseMaskForGaps = false) {
919 VectorType *VT = dyn_cast<VectorType>(VecTy);
920 assert(VT && "Expect a vector type for interleaved memory op")((VT && "Expect a vector type for interleaved memory op"
) ? static_cast<void> (0) : __assert_fail ("VT && \"Expect a vector type for interleaved memory op\""
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 920, __PRETTY_FUNCTION__))
;
921
922 unsigned NumElts = VT->getNumElements();
923 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
924
925 unsigned NumSubElts = NumElts / Factor;
926 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
927
928 // First, the cost of the load/store operation.
929 unsigned Cost;
930 if (UseMaskForCond || UseMaskForGaps)
931 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
932 Opcode, VecTy, Alignment, AddressSpace);
933 else
934 Cost = static_cast<T *>(this)->getMemoryOpCost(
935 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
936
937 // Legalize the vector type, and get the legalized and unlegalized type
938 // sizes.
939 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
940 unsigned VecTySize =
941 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
942 unsigned VecTyLTSize = VecTyLT.getStoreSize();
943
944 // Return the ceiling of dividing A by B.
945 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
946
947 // Scale the cost of the memory operation by the fraction of legalized
948 // instructions that will actually be used. We shouldn't account for the
949 // cost of dead instructions since they will be removed.
950 //
951 // E.g., An interleaved load of factor 8:
952 // %vec = load <16 x i64>, <16 x i64>* %ptr
953 // %v0 = shufflevector %vec, undef, <0, 8>
954 //
955 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
956 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
957 // type). The other loads are unused.
958 //
959 // We only scale the cost of loads since interleaved store groups aren't
960 // allowed to have gaps.
961 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
962 // The number of loads of a legal type it will take to represent a load
963 // of the unlegalized vector type.
964 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
965
966 // The number of elements of the unlegalized type that correspond to a
967 // single legal instruction.
968 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
969
970 // Determine which legal instructions will be used.
971 BitVector UsedInsts(NumLegalInsts, false);
972 for (unsigned Index : Indices)
973 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
974 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
975
976 // Scale the cost of the load by the fraction of legal instructions that
977 // will be used.
978 Cost *= UsedInsts.count() / NumLegalInsts;
979 }
980
981 // Then add the cost of the interleave operation.
982 if (Opcode == Instruction::Load) {
983 // The interleave cost is similar to extracting the sub vectors' elements
984 // from the wide vector and inserting them into the sub vectors.
985 //
986 // E.g. An interleaved load of factor 2 (with one member of index 0):
987 // %vec = load <8 x i32>, <8 x i32>* %ptr
988 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
989 // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
990 // <8 x i32> vector and inserting them into a <4 x i32> vector.
991
992 assert(Indices.size() <= Factor &&
993 "Interleaved memory op has too many members");
994
995 for (unsigned Index : Indices) {
996 assert(Index < Factor && "Invalid index for interleaved memory op");
997
998 // Extract elements from loaded vector for each sub vector.
999 for (unsigned i = 0; i < NumSubElts; i++)
1000 Cost += static_cast<T *>(this)->getVectorInstrCost(
1001 Instruction::ExtractElement, VT, Index + i * Factor);
1002 }
1003
1004 unsigned InsSubCost = 0;
1005 for (unsigned i = 0; i < NumSubElts; i++)
1006 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1007 Instruction::InsertElement, SubVT, i);
1008
1009 Cost += Indices.size() * InsSubCost;
1010 } else {
1011 // The interleave cost is to extract all elements from the sub vectors
1012 // and insert them into the wide vector.
1013 //
1014 // E.g. An interleaved store of factor 2:
1015 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1016 // store <8 x i32> %v0_v1, <8 x i32>* %ptr
1017 // The cost is estimated as extracting all elements from both <4 x i32>
1018 // vectors and inserting them into the <8 x i32> vector.
1019
1020 unsigned ExtSubCost = 0;
1021 for (unsigned i = 0; i < NumSubElts; i++)
1022 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1023 Instruction::ExtractElement, SubVT, i);
1024 Cost += ExtSubCost * Factor;
1025
1026 for (unsigned i = 0; i < NumElts; i++)
1027 Cost += static_cast<T *>(this)
1028 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1029 }
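// A worked instance (with assumed unit costs per extract/insert) of the load
// branch above, for the factor-2 example in the comment: one member at index 0
// of an <8 x i32> load, so NumSubElts = 4 and Indices.size() = 1.
#include <cstdio>

int main() {
  unsigned NumSubElts = 4, NumIndices = 1;
  unsigned ExtractCost = 1, InsertCost = 1;   // assumed, target-dependent
  unsigned InterleaveCost =
      NumIndices * NumSubElts * ExtractCost + // extracts at 0, 2, 4, 6
      NumIndices * NumSubElts * InsertCost;   // inserts into the <4 x i32>
  std::printf("interleave cost = %u\n", InterleaveCost); // 8
  return 0;
}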
1030
1031 if (!UseMaskForCond)
1032 return Cost;
1033
1034 Type *I8Type = Type::getInt8Ty(VT->getContext());
1035 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1036 SubVT = VectorType::get(I8Type, NumSubElts);
1037
1038 // The Mask shuffling cost is to extract all the elements of the Mask
1039 // and insert each of them Factor times into the wide vector:
1040 //
1041 // E.g. an interleaved group with factor 3:
1042 // %mask = icmp ult <8 x i32> %vec1, %vec2
1043 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1044 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1045 // The cost is estimated as extracting all mask elements from the <8xi1> mask
1046 // vector and inserting them factor times into the <24xi1> shuffled mask
1047 // vector.
1048 for (unsigned i = 0; i < NumSubElts; i++)
1049 Cost += static_cast<T *>(this)->getVectorInstrCost(
1050 Instruction::ExtractElement, SubVT, i);
1051
1052 for (unsigned i = 0; i < NumElts; i++)
1053 Cost += static_cast<T *>(this)->getVectorInstrCost(
1054 Instruction::InsertElement, MaskVT, i);
1055
1056 // The Gaps mask is invariant and created outside the loop, therefore the
1057 // cost of creating it is not accounted for here. However if we have both
1058 // a MaskForGaps and some other mask that guards the execution of the
1059 // memory access, we need to account for the cost of And-ing the two masks
1060 // inside the loop.
1061 if (UseMaskForGaps)
1062 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1063 BinaryOperator::And, MaskVT);
1064
1065 return Cost;
1066 }
1067
1068 /// Get intrinsic cost based on arguments.
1069 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1070 ArrayRef<Value *> Args, FastMathFlags FMF,
1071 unsigned VF = 1) {
1072 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1073 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1074 auto *ConcreteTTI = static_cast<T *>(this);
1075
1076 switch (IID) {
1077 default: {
1078 // Assume that we need to scalarize this intrinsic.
1079 SmallVector<Type *, 4> Types;
1080 for (Value *Op : Args) {
1081 Type *OpTy = Op->getType();
1082 assert(VF == 1 || !OpTy->isVectorTy());
1083 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1084 }
1085
1086 if (VF > 1 && !RetTy->isVoidTy())
1087 RetTy = VectorType::get(RetTy, VF);
1088
1089 // Compute the scalarization overhead based on Args for a vector
1090 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1091 // CostModel will pass a vector RetTy and VF == 1.
1092 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1093 if (RetVF > 1 || VF > 1) {
1094 ScalarizationCost = 0;
1095 if (!RetTy->isVoidTy())
1096 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1097 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1098 }
1099
1100 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1101 ScalarizationCost);
1102 }
1103 case Intrinsic::masked_scatter: {
1104 assert(VF == 1 && "Can't vectorize types here.");
1105 Value *Mask = Args[3];
1106 bool VarMask = !isa<Constant>(Mask);
1107 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1108 return ConcreteTTI->getGatherScatterOpCost(
1109 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1110 }
1111 case Intrinsic::masked_gather: {
1112 assert(VF == 1 && "Can't vectorize types here.");
1113 Value *Mask = Args[2];
1114 bool VarMask = !isa<Constant>(Mask);
1115 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1116 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1117 Args[0], VarMask, Alignment);
1118 }
1119 case Intrinsic::experimental_vector_reduce_add:
1120 case Intrinsic::experimental_vector_reduce_mul:
1121 case Intrinsic::experimental_vector_reduce_and:
1122 case Intrinsic::experimental_vector_reduce_or:
1123 case Intrinsic::experimental_vector_reduce_xor:
1124 case Intrinsic::experimental_vector_reduce_v2_fadd:
1125 case Intrinsic::experimental_vector_reduce_v2_fmul:
1126 case Intrinsic::experimental_vector_reduce_smax:
1127 case Intrinsic::experimental_vector_reduce_smin:
1128 case Intrinsic::experimental_vector_reduce_fmax:
1129 case Intrinsic::experimental_vector_reduce_fmin:
1130 case Intrinsic::experimental_vector_reduce_umax:
1131 case Intrinsic::experimental_vector_reduce_umin:
1132 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1133 case Intrinsic::fshl:
1134 case Intrinsic::fshr: {
1135 Value *X = Args[0];
1136 Value *Y = Args[1];
1137 Value *Z = Args[2];
1138 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1139 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1140 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1141 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1142 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1143 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1144 : TTI::OP_None;
1145 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1146 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1147 unsigned Cost = 0;
1148 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1149 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1150 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1151 OpKindX, OpKindZ, OpPropsX);
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1153 OpKindY, OpKindZ, OpPropsY);
1154 // Non-constant shift amounts require a modulo.
1155 if (OpKindZ != TTI::OK_UniformConstantValue &&
1156 OpKindZ != TTI::OK_NonUniformConstantValue)
1157 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1158 OpKindZ, OpKindBW, OpPropsZ,
1159 OpPropsBW);
1160 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1161 if (X != Y) {
1162 Type *CondTy = RetTy->getWithNewBitWidth(1);
1163 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1164 CondTy, nullptr);
1165 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1166 CondTy, nullptr);
1167 }
1168 return Cost;
1169 }
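// A minimal scalar model (uint32_t, BW = 32) of the fshl expansion whose
// pieces are costed above: a urem for the shift amount, shl/lshr/or for the
// funnel itself, and an icmp plus select to handle shift-by-zero when X != Y.
#include <cstdint>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const unsigned BW = 32;
  unsigned ShAmt = Z % BW;                    // the URem costed above
  if (ShAmt == 0)                             // the icmp + select pair
    return X;
  return (X << ShAmt) | (Y >> (BW - ShAmt));  // shl, lshr, or
}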
1170 }
1171 }
1172
1173 /// Get intrinsic cost based on argument types.
1174 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1175 /// cost of scalarizing the arguments and the return value will be computed
1176 /// based on types.
1177 unsigned getIntrinsicInstrCost(
1178 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1179 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1180 auto *ConcreteTTI = static_cast<T *>(this);
1181
1182 SmallVector<unsigned, 2> ISDs;
1183 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1184 switch (IID) {
1185 default: {
1186 // Assume that we need to scalarize this intrinsic.
1187 unsigned ScalarizationCost = ScalarizationCostPassed;
1188 unsigned ScalarCalls = 1;
1189 Type *ScalarRetTy = RetTy;
1190 if (RetTy->isVectorTy()) {
1191 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1192 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1193 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1194 ScalarRetTy = RetTy->getScalarType();
1195 }
1196 SmallVector<Type *, 4> ScalarTys;
1197 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1198 Type *Ty = Tys[i];
1199 if (Ty->isVectorTy()) {
1200 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1201 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1202 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1203 Ty = Ty->getScalarType();
1204 }
1205 ScalarTys.push_back(Ty);
1206 }
1207 if (ScalarCalls == 1)
1208 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1209
1210 unsigned ScalarCost =
1211 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1212
1213 return ScalarCalls * ScalarCost + ScalarizationCost;
1214 }
1215 // Look for intrinsics that can be lowered directly or turned into a scalar
1216 // intrinsic call.
1217 case Intrinsic::sqrt:
1218 ISDs.push_back(ISD::FSQRT);
1219 break;
1220 case Intrinsic::sin:
1221 ISDs.push_back(ISD::FSIN);
1222 break;
1223 case Intrinsic::cos:
1224 ISDs.push_back(ISD::FCOS);
1225 break;
1226 case Intrinsic::exp:
1227 ISDs.push_back(ISD::FEXP);
1228 break;
1229 case Intrinsic::exp2:
1230 ISDs.push_back(ISD::FEXP2);
1231 break;
1232 case Intrinsic::log:
1233 ISDs.push_back(ISD::FLOG);
1234 break;
1235 case Intrinsic::log10:
1236 ISDs.push_back(ISD::FLOG10);
1237 break;
1238 case Intrinsic::log2:
1239 ISDs.push_back(ISD::FLOG2);
1240 break;
1241 case Intrinsic::fabs:
1242 ISDs.push_back(ISD::FABS);
1243 break;
1244 case Intrinsic::canonicalize:
1245 ISDs.push_back(ISD::FCANONICALIZE);
1246 break;
1247 case Intrinsic::minnum:
1248 ISDs.push_back(ISD::FMINNUM);
1249 if (FMF.noNaNs())
1250 ISDs.push_back(ISD::FMINIMUM);
1251 break;
1252 case Intrinsic::maxnum:
1253 ISDs.push_back(ISD::FMAXNUM);
1254 if (FMF.noNaNs())
1255 ISDs.push_back(ISD::FMAXIMUM);
1256 break;
1257 case Intrinsic::copysign:
1258 ISDs.push_back(ISD::FCOPYSIGN);
1259 break;
1260 case Intrinsic::floor:
1261 ISDs.push_back(ISD::FFLOOR);
1262 break;
1263 case Intrinsic::ceil:
1264 ISDs.push_back(ISD::FCEIL);
1265 break;
1266 case Intrinsic::trunc:
1267 ISDs.push_back(ISD::FTRUNC);
1268 break;
1269 case Intrinsic::nearbyint:
1270 ISDs.push_back(ISD::FNEARBYINT);
1271 break;
1272 case Intrinsic::rint:
1273 ISDs.push_back(ISD::FRINT);
1274 break;
1275 case Intrinsic::round:
1276 ISDs.push_back(ISD::FROUND);
1277 break;
1278 case Intrinsic::pow:
1279 ISDs.push_back(ISD::FPOW);
1280 break;
1281 case Intrinsic::fma:
1282 ISDs.push_back(ISD::FMA);
1283 break;
1284 case Intrinsic::fmuladd:
1285 ISDs.push_back(ISD::FMA);
1286 break;
1287 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1288 case Intrinsic::lifetime_start:
1289 case Intrinsic::lifetime_end:
1290 case Intrinsic::sideeffect:
1291 return 0;
1292 case Intrinsic::masked_store:
1293 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1294 0);
1295 case Intrinsic::masked_load:
1296 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1297 case Intrinsic::experimental_vector_reduce_add:
1298 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1299 /*IsPairwiseForm=*/false);
1300 case Intrinsic::experimental_vector_reduce_mul:
1301 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1302 /*IsPairwiseForm=*/false);
1303 case Intrinsic::experimental_vector_reduce_and:
1304 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1305 /*IsPairwiseForm=*/false);
1306 case Intrinsic::experimental_vector_reduce_or:
1307 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1308 /*IsPairwiseForm=*/false);
1309 case Intrinsic::experimental_vector_reduce_xor:
1310 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1311 /*IsPairwiseForm=*/false);
1312 case Intrinsic::experimental_vector_reduce_v2_fadd:
1313 return ConcreteTTI->getArithmeticReductionCost(
1314 Instruction::FAdd, Tys[0],
1315 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1316 // reductions.
1317 case Intrinsic::experimental_vector_reduce_v2_fmul:
1318 return ConcreteTTI->getArithmeticReductionCost(
1319 Instruction::FMul, Tys[0],
1320 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1321 // reductions.
1322 case Intrinsic::experimental_vector_reduce_smax:
1323 case Intrinsic::experimental_vector_reduce_smin:
1324 case Intrinsic::experimental_vector_reduce_fmax:
1325 case Intrinsic::experimental_vector_reduce_fmin:
1326 return ConcreteTTI->getMinMaxReductionCost(
1327 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1328 /*IsUnsigned=*/false);
1329 case Intrinsic::experimental_vector_reduce_umax:
1330 case Intrinsic::experimental_vector_reduce_umin:
1331 return ConcreteTTI->getMinMaxReductionCost(
1332 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1333 /*IsUnsigned=*/true);
1334 case Intrinsic::sadd_sat:
1335 case Intrinsic::ssub_sat: {
1336 Type *CondTy = RetTy->getWithNewBitWidth(1);
1337
1338 Type *OpTy = StructType::create({RetTy, CondTy});
1339 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1340 ? Intrinsic::sadd_with_overflow
1341 : Intrinsic::ssub_with_overflow;
1342
1343 // SatMax -> Overflow && SumDiff < 0
1344 // SatMin -> Overflow && SumDiff >= 0
1345 unsigned Cost = 0;
1346 Cost += ConcreteTTI->getIntrinsicInstrCost(
1347 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1348 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1349 CondTy, nullptr);
1350 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1351 CondTy, nullptr);
1352 return Cost;
1353 }
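// A minimal scalar model (int32_t) of the sadd.sat expansion whose pieces are
// costed above; the GCC/Clang builtin __builtin_add_overflow stands in for
// the sadd.with.overflow intrinsic, followed by one sign compare of the
// wrapped sum and two selects choosing between SatMax and SatMin.
#include <cstdint>
#include <limits>

int32_t sadd_sat32(int32_t A, int32_t B) {
  int32_t Sum;
  bool Ov = __builtin_add_overflow(A, B, &Sum);                  // overflow op
  int32_t Sat = Sum < 0 ? std::numeric_limits<int32_t>::max()    // SatMax
                        : std::numeric_limits<int32_t>::min();   // SatMin
  return Ov ? Sat : Sum;                                         // final select
}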
1354 case Intrinsic::uadd_sat:
1355 case Intrinsic::usub_sat: {
1356 Type *CondTy = RetTy->getWithNewBitWidth(1);
1357
1358 Type *OpTy = StructType::create({RetTy, CondTy});
1359 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1360 ? Intrinsic::uadd_with_overflow
1361 : Intrinsic::usub_with_overflow;
1362
1363 unsigned Cost = 0;
1364 Cost += ConcreteTTI->getIntrinsicInstrCost(
1365 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1366 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1367 CondTy, nullptr);
1368 return Cost;
1369 }
1370 case Intrinsic::smul_fix:
1371 case Intrinsic::umul_fix: {
1372 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1373 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1374
1375 unsigned ExtOp =
1376 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1377
1378 unsigned Cost = 0;
1379 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1380 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1381 Cost +=
1382 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1383 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1384 TTI::OK_AnyValue,
1385 TTI::OK_UniformConstantValue);
1386 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1387 TTI::OK_AnyValue,
1388 TTI::OK_UniformConstantValue);
1389 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1390 return Cost;
1391 }
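// A minimal scalar model (int32_t operands, an illustrative Scale parameter)
// of the smul.fix expansion whose pieces are costed above. For brevity the
// lshr/shl/or recombination of the two halves is collapsed into one 64-bit
// arithmetic shift, which produces the same value as the expanded sequence.
#include <cstdint>

int32_t smul_fix32(int32_t A, int32_t B, unsigned Scale) {
  int64_t Wide = int64_t(A) * int64_t(B);   // two sext casts + mul in ExtTy
  return int32_t(Wide >> Scale);            // drop Scale fraction bits, trunc
}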
1392 case Intrinsic::sadd_with_overflow:
1393 case Intrinsic::ssub_with_overflow: {
1394 Type *SumTy = RetTy->getContainedType(0);
1395 Type *OverflowTy = RetTy->getContainedType(1);
1396 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1397 ? BinaryOperator::Add
1398 : BinaryOperator::Sub;
1399
1400 // LHSSign -> LHS >= 0
1401 // RHSSign -> RHS >= 0
1402 // SumSign -> Sum >= 0
1403 //
1404 // Add:
1405 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1406 // Sub:
1407 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1408 unsigned Cost = 0;
1409 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1410 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1411 OverflowTy, nullptr);
1412 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1413 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1414 Cost +=
1415 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1416 return Cost;
1417 }
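// A minimal scalar model (int32_t) of the signed-overflow rule in the comment
// above: three sign tests, two comparisons of those results, and an 'and'.
// The add itself is done in uint32_t so the model avoids the undefined
// behaviour of signed wraparound in C++.
#include <cstdint>

bool sadd_overflows(int32_t L, int32_t R) {
  uint32_t Sum = uint32_t(L) + uint32_t(R);        // wrapping add
  bool LHSSign = L >= 0, RHSSign = R >= 0;
  bool SumSign = (Sum >> 31) == 0;                 // sign of the wrapped sum
  return (LHSSign == RHSSign) && (LHSSign != SumSign);
}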
1418 case Intrinsic::uadd_with_overflow:
1419 case Intrinsic::usub_with_overflow: {
1420 Type *SumTy = RetTy->getContainedType(0);
1421 Type *OverflowTy = RetTy->getContainedType(1);
1422 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1423 ? BinaryOperator::Add
1424 : BinaryOperator::Sub;
1425
1426 unsigned Cost = 0;
1427 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1428 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1429 OverflowTy, nullptr);
1430 return Cost;
1431 }
1432 case Intrinsic::smul_with_overflow:
1433 case Intrinsic::umul_with_overflow: {
1434 Type *MulTy = RetTy->getContainedType(0);
1435 Type *OverflowTy = RetTy->getContainedType(1);
1436 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1437 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1438
1439 unsigned ExtOp =
1440 IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1441
1442 unsigned Cost = 0;
1443 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1444 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1445 Cost +=
1446 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1447 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1448 TTI::OK_AnyValue,
1449 TTI::OK_UniformConstantValue);
1450
1451 if (IID == Intrinsic::smul_with_overflow)
1452 Cost += ConcreteTTI->getArithmeticInstrCost(
1453 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1454 TTI::OK_UniformConstantValue);
1455
1456 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1457 OverflowTy, nullptr);
1458 return Cost;
1459 }
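// A minimal scalar model (uint32_t) of the umul.with.overflow expansion whose
// pieces are costed above: zero-extend both operands to twice the width,
// multiply, truncate for the result, and compare the high half against zero.
#include <cstdint>

bool umul_overflows(uint32_t A, uint32_t B, uint32_t &Lo) {
  uint64_t Wide = uint64_t(A) * uint64_t(B);   // two zext casts + mul in ExtTy
  Lo = uint32_t(Wide);                         // trunc
  return (Wide >> 32) != 0;                    // lshr + icmp ne 0
}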
1460 case Intrinsic::ctpop:
1461 ISDs.push_back(ISD::CTPOP);
1462 // In case of legalization use TCC_Expensive. This is cheaper than a
1463 // library call but still not a cheap instruction.
1464 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1465 break;
1466 // FIXME: ctlz, cttz, ...
1467 }
1468
1469 const TargetLoweringBase *TLI = getTLI();
1470 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1471
1472 SmallVector<unsigned, 2> LegalCost;
1473 SmallVector<unsigned, 2> CustomCost;
1474 for (unsigned ISD : ISDs) {
1475 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1476 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1477 TLI->isFAbsFree(LT.second)) {
1478 return 0;
1479 }
1480
1481 // The operation is legal. Assume it costs 1.
1482 // If the type is split to multiple registers, assume that there is some
1483 // overhead to this.
1484 // TODO: Once we have extract/insert subvector cost we need to use them.
1485 if (LT.first > 1)
1486 LegalCost.push_back(LT.first * 2);
1487 else
1488 LegalCost.push_back(LT.first * 1);
1489 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1490 // If the operation is custom lowered then assume
1491 // that the code is twice as expensive.
1492 CustomCost.push_back(LT.first * 2);
1493 }
1494 }
1495
1496 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1497 if (MinLegalCostI != LegalCost.end())
1498 return *MinLegalCostI;
1499
1500 auto MinCustomCostI =
1501 std::min_element(CustomCost.begin(), CustomCost.end());
1502 if (MinCustomCostI != CustomCost.end())
1503 return *MinCustomCostI;
1504
1505 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1506 // point mul followed by an add.
1507 if (IID == Intrinsic::fmuladd)
1508 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1509 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1510
1511 // Else, assume that we need to scalarize this intrinsic. For math builtins
1512 // this will emit a costly libcall, adding call overhead and spills. Make it
1513 // very expensive.
1514 if (RetTy->isVectorTy()) {
1515 unsigned ScalarizationCost =
1516 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1517 ? ScalarizationCostPassed
1518 : getScalarizationOverhead(RetTy, true, false));
1519 unsigned ScalarCalls = RetTy->getVectorNumElements();
1520 SmallVector<Type *, 4> ScalarTys;
1521 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1522 Type *Ty = Tys[i];
1523 if (Ty->isVectorTy())
1524 Ty = Ty->getScalarType();
1525 ScalarTys.push_back(Ty);
1526 }
1527 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1528 IID, RetTy->getScalarType(), ScalarTys, FMF);
1529 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1530 if (Tys[i]->isVectorTy()) {
1531 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1532 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1533 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1534 }
1535 }
1536
1537 return ScalarCalls * ScalarCost + ScalarizationCost;
1538 }
1539
1540 // This is going to be turned into a library call, make it expensive.
1541 return SingleCallCost;
1542 }
1543
1544 /// Compute a cost of the given call instruction.
1545 ///
1546 /// Compute the cost of calling function F with return type RetTy and
1547 /// argument types Tys. F might be nullptr, in this case the cost of an
1548 /// arbitrary call with the specified signature will be returned.
1549 /// This is used, for instance, when we estimate call of a vector
1550 /// counterpart of the given function.
1551 /// \param F Called function, might be nullptr.
1552 /// \param RetTy Return value types.
1553 /// \param Tys Argument types.
1554 /// \returns The cost of Call instruction.
1555 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1556 return 10;
1557 }
1558
1559 unsigned getNumberOfParts(Type *Tp) {
1560 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1561 return LT.first;
1562 }
1563
1564 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1565 const SCEV *) {
1566 return 0;
1567 }
1568
1569 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1570 /// We're assuming that reduction operations are performed in the following way:
1571 /// 1. Non-pairwise reduction
1572 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1573 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1574 /// \----------------v-------------/ \----------v------------/
1575 /// n/2 elements n/2 elements
1576 /// %red1 = op <n x t> %val, <n x t> val1
1577 /// After this operation we have a vector %red1 where only the first n/2
1578 /// elements are meaningful, the second n/2 elements are undefined and can be
1579 /// dropped. All other operations are actually working with the vector of
1580 /// length n/2, not n, though the real vector length is still n.
1581 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1582 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1583 /// \----------------v-------------/ \----------v------------/
1584 /// n/4 elements 3*n/4 elements
1585 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1586 /// length n/2, the resulting vector has length n/4 etc.
1587 /// 2. Pairwise reduction:
1588 /// Everything is the same except for an additional shuffle operation which
1589 /// is used to produce operands for pairwise kind of reductions.
1590 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1591 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1592 /// \-------------v----------/ \----------v------------/
1593 /// n/2 elements n/2 elements
1594 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1595 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1596 /// \-------------v----------/ \----------v------------/
1597 /// n/2 elements n/2 elements
1598 /// %red1 = op <n x t> %val1, <n x t> val2
1599 /// Again, the operation is performed on <n x t> vector, but the resulting
1600 /// vector %red1 is <n/2 x t> vector.
1601 ///
1602 /// The cost model should take into account that the actual length of the
1603 /// vector is reduced on each iteration.
1604 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1605 bool IsPairwise) {
1606 assert(Ty->isVectorTy() && "Expect a vector type");
1607 Type *ScalarTy = Ty->getVectorElementType();
1608 unsigned NumVecElts = Ty->getVectorNumElements();
1609 unsigned NumReduxLevels = Log2_32(NumVecElts);
1610 unsigned ArithCost = 0;
1611 unsigned ShuffleCost = 0;
1612 auto *ConcreteTTI = static_cast<T *>(this);
1613 std::pair<unsigned, MVT> LT =
1614 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1615 unsigned LongVectorCount = 0;
1616 unsigned MVTLen =
1617 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1618 while (NumVecElts > MVTLen) {
1619 NumVecElts /= 2;
1620 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1621 // Assume the pairwise shuffles add a cost.
1622 ShuffleCost += (IsPairwise + 1) *
1623 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1624 NumVecElts, SubTy);
1625 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1626 Ty = SubTy;
1627 ++LongVectorCount;
1628 }
1629
1630 NumReduxLevels -= LongVectorCount;
1631
1632 // The minimal length of the vector is limited by the real length of vector
1633 // operations performed on the current platform. That's why several final
1634 // reduction operations are performed on the vectors with the same
1635 // architecture-dependent length.
1636
1637 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1638 // reductions need two shuffles on every level except the last one. On that
1639 // level, one of the shuffles is <0, u, u, ...>, which is an identity.
1640 unsigned NumShuffles = NumReduxLevels;
1641 if (IsPairwise && NumReduxLevels >= 1)
1642 NumShuffles += NumReduxLevels - 1;
1643 ShuffleCost += NumShuffles *
1644 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1645 0, Ty);
1646 ArithCost += NumReduxLevels *
1647 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1648 return ShuffleCost + ArithCost +
1649 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1650 }
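// A standalone sketch of the non-pairwise reduction shape described in the
// doc comment above: each level halves the live portion of the vector, so an
// 8-element add reduction needs log2(8) = 3 shuffle+op levels plus one final
// extract. A plain array stands in for the IR vector.
#include <array>
#include <cstdio>

int main() {
  std::array<int, 8> Val = {1, 2, 3, 4, 5, 6, 7, 8};
  for (unsigned Width = 4; Width >= 1; Width /= 2) // three reduction levels
    for (unsigned I = 0; I < Width; ++I)
      Val[I] += Val[I + Width];                    // op(%red, shuffled %red)
  std::printf("reduced sum = %d\n", Val[0]);       // extractelement at lane 0
  return 0;
}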
1651
1652 /// Try to calculate op costs for min/max reduction operations.
1653 /// \param CondTy Conditional type for the Select instruction.
1654 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1655 bool) {
1656 assert(Ty->isVectorTy() && "Expect a vector type");
1657 Type *ScalarTy = Ty->getVectorElementType();
1658 Type *ScalarCondTy = CondTy->getVectorElementType();
1659 unsigned NumVecElts = Ty->getVectorNumElements();
1660 unsigned NumReduxLevels = Log2_32(NumVecElts);
1661 unsigned CmpOpcode;
1662 if (Ty->isFPOrFPVectorTy()) {
1663 CmpOpcode = Instruction::FCmp;
1664 } else {
1665 assert(Ty->isIntOrIntVectorTy() &&
1666        "expecting floating point or integer type for min/max reduction");
1667 CmpOpcode = Instruction::ICmp;
1668 }
1669 unsigned MinMaxCost = 0;
1670 unsigned ShuffleCost = 0;
1671 auto *ConcreteTTI = static_cast<T *>(this);
1672 std::pair<unsigned, MVT> LT =
1673 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1674 unsigned LongVectorCount = 0;
1675 unsigned MVTLen =
1676 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1677 while (NumVecElts > MVTLen) {
1678 NumVecElts /= 2;
1679 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1680 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1681
1682 // Assume the pairwise shuffles add a cost.
1683 ShuffleCost += (IsPairwise + 1) *
1684 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1685 NumVecElts, SubTy);
1686 MinMaxCost +=
1687 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1688 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1689 nullptr);
1690 Ty = SubTy;
1691 ++LongVectorCount;
1692 }
1693
1694 NumReduxLevels -= LongVectorCount;
1695
1696 // The minimal length of the vector is limited by the real length of vector
1697 // operations performed on the current platform. That's why several final
1698 // reduction operations are performed on the vectors with the same
1699 // architecture-dependent length.
1700
1701 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1702 // reductions need two shuffles on every level except the last one. On that
1703 // level, one of the shuffles is <0, u, u, ...>, which is an identity.
1704 unsigned NumShuffles = NumReduxLevels;
1705 if (IsPairwise && NumReduxLevels >= 1)
1706 NumShuffles += NumReduxLevels - 1;
1707 ShuffleCost += NumShuffles *
1708 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1709 0, Ty);
1710 MinMaxCost +=
1711 NumReduxLevels *
1712 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1713 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1714 nullptr));
1715 // The last min/max should be in vector registers and we counted it above.
1716 // So just need a single extractelement.
1717 return ShuffleCost + MinMaxCost +
1718 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1719 }
1720
1721 unsigned getVectorSplitCost() { return 1; }
1722
1723 /// @}
1724};
1725
1726/// Concrete BasicTTIImpl that can be used if no further customization
1727/// is needed.
1728class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1729 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1730
1731 friend class BasicTTIImplBase<BasicTTIImpl>;
1732
1733 const TargetSubtargetInfo *ST;
1734 const TargetLoweringBase *TLI;
1735
1736 const TargetSubtargetInfo *getST() const { return ST; }
1737 const TargetLoweringBase *getTLI() const { return TLI; }
1738
1739public:
1740 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1741};
1742
1743} // end namespace llvm
1744
1745#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has three
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/CodeGen/DAGCombine.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Type.h"
50#include "llvm/MC/MCRegisterInfo.h"
51#include "llvm/Support/Alignment.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include "llvm/Transforms/Utils/SizeOpts.h"
58#include <algorithm>
59#include <cassert>
60#include <climits>
61#include <cstdint>
62#include <iterator>
63#include <map>
64#include <string>
65#include <utility>
66#include <vector>
67
68namespace llvm {
69
70class BranchProbability;
71class CCState;
72class CCValAssign;
73class Constant;
74class FastISel;
75class FunctionLoweringInfo;
76class GlobalValue;
77class GISelKnownBits;
78class IntrinsicInst;
79struct KnownBits;
80class LegacyDivergenceAnalysis;
81class LLVMContext;
82class MachineBasicBlock;
83class MachineFunction;
84class MachineInstr;
85class MachineJumpTableInfo;
86class MachineLoop;
87class MachineRegisterInfo;
88class MCContext;
89class MCExpr;
90class Module;
91class TargetRegisterClass;
92class TargetLibraryInfo;
93class TargetRegisterInfo;
94class Value;
95
96namespace Sched {
97
98 enum Preference {
99 None, // No preference
100 Source, // Follow source order.
101 RegPressure, // Scheduling for lowest register pressure.
102 Hybrid, // Scheduling for both latency and register pressure.
103 ILP, // Scheduling for ILP in low register pressure mode.
104 VLIW // Scheduling for VLIW targets.
105 };
106
107} // end namespace Sched
108
109/// This base class for TargetLowering contains the SelectionDAG-independent
110/// parts that can be used from the rest of CodeGen.
111class TargetLoweringBase {
112public:
113 /// This enum indicates whether operations are valid for a target, and if not,
114 /// what action should be used to make them valid.
115 enum LegalizeAction : uint8_t {
116 Legal, // The target natively supports this operation.
117 Promote, // This operation should be executed in a larger type.
118 Expand, // Try to expand this to other ops, otherwise use a libcall.
119 LibCall, // Don't try to expand this to other ops, always use a libcall.
120 Custom // Use the LowerOperation hook to implement custom lowering.
121 };
122
123 /// This enum indicates whether types are legal for a target, and if not,
124 /// what action should be used to make them valid.
125 enum LegalizeTypeAction : uint8_t {
126 TypeLegal, // The target natively supports this type.
127 TypePromoteInteger, // Replace this integer with a larger one.
128 TypeExpandInteger, // Split this integer into two of half the size.
129 TypeSoftenFloat, // Convert this float to a same size integer type.
130 TypeExpandFloat, // Split this float into two of half the size.
131 TypeScalarizeVector, // Replace this one-element vector with its element.
132 TypeSplitVector, // Split this vector into two of half the size.
133 TypeWidenVector, // This vector should be widened into a larger vector.
134 TypePromoteFloat // Replace this float with a larger one.
135 };
136
137 /// LegalizeKind holds the legalization kind that needs to happen to EVT
138 /// in order to type-legalize it.
139 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
140
141 /// Enum that describes how the target represents true/false values.
142 enum BooleanContent {
143 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
144 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
145 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
146 };
147
148 /// Enum that describes what type of support for selects the target has.
149 enum SelectSupportKind {
150 ScalarValSelect, // The target supports scalar selects (ex: cmov).
151 ScalarCondVectorVal, // The target supports selects with a scalar condition
152 // and vector values (ex: cmov).
153 VectorMaskSelect // The target supports vector selects with a vector
154 // mask (ex: x86 blends).
155 };
156
157 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
158 /// to, if at all. Exists because different targets have different levels of
159 /// support for these atomic instructions, and also have different options
160 /// w.r.t. what they should expand to.
161 enum class AtomicExpansionKind {
162 None, // Don't expand the instruction.
163 LLSC, // Expand the instruction into loadlinked/storeconditional; used
164 // by ARM/AArch64.
165 LLOnly, // Expand the (load) instruction into just a load-linked, which has
166 // greater atomic guarantees than a normal load.
167 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
168 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
169 };
170
171 /// Enum that specifies when a multiplication should be expanded.
172 enum class MulExpansionKind {
173 Always, // Always expand the instruction.
174 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
175 // or custom.
176 };
177
178 class ArgListEntry {
179 public:
180 Value *Val = nullptr;
181 SDValue Node = SDValue();
182 Type *Ty = nullptr;
183 bool IsSExt : 1;
184 bool IsZExt : 1;
185 bool IsInReg : 1;
186 bool IsSRet : 1;
187 bool IsNest : 1;
188 bool IsByVal : 1;
189 bool IsInAlloca : 1;
190 bool IsReturned : 1;
191 bool IsSwiftSelf : 1;
192 bool IsSwiftError : 1;
193 bool IsCFGuardTarget : 1;
194 uint16_t Alignment = 0;
195 Type *ByValType = nullptr;
196
197 ArgListEntry()
198 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
199 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
200 IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
201
202 void setAttributes(const CallBase *Call, unsigned ArgIdx);
203
204 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
205 return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
206 }
207 };
208 using ArgListTy = std::vector<ArgListEntry>;
209
210 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
211 ArgListTy &Args) const {};
212
213 static ISD::NodeType getExtendForContent(BooleanContent Content) {
214 switch (Content) {
215 case UndefinedBooleanContent:
216 // Extend by adding rubbish bits.
217 return ISD::ANY_EXTEND;
218 case ZeroOrOneBooleanContent:
219 // Extend by adding zero bits.
220 return ISD::ZERO_EXTEND;
221 case ZeroOrNegativeOneBooleanContent:
222 // Extend by copying the sign bit.
223 return ISD::SIGN_EXTEND;
224 }
225 llvm_unreachable("Invalid content kind");
226 }
227
228 /// NOTE: The TargetMachine owns TLOF.
229 explicit TargetLoweringBase(const TargetMachine &TM);
230 TargetLoweringBase(const TargetLoweringBase &) = delete;
231 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
232 virtual ~TargetLoweringBase() = default;
233
234protected:
235 /// Initialize all of the actions to default values.
236 void initActions();
237
238public:
239 const TargetMachine &getTargetMachine() const { return TM; }
240
241 virtual bool useSoftFloat() const { return false; }
242
243 /// Return the pointer type for the given address space, defaults to
244 /// the pointer type from the data layout.
245 /// FIXME: The default needs to be removed once all the code is updated.
246 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
247 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
248 }
249
250 /// Return the in-memory pointer type for the given address space, defaults to
251 /// the pointer type from the data layout. FIXME: The default needs to be
252 /// removed once all the code is updated.
253 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
254 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
255 }
256
257 /// Return the type for frame index, which is determined by
258 /// the alloca address space specified through the data layout.
259 MVT getFrameIndexTy(const DataLayout &DL) const {
260 return getPointerTy(DL, DL.getAllocaAddrSpace());
261 }
262
263 /// Return the type for operands of fence.
264 /// TODO: Let fence operands be of i32 type and remove this.
265 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
266 return getPointerTy(DL);
267 }
268
269 /// EVT is not used in-tree, but is used by out-of-tree targets.
270 /// Some documentation for this function would be nice...
271 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
272
273 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
274 bool LegalTypes = true) const;
275
276 /// Returns the type to be used for the index operand of:
277 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
278 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
279 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
280 return getPointerTy(DL);
281 }
282
283 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
284 return true;
285 }
286
287 /// Return true if it is profitable to convert a select of FP constants into
288 /// a constant pool load whose address depends on the select condition. The
289 /// parameter may be used to differentiate a select with FP compare from
290 /// integer compare.
291 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
292 return true;
293 }
294
295 /// Return true if multiple condition registers are available.
296 bool hasMultipleConditionRegisters() const {
297 return HasMultipleConditionRegisters;
298 }
299
300 /// Return true if the target has BitExtract instructions.
301 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
302
303 /// Return the preferred vector type legalization action.
304 virtual TargetLoweringBase::LegalizeTypeAction
305 getPreferredVectorAction(MVT VT) const {
306 // The default action for one element vectors is to scalarize
307 if (VT.getVectorNumElements() == 1)
308 return TypeScalarizeVector;
309 // The default action for a non-power-of-2-width vector is to widen.
310 if (!VT.isPow2VectorType())
311 return TypeWidenVector;
312 // The default action for other vectors is to promote
313 return TypePromoteInteger;
314 }
315
316 // There are two general methods for expanding a BUILD_VECTOR node:
317 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
318 // them together.
319 // 2. Build the vector on the stack and then load it.
320 // If this function returns true, then method (1) will be used, subject to
321 // the constraint that all of the necessary shuffles are legal (as determined
322 // by isShuffleMaskLegal). If this function returns false, then method (2) is
323 // always used. The vector type, and the number of defined values, are
324 // provided.
325 virtual bool
326 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
327 unsigned DefinedValues) const {
328 return DefinedValues < 3;
329 }
330
331 /// Return true if integer divide is usually cheaper than a sequence of
332 /// several shifts, adds, and multiplies for this target.
333 /// The definition of "cheaper" may depend on whether we're optimizing
334 /// for speed or for size.
335 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
336
337 /// Return true if the target can handle a standalone remainder operation.
338 virtual bool hasStandaloneRem(EVT VT) const {
339 return true;
340 }
341
342 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
343 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
344 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
345 return false;
346 }
347
348 /// Reciprocal estimate status values used by the functions below.
349 enum ReciprocalEstimate : int {
350 Unspecified = -1,
351 Disabled = 0,
352 Enabled = 1
353 };
354
355 /// Return a ReciprocalEstimate enum value for a square root of the given type
356 /// based on the function's attributes. If the operation is not overridden by
357 /// the function's attributes, "Unspecified" is returned and target defaults
358 /// are expected to be used for instruction selection.
359 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
360
361 /// Return a ReciprocalEstimate enum value for a division of the given type
362 /// based on the function's attributes. If the operation is not overridden by
363 /// the function's attributes, "Unspecified" is returned and target defaults
364 /// are expected to be used for instruction selection.
365 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
366
367 /// Return the refinement step count for a square root of the given type based
368 /// on the function's attributes. If the operation is not overridden by
369 /// the function's attributes, "Unspecified" is returned and target defaults
370 /// are expected to be used for instruction selection.
371 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
372
373 /// Return the refinement step count for a division of the given type based
374 /// on the function's attributes. If the operation is not overridden by
375 /// the function's attributes, "Unspecified" is returned and target defaults
376 /// are expected to be used for instruction selection.
377 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
378
379 /// Returns true if target has indicated at least one type should be bypassed.
380 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
381
382 /// Returns map of slow types for division or remainder with corresponding
383 /// fast types
384 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
385 return BypassSlowDivWidths;
386 }
387
388 /// Return true if Flow Control is an expensive operation that should be
389 /// avoided.
390 bool isJumpExpensive() const { return JumpIsExpensive; }
391
392 /// Return true if selects are only cheaper than branches if the branch is
393 /// unlikely to be predicted right.
394 bool isPredictableSelectExpensive() const {
395 return PredictableSelectIsExpensive;
396 }
397
398 /// If a branch or a select condition is skewed in one direction by more than
399 /// this factor, it is very likely to be predicted correctly.
400 virtual BranchProbability getPredictableBranchThreshold() const;
401
402 /// Return true if the following transform is beneficial:
403 /// fold (conv (load x)) -> (load (conv*)x)
404 /// On architectures that don't natively support some vector loads
405 /// efficiently, casting the load to a smaller vector of larger types and
406 /// loading is more efficient, however, this can be undone by optimizations in
407 /// dag combiner.
408 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
409 const SelectionDAG &DAG,
410 const MachineMemOperand &MMO) const {
411 // Don't do this if we could do an indexed load on the original type, but not on
412 // the new one.
413 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
414 return true;
415
416 MVT LoadMVT = LoadVT.getSimpleVT();
417
418 // Don't bother doing this if it's just going to be promoted again later, as
419 // doing so might interfere with other combines.
420 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
421 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
422 return false;
423
424 bool Fast = false;
425 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
426 MMO, &Fast) && Fast;
427 }
428
429 /// Return true if the following transform is beneficial:
430 /// (store (y (conv x)), y*)) -> (store x, (x*))
431 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
432 const SelectionDAG &DAG,
433 const MachineMemOperand &MMO) const {
434 // Default to the same logic as loads.
435 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
436 }
437
438 /// Return true if it is expected to be cheaper to do a store of a non-zero
439 /// vector constant with the given size and type for the address space than to
440 /// store the individual scalar element constants.
441 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
442 unsigned NumElem,
443 unsigned AddrSpace) const {
444 return false;
445 }
446
447 /// Allow store merging for the specified type after legalization in addition
448 /// to before legalization. This may transform stores that do not exist
449 /// earlier (for example, stores created from intrinsics).
450 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
451 return true;
452 }
453
454 /// Returns if it's reasonable to merge stores to MemVT size.
455 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
456 const SelectionDAG &DAG) const {
457 return true;
458 }
459
460 /// Return true if it is cheap to speculate a call to intrinsic cttz.
461 virtual bool isCheapToSpeculateCttz() const {
462 return false;
463 }
464
465 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
466 virtual bool isCheapToSpeculateCtlz() const {
467 return false;
468 }
469
470 /// Return true if ctlz instruction is fast.
471 virtual bool isCtlzFast() const {
472 return false;
473 }
474
475 /// Return true if instruction generated for equality comparison is folded
476 /// with instruction generated for signed comparison.
477 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
478
479 /// Return true if it is safe to transform an integer-domain bitwise operation
480 /// into the equivalent floating-point operation. This should be set to true
481 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
482 /// type.
483 virtual bool hasBitPreservingFPLogic(EVT VT) const {
484 return false;
485 }
486
487 /// Return true if it is cheaper to split the store of a merged int val
488 /// from a pair of smaller values into multiple stores.
489 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
490 return false;
491 }
492
493 /// Return if the target supports combining a
494 /// chain like:
495 /// \code
496 /// %andResult = and %val1, #mask
497 /// %icmpResult = icmp %andResult, 0
498 /// \endcode
499 /// into a single machine instruction of a form like:
500 /// \code
501 /// cc = test %register, #mask
502 /// \endcode
503 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
504 return false;
505 }
506
507 /// Use bitwise logic to make pairs of compares more efficient. For example:
508 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
509 /// This should be true when it takes more than one instruction to lower
510 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
511 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
512 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
513 return false;
514 }
515
516 /// Return the preferred operand type if the target has a quick way to compare
517 /// integer values of the given size. Assume that any legal integer type can
518 /// be compared efficiently. Targets may override this to allow illegal wide
519 /// types to return a vector type if there is support to compare that type.
520 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
521 MVT VT = MVT::getIntegerVT(NumBits);
522 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
523 }
524
525 /// Return true if the target should transform:
526 /// (X & Y) == Y ---> (~X & Y) == 0
527 /// (X & Y) != Y ---> (~X & Y) != 0
528 ///
529 /// This may be profitable if the target has a bitwise and-not operation that
530 /// sets comparison flags. A target may want to limit the transformation based
531 /// on the type of Y or if Y is a constant.
532 ///
533 /// Note that the transform will not occur if Y is known to be a power-of-2
534 /// because a mask and compare of a single bit can be handled by inverting the
535 /// predicate, for example:
536 /// (X & 8) == 8 ---> (X & 8) != 0
537 virtual bool hasAndNotCompare(SDValue Y) const {
538 return false;
539 }
540
541 /// Return true if the target has a bitwise and-not operation:
542 /// X = ~A & B
543 /// This can be used to simplify select or other instructions.
544 virtual bool hasAndNot(SDValue X) const {
545 // If the target has the more complex version of this operation, assume that
546 // it has this operation too.
547 return hasAndNotCompare(X);
548 }
549
550 /// Return true if the target has a bit-test instruction:
551 /// (X & (1 << Y)) ==/!= 0
552 /// This knowledge can be used to prevent breaking the pattern,
553 /// or creating it if it could be recognized.
554 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
555
556 /// There are two ways to clear extreme bits (either low or high):
557 /// Mask: x & (-1 << y) (the instcombine canonical form)
558 /// Shifts: x >> y << y
559 /// Return true if the variant with 2 variable shifts is preferred.
560 /// Return false if there is no preference.
561 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
562 // By default, let's assume that no one prefers shifts.
563 return false;
564 }
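// A small self-check of the equivalence stated in the comment above for
// clearing the low Y bits (with Y below the bit width): the mask form
// x & (-1 << y), written here with an unsigned all-ones constant, and the
// shift-pair form (x >> y) << y produce the same value.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Xs[] = {0x0u, 0x1u, 0xABCDu, 0xFFFFFFFFu};
  for (uint32_t X : Xs)
    for (unsigned Y = 0; Y < 32; ++Y)
      assert((X & (0xFFFFFFFFu << Y)) == ((X >> Y) << Y));
  return 0;
}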
565
566 /// Return true if it is profitable to fold a pair of shifts into a mask.
567 /// This is usually true on most targets. But some targets, like Thumb1,
568 /// have immediate shift instructions, but no immediate "and" instruction;
569 /// this makes the fold unprofitable.
570 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
571 CombineLevel Level) const {
572 return true;
573 }
574
575 /// Should we transform the IR-optimal check for whether the given truncation
576 /// down into KeptBits would be truncating or not:
577 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
578 /// Into its more traditional form:
579 /// ((%x << C) a>> C) dstcond %x
580 /// Return true if we should transform.
581 /// Return false if there is no preference.
582 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
583 unsigned KeptBits) const {
584 // By default, let's assume that no one prefers shifts.
585 return false;
586 }
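
Editor's note: both checks decide whether X survives a signed truncation to KeptBits bits. A standalone sketch specialized to KeptBits = 8 with a 32-bit X (plain C++; the helper names addForm/shiftForm are made up, and the int8_t cast stands in for the shift pair, assuming two's-complement narrowing as on all mainstream targets):

    #include <cassert>
    #include <cstdint>

    static bool addForm(int32_t X) {
      // (add X, 1 << (KeptBits - 1)) u< (1 << KeptBits), with KeptBits = 8.
      return (uint32_t)(X + 128) < 256u;
    }
    static bool shiftForm(int32_t X) {
      // ((X << C) a>> C) == X with C = 32 - KeptBits, written as a
      // sign-extension round trip through int8_t.
      return (int32_t)(int8_t)X == X;
    }

    int main() {
      for (int64_t X = -300; X <= 300; ++X)
        assert(addForm((int32_t)X) == shiftForm((int32_t)X));
      assert(addForm(127) && !addForm(128) && addForm(-128) && !addForm(-129));
      return 0;
    }
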
587
588 /// Given the pattern
589 /// (X & (C l>>/<< Y)) ==/!= 0
590 /// return true if it should be transformed into:
591 /// ((X <</l>> Y) & C) ==/!= 0
592 /// WARNING: if 'X' is a constant, the fold may deadlock!
593 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
594 /// here because it can end up not being linked in.
595 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
596 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
597 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
598 SelectionDAG &DAG) const {
599 if (hasBitTest(X, Y)) {
600 // One interesting pattern that we'd want to form is 'bit test':
601 // ((1 << Y) & C) ==/!= 0
602 // But we also need to be careful not to try to reverse that fold.
603
604 // Is this '1 << Y' ?
605 if (OldShiftOpcode == ISD::SHL && CC->isOne())
606 return false; // Keep the 'bit test' pattern.
607
608 // Will it be '1 << Y' after the transform ?
609 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
610 return true; // Do form the 'bit test' pattern.
611 }
612
613 // If 'X' is a constant, and we transform, then we will immediately
614 // try to undo the fold, thus causing endless combine loop.
615 // So by default, let's assume everyone prefers the fold
616 // iff 'X' is not a constant.
617 return !XC;
618 }
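
Editor's note: the shift-hoisting rewrite above is again a bit identity; a standalone sketch (plain C++, illustrative only) checks the logical-shift-right variant exhaustively over an 8-bit width:

    #include <cassert>

    int main() {
      // (X & (C l>> Y)) != 0   <=>   ((X << Y) & C) != 0
      // Shifts are done in 'unsigned'; the left shift is masked back to 8 bits.
      for (unsigned X = 0; X < 256; ++X)
        for (unsigned C = 0; C < 256; ++C)
          for (unsigned Y = 0; Y < 8; ++Y) {
            bool before = (X & (C >> Y)) != 0;
            bool after  = (((X << Y) & 0xFFu) & C) != 0;
            assert(before == after);
          }
      return 0;
    }
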
619
620 /// These two forms are equivalent:
621 /// sub %y, (xor %x, -1)
622 /// add (add %x, 1), %y
623 /// The variant with two add's is IR-canonical.
624 /// Some targets may prefer one to the other.
625 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
626 // By default, let's assume that everyone prefers the form with two add's.
627 return true;
628 }
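
Editor's note: the two forms are equal because xor with -1 is bitwise-not, and ~X == -X - 1 in two's complement, so Y - ~X == Y + X + 1. A standalone sketch (plain C++, wrapping unsigned arithmetic, illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      // sub %y, (xor %x, -1)   vs.   add (add %x, 1), %y
      const uint32_t Samples[] = {0u, 1u, 41u, 0x7FFFFFFFu, 0x80000000u, 0xFFFFFFFFu};
      for (uint32_t X : Samples)
        for (uint32_t Y : Samples)
          assert(Y - (X ^ 0xFFFFFFFFu) == (X + 1u) + Y);
      return 0;
    }
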
629
630 /// Return true if the target wants to use the optimization that
631 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
632 /// promotedInst1(...(promotedInstN(ext(load)))).
633 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
634
635 /// Return true if the target can combine store(extractelement VectorTy,
636 /// Idx).
637 /// \p Cost[out] gives the cost of that transformation when this is true.
638 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
639 unsigned &Cost) const {
640 return false;
641 }
642
643 /// Return true if inserting a scalar into a variable element of an undef
644 /// vector is more efficiently handled by splatting the scalar instead.
645 virtual bool shouldSplatInsEltVarIndex(EVT) const {
646 return false;
647 }
648
649 /// Return true if the target always benefits from combining into FMA for a
650 /// given value type. This must typically return false on targets where FMA
651 /// takes more cycles to execute than FADD.
652 virtual bool enableAggressiveFMAFusion(EVT VT) const {
653 return false;
654 }
655
656 /// Return the ValueType of the result of SETCC operations.
657 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
658 EVT VT) const;
659
660 /// Return the ValueType for comparison libcalls. Comparison libcalls include
661 /// floating point comparison calls, and Ordered/Unordered check calls on
662 /// floating point numbers.
663 virtual
664 MVT::SimpleValueType getCmpLibcallReturnType() const;
665
666 /// For targets without i1 registers, this gives the nature of the high-bits
667 /// of boolean values held in types wider than i1.
668 ///
669 /// "Boolean values" are special true/false values produced by nodes like
670 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
671 /// Not to be confused with general values promoted from i1. Some cpus
672 /// distinguish between vectors of boolean and scalars; the isVec parameter
673 /// selects between the two kinds. For example on X86 a scalar boolean should
674 /// be zero extended from i1, while the elements of a vector of booleans
675 /// should be sign extended from i1.
676 ///
677 /// Some cpus also treat floating point types the same way as they treat
678 /// vectors instead of the way they treat scalars.
679 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
680 if (isVec)
681 return BooleanVectorContents;
682 return isFloat ? BooleanFloatContents : BooleanContents;
683 }
684
685 BooleanContent getBooleanContents(EVT Type) const {
686 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
687 }
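
Editor's note: for readers unfamiliar with the three contents kinds, the widened bit patterns look like this (plain C++ illustration, not LLVM code; the names in the comments refer to the BooleanContent enumerators this accessor returns):

    #include <cassert>
    #include <cstdint>

    int main() {
      // A "true" produced by a compare, widened to 32 bits:
      bool True = true;
      uint32_t ZeroExtended = True ? 1u : 0u;           // ZeroOrOneBooleanContent
      uint32_t SignExtended = True ? 0xFFFFFFFFu : 0u;  // ZeroOrNegativeOneBooleanContent
      // UndefinedBooleanContent: only bit 0 is meaningful, the rest is unspecified.
      assert((ZeroExtended & 1u) == 1u && (SignExtended & 1u) == 1u);
      return 0;
    }
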
688
689 /// Return target scheduling preference.
690 Sched::Preference getSchedulingPreference() const {
691 return SchedPreferenceInfo;
692 }
693
694 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
695 /// for different nodes. This function returns the preference (or none) for
696 /// the given node.
697 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
698 return Sched::None;
699 }
700
701 /// Return the register class that should be used for the specified value
702 /// type.
703 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
704 (void)isDivergent;
705 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
706 assert(RC && "This value type is not natively supported!");
707 return RC;
708 }
709
710 /// Allows target to decide about the register class of the
711 /// specific value that is live outside the defining block.
712 /// Returns true if the value needs uniform register class.
713 virtual bool requiresUniformRegister(MachineFunction &MF,
714 const Value *) const {
715 return false;
716 }
717
718 /// Return the 'representative' register class for the specified value
719 /// type.
720 ///
721 /// The 'representative' register class is the largest legal super-reg
722 /// register class for the register class of the value type. For example, on
723 /// i386 the rep register class for i8, i16, and i32 is GR32; while the rep
724 /// register class is GR64 on x86_64.
725 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
726 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
727 return RC;
728 }
729
730 /// Return the cost of the 'representative' register class for the specified
731 /// value type.
732 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
733 return RepRegClassCostForVT[VT.SimpleTy];
734 }
735
736 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
737 /// instructions, and false if a library call is preferred (e.g for code-size
738 /// reasons).
739 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
740 return true;
741 }
742
743 /// Return true if the target has native support for the specified value type.
744 /// This means that it has a register that directly holds it without
745 /// promotions or expansions.
746 bool isTypeLegal(EVT VT) const {
747 assert(!VT.isSimple() ||
748 (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
749 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
750 }
751
752 class ValueTypeActionImpl {
753 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
754 /// that indicates how instruction selection should deal with the type.
755 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
756
757 public:
758 ValueTypeActionImpl() {
759 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
760 TypeLegal);
761 }
762
763 LegalizeTypeAction getTypeAction(MVT VT) const {
764 return ValueTypeActions[VT.SimpleTy];
765 }
766
767 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
768 ValueTypeActions[VT.SimpleTy] = Action;
769 }
770 };
771
772 const ValueTypeActionImpl &getValueTypeActions() const {
773 return ValueTypeActions;
774 }
775
776 /// Return how we should legalize values of this type, either it is already
777 /// legal (return 'Legal') or we need to promote it to a larger type (return
778 /// 'Promote'), or we need to expand it into multiple registers of smaller
779 /// integer type (return 'Expand'). 'Custom' is not an option.
780 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
781 return getTypeConversion(Context, VT).first;
782 }
783 LegalizeTypeAction getTypeAction(MVT VT) const {
784 return ValueTypeActions.getTypeAction(VT);
785 }
786
787 /// For types supported by the target, this is an identity function. For
788 /// types that must be promoted to larger types, this returns the larger type
789 /// to promote to. For integer types that are larger than the largest integer
790 /// register, this contains one step in the expansion to get to the smaller
791 /// register. For illegal floating point types, this returns the integer type
792 /// to transform to.
793 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
794 return getTypeConversion(Context, VT).second;
795 }
796
797 /// For types supported by the target, this is an identity function. For
798 /// types that must be expanded (i.e. integer types that are larger than the
799 /// largest integer register or illegal floating point types), this returns
800 /// the largest legal type it will be expanded to.
801 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
802 assert(!VT.isVector());
803 while (true) {
804 switch (getTypeAction(Context, VT)) {
805 case TypeLegal:
806 return VT;
807 case TypeExpandInteger:
808 VT = getTypeToTransformTo(Context, VT);
809 break;
810 default:
811 llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 811)
;
812 }
813 }
814 }
815
816 /// Vector types are broken down into some number of legal first class types.
817 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
818 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
819 /// turns into 4 EVT::i32 values with both PPC and X86.
820 ///
821 /// This method returns the number of registers needed, and the VT for each
822 /// register. It also returns the VT and quantity of the intermediate values
823 /// before they are promoted/expanded.
824 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
825 EVT &IntermediateVT,
826 unsigned &NumIntermediates,
827 MVT &RegisterVT) const;
828
829 /// Certain targets such as MIPS require that some types such as vectors are
830 /// always broken down into scalars in some contexts. This occurs even if the
831 /// vector type is legal.
832 virtual unsigned getVectorTypeBreakdownForCallingConv(
833 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
834 unsigned &NumIntermediates, MVT &RegisterVT) const {
835 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
836 RegisterVT);
837 }
838
839 struct IntrinsicInfo {
840 unsigned opc = 0; // target opcode
841 EVT memVT; // memory VT
842
843 // value representing memory location
844 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
845
846 int offset = 0; // offset off of ptrVal
847 uint64_t size = 0; // the size of the memory location
848 // (taken from memVT if zero)
849 MaybeAlign align = Align::None(); // alignment
850
851 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
852 IntrinsicInfo() = default;
853 };
854
855 /// Given an intrinsic, checks if on the target the intrinsic will need to map
856 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
857 /// true and stores the intrinsic information into the IntrinsicInfo that was
858 /// passed to the function.
859 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
860 MachineFunction &,
861 unsigned /*Intrinsic*/) const {
862 return false;
863 }
864
865 /// Returns true if the target can instruction select the specified FP
866 /// immediate natively. If false, the legalizer will materialize the FP
867 /// immediate as a load from a constant pool.
868 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
869 bool ForCodeSize = false) const {
870 return false;
871 }
872
873 /// Targets can use this to indicate that they only support *some*
874 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
875 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
876 /// legal.
877 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
878 return true;
879 }
880
881 /// Returns true if the operation can trap for the value type.
882 ///
883 /// VT must be a legal type. By default, we optimistically assume most
884 /// operations don't trap except for integer divide and remainder.
885 virtual bool canOpTrap(unsigned Op, EVT VT) const;
886
887 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
888 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
889 /// constant pool entry.
890 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
891 EVT /*VT*/) const {
892 return false;
893 }
894
895 /// Return how this operation should be treated: either it is legal, needs to
896 /// be promoted to a larger size, needs to be expanded to some other code
897 /// sequence, or the target has a custom expander for it.
898 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
899 if (VT.isExtended()) return Expand;
900 // If a target-specific SDNode requires legalization, require the target
901 // to provide custom legalization for it.
902 if (Op >= array_lengthof(OpActions[0])) return Custom;
903 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
904 }
905
906 /// Custom method defined by each target to indicate if an operation which
907 /// may require a scale is supported natively by the target.
908 /// If not, the operation is illegal.
909 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
910 unsigned Scale) const {
911 return false;
912 }
913
914 /// Some fixed point operations may be natively supported by the target but
915 /// only for specific scales. This method allows for checking
916 /// if the width is supported by the target for a given operation that may
917 /// depend on scale.
918 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
919 unsigned Scale) const {
920 auto Action = getOperationAction(Op, VT);
921 if (Action != Legal)
922 return Action;
923
924 // This operation is supported in this type but may only work on specific
925 // scales.
926 bool Supported;
927 switch (Op) {
928 default:
929 llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation."
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 929)
;
930 case ISD::SMULFIX:
931 case ISD::SMULFIXSAT:
932 case ISD::UMULFIX:
933 case ISD::UMULFIXSAT:
934 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
935 break;
936 }
937
938 return Supported ? Action : Expand;
939 }
940
941 // If Op is a strict floating-point operation, return the result
942 // of getOperationAction for the equivalent non-strict operation.
943 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
944 unsigned EqOpc;
945 switch (Op) {
946 default: llvm_unreachable("Unexpected FP pseudo-opcode");
947 case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
948 case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
949 case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
950 case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
951 case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
952 case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
953 case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
954 case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
955 case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
956 case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
957 case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
958 case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
959 case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
960 case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
961 case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
962 case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
963 case ISD::STRICT_LRINT: EqOpc = ISD::LRINT; break;
964 case ISD::STRICT_LLRINT: EqOpc = ISD::LLRINT; break;
965 case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
966 case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
967 case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
968 case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
969 case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
970 case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
971 case ISD::STRICT_LROUND: EqOpc = ISD::LROUND; break;
972 case ISD::STRICT_LLROUND: EqOpc = ISD::LLROUND; break;
973 case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
974 case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
975 case ISD::STRICT_FP_TO_SINT: EqOpc = ISD::FP_TO_SINT; break;
976 case ISD::STRICT_FP_TO_UINT: EqOpc = ISD::FP_TO_UINT; break;
977 case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
978 case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
979 }
980
981 return getOperationAction(EqOpc, VT);
982 }
983
984 /// Return true if the specified operation is legal on this target or can be
985 /// made legal with custom lowering. This is used to help guide high-level
986 /// lowering decisions.
987 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
988 return (VT == MVT::Other || isTypeLegal(VT)) &&
989 (getOperationAction(Op, VT) == Legal ||
990 getOperationAction(Op, VT) == Custom);
991 }
992
993 /// Return true if the specified operation is legal on this target or can be
994 /// made legal using promotion. This is used to help guide high-level lowering
995 /// decisions.
996 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
997 return (VT == MVT::Other || isTypeLegal(VT)) &&
998 (getOperationAction(Op, VT) == Legal ||
999 getOperationAction(Op, VT) == Promote);
1000 }
1001
1002 /// Return true if the specified operation is legal on this target or can be
1003 /// made legal with custom lowering or using promotion. This is used to help
1004 /// guide high-level lowering decisions.
1005 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
1006 return (VT == MVT::Other || isTypeLegal(VT)) &&
1007 (getOperationAction(Op, VT) == Legal ||
1008 getOperationAction(Op, VT) == Custom ||
1009 getOperationAction(Op, VT) == Promote);
1010 }
1011
1012 /// Return true if the operation uses custom lowering, regardless of whether
1013 /// the type is legal or not.
1014 bool isOperationCustom(unsigned Op, EVT VT) const {
1015 return getOperationAction(Op, VT) == Custom;
1016 }
1017
1018 /// Return true if lowering to a jump table is allowed.
1019 virtual bool areJTsAllowed(const Function *Fn) const {
1020 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1021 return false;
1022
1023 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1024 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1025 }
1026
1027 /// Check whether the range [Low,High] fits in a machine word.
1028 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1029 const DataLayout &DL) const {
1030 // FIXME: Using the pointer type doesn't seem ideal.
1031 uint64_t BW = DL.getIndexSizeInBits(0u);
1032 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1033 return Range <= BW;
1034 }
1035
1036 /// Return true if lowering to a jump table is suitable for a set of case
1037 /// clusters which may contain \p NumCases cases and a range of \p Range values.
1038 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1039 uint64_t Range, ProfileSummaryInfo *PSI,
1040 BlockFrequencyInfo *BFI) const;
1041
1042 /// Return true if lowering to a bit test is suitable for a set of case
1043 /// clusters which contains \p NumDests unique destinations, \p Low and
1044 /// \p High as its lowest and highest case values, and expects \p NumCmps
1045 /// case value comparisons. Check if the number of destinations, comparison
1046 /// metric, and range are all suitable.
1047 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1048 const APInt &Low, const APInt &High,
1049 const DataLayout &DL) const {
1050 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1051 // range of cases both require only one branch to lower. Just looking at the
1052 // number of clusters and destinations should be enough to decide whether to
1053 // build bit tests.
1054
1055 // To lower a range with bit tests, the range must fit the bitwidth of a
1056 // machine word.
1057 if (!rangeFitsInWord(Low, High, DL))
1058 return false;
1059
1060 // Decide whether it's profitable to lower this range with bit tests. Each
1061 // destination requires a bit test and branch, and there is an overall range
1062 // check branch. For a small number of clusters, separate comparisons might
1063 // be cheaper, and for many destinations, splitting the range might be
1064 // better.
1065 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1066 (NumDests == 3 && NumCmps >= 6);
1067 }
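
Editor's note: what "lowering with bit tests" buys can be sketched without any LLVM types: rebase the case values against Low, fold the cases of each destination into one machine-word mask, and membership becomes a range check plus a single shift-and-test. A standalone sketch (plain C++; hitsDest and the case values are illustrative only):

    #include <cassert>
    #include <cstdint>

    static bool hitsDest(uint64_t X, uint64_t Low, uint64_t High, uint64_t DestMask) {
      if (X - Low > High - Low)                    // overall range check branch
        return false;
      return ((DestMask >> (X - Low)) & 1) != 0;   // one bit test per destination
    }

    int main() {
      // Cases {10, 13, 17} all branch to one destination; the range 10..17 fits
      // in a 64-bit word, and NumDests == 1 with NumCmps == 3 passes the
      // profitability test in the function above.
      const uint64_t Low = 10, High = 17;
      const uint64_t Mask =
          (1ull << (10 - Low)) | (1ull << (13 - Low)) | (1ull << (17 - Low));
      assert(hitsDest(13, Low, High, Mask));
      assert(!hitsDest(14, Low, High, Mask));
      assert(!hitsDest(9, Low, High, Mask) && !hitsDest(18, Low, High, Mask));
      return 0;
    }
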
1068
1069 /// Return true if the specified operation is illegal on this target or
1070 /// unlikely to be made legal with custom lowering. This is used to help guide
1071 /// high-level lowering decisions.
1072 bool isOperationExpand(unsigned Op, EVT VT) const {
1073 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1074 }
1075
1076 /// Return true if the specified operation is legal on this target.
1077 bool isOperationLegal(unsigned Op, EVT VT) const {
1078 return (VT == MVT::Other || isTypeLegal(VT)) &&
1079 getOperationAction(Op, VT) == Legal;
1080 }
1081
1082 /// Return how this load with extension should be treated: either it is legal,
1083 /// needs to be promoted to a larger size, needs to be expanded to some other
1084 /// code sequence, or the target has a custom expander for it.
1085 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1086 EVT MemVT) const {
1087 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1088 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1089 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1090 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1091 MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1092 unsigned Shift = 4 * ExtType;
1093 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1094 }
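
Editor's note: the decode above pulls one 4-bit action out of a word that packs one nibble per extension type. A toy encode/decode pair showing the same arithmetic (plain C++; the slot indices and action codes below are made up, not ISD extension types or LegalizeAction values):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint16_t Packed = 0;
      auto set = [&Packed](unsigned Index, uint16_t Action) {
        // Clear the nibble for this slot, then write the new 4-bit action.
        Packed = (uint16_t)((Packed & ~(0xFu << (4 * Index))) | (Action << (4 * Index)));
      };
      auto get = [&Packed](unsigned Index) { return (Packed >> (4 * Index)) & 0xFu; };
      set(0, 0); // slot 0: action code 0
      set(1, 3); // slot 1: action code 3
      set(2, 3); // slot 2: action code 3
      assert(get(0) == 0 && get(1) == 3 && get(2) == 3);
      return 0;
    }
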
1095
1096 /// Return true if the specified load with extension is legal on this target.
1097 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1098 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1099 }
1100
1101 /// Return true if the specified load with extension is legal or custom
1102 /// on this target.
1103 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1104 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1105 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1106 }
1107
1108 /// Return how this store with truncation should be treated: either it is
1109 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1110 /// other code sequence, or the target has a custom expander for it.
1111 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1112 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1113 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1114 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1115 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1116 "Table isn't big enough!");
1117 return TruncStoreActions[ValI][MemI];
1118 }
1119
1120 /// Return true if the specified store with truncation is legal on this
1121 /// target.
1122 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1123 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1124 }
1125
1126 /// Return true if the specified store with truncation has a solution on this
1127 /// target.
1128 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1129 return isTypeLegal(ValVT) &&
1130 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1131 getTruncStoreAction(ValVT, MemVT) == Custom);
1132 }
1133
1134 /// Return how the indexed load should be treated: either it is legal, needs
1135 /// to be promoted to a larger size, needs to be expanded to some other code
1136 /// sequence, or the target has a custom expander for it.
1137 LegalizeAction
1138 getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1139 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1140 "Table isn't big enough!");
1141 unsigned Ty = (unsigned)VT.SimpleTy;
1142 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4);
1143 }
1144
1145 /// Return true if the specified indexed load is legal on this target.
1146 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1147 return VT.isSimple() &&
1148 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1149 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1150 }
1151
1152 /// Return how the indexed store should be treated: either it is legal, needs
1153 /// to be promoted to a larger size, needs to be expanded to some other code
1154 /// sequence, or the target has a custom expander for it.
1155 LegalizeAction
1156 getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1157 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
1158 "Table isn't big enough!");
1159 unsigned Ty = (unsigned)VT.SimpleTy;
1160 return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f);
1161 }
1162
1163 /// Return true if the specified indexed store is legal on this target.
1164 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1165 return VT.isSimple() &&
1166 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1167 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1168 }
1169
1170 /// Return how the condition code should be treated: either it is legal, needs
1171 /// to be expanded to some other code sequence, or the target has a custom
1172 /// expander for it.
1173 LegalizeAction
1174 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1175 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1176 ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1177 "Table isn't big enough!");
1178 // See setCondCodeAction for how this is encoded.
1179 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1180 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1181 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1182 assert(Action != Promote && "Can't promote condition code!");
1183 return Action;
1184 }
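
Editor's note: the encoding referenced by the comment packs eight 4-bit actions into each 32-bit word: the word index is VT >> 3 and the nibble offset is 4 * (VT & 7). A toy version of the encode/decode (plain C++; table size, indices and action codes are illustrative only):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t Table[2] = {0, 0};
      auto set = [&Table](unsigned VT, uint32_t Action) {
        uint32_t Shift = 4 * (VT & 7);
        Table[VT >> 3] = (Table[VT >> 3] & ~(0xFu << Shift)) | (Action << Shift);
      };
      auto get = [&Table](unsigned VT) {
        return (Table[VT >> 3] >> (4 * (VT & 7))) & 0xFu;
      };
      set(3, 2);  // entry 3 lives in word 0, nibble 3
      set(11, 4); // entry 11 lives in word 1, nibble 3
      assert(get(3) == 2 && get(11) == 4 && get(4) == 0);
      return 0;
    }
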
1185
1186 /// Return true if the specified condition code is legal on this target.
1187 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1188 return getCondCodeAction(CC, VT) == Legal;
1189 }
1190
1191 /// Return true if the specified condition code is legal or custom on this
1192 /// target.
1193 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1194 return getCondCodeAction(CC, VT) == Legal ||
1195 getCondCodeAction(CC, VT) == Custom;
1196 }
1197
1198 /// If the action for this operation is to promote, this method returns the
1199 /// ValueType to promote to.
1200 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1201 assert(getOperationAction(Op, VT) == Promote &&
1202 "This operation isn't promoted!");
1203
1204 // See if this has an explicit type specified.
1205 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1206 MVT::SimpleValueType>::const_iterator PTTI =
1207 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1208 if (PTTI != PromoteToType.end()) return PTTI->second;
1209
1210 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1211 "Cannot autopromote this type, add it with AddPromotedToType.");
1212
1213 MVT NVT = VT;
1214 do {
1215 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1216 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1217 "Didn't find type to promote to!");
1218 } while (!isTypeLegal(NVT) ||
1219 getOperationAction(Op, NVT) == Promote);
1220 return NVT;
1221 }
1222
1223 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1224 /// operations except for the pointer size. If AllowUnknown is true, this
1225 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1226 /// otherwise it will assert.
1227 EVT getValueType(const DataLayout &DL, Type *Ty,
1228 bool AllowUnknown = false) const {
1229 // Lower scalar pointers to native pointer types.
1230 if (auto *PTy = dyn_cast<PointerType>(Ty))
28
Assuming 'PTy' is null
29
Taking false branch
1231 return getPointerTy(DL, PTy->getAddressSpace());
1232
1233 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
30
Assuming 'VTy' is non-null
31
Taking true branch
1234 Type *EltTy = VTy->getElementType();
1235 // Lower vectors of pointers to native pointer types.
1236 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
32.1
'PTy' is null
32
Assuming 'EltTy' is not a 'PointerType'
33
Taking false branch
1237 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1238 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1239 }
1240 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
34
Called C++ object pointer is null
1241 VTy->getElementCount());
1242 }
1243
1244 return EVT::getEVT(Ty, AllowUnknown);
1245 }
1246
1247 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1248 bool AllowUnknown = false) const {
1249 // Lower scalar pointers to native pointer types.
1250 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1251 return getPointerMemTy(DL, PTy->getAddressSpace());
1252 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1253 Type *Elm = VTy->getElementType();
1254 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1255 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1256 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1257 }
1258 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1259 VTy->getNumElements());
1260 }
1261
1262 return getValueType(DL, Ty, AllowUnknown);
1263 }
1264
1265
1266 /// Return the MVT corresponding to this LLVM type. See getValueType.
1267 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1268 bool AllowUnknown = false) const {
1269 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1270 }
1271
1272 /// Return the desired alignment for ByVal or InAlloca aggregate function
1273 /// arguments in the caller parameter area. This is the actual alignment, not
1274 /// its logarithm.
1275 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1276
1277 /// Return the type of registers that this ValueType will eventually require.
1278 MVT getRegisterType(MVT VT) const {
1279 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1280 return RegisterTypeForVT[VT.SimpleTy];
1281 }
1282
1283 /// Return the type of registers that this ValueType will eventually require.
1284 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1285 if (VT.isSimple()) {
1286 assert((unsigned)VT.getSimpleVT().SimpleTy <
1287 array_lengthof(RegisterTypeForVT));
1288 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1289 }
1290 if (VT.isVector()) {
1291 EVT VT1;
1292 MVT RegisterVT;
1293 unsigned NumIntermediates;
1294 (void)getVectorTypeBreakdown(Context, VT, VT1,
1295 NumIntermediates, RegisterVT);
1296 return RegisterVT;
1297 }
1298 if (VT.isInteger()) {
1299 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1300 }
1301 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1301)
;
1302 }
1303
1304 /// Return the number of registers that this ValueType will eventually
1305 /// require.
1306 ///
1307 /// This is one for any types promoted to live in larger registers, but may be
1308 /// more than one for types (like i64) that are split into pieces. For types
1309 /// like i140, which are first promoted then expanded, it is the number of
1310 /// registers needed to hold all the bits of the original type. For an i140
1311 /// on a 32 bit machine this means 5 registers.
1312 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1313 if (VT.isSimple()) {
1314 assert((unsigned)VT.getSimpleVT().SimpleTy <
1315 array_lengthof(NumRegistersForVT));
1316 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1317 }
1318 if (VT.isVector()) {
1319 EVT VT1;
1320 MVT VT2;
1321 unsigned NumIntermediates;
1322 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1323 }
1324 if (VT.isInteger()) {
1325 unsigned BitWidth = VT.getSizeInBits();
1326 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1327 return (BitWidth + RegWidth - 1) / RegWidth;
1328 }
1329 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1329)
;
1330 }
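
Editor's note: the integer case above is a plain ceiling division, which is where the i140-on-a-32-bit-machine figure of 5 registers in the comment comes from. A one-line standalone check (plain C++, illustrative only):

    #include <cassert>

    int main() {
      // NumRegs = (BitWidth + RegWidth - 1) / RegWidth
      unsigned BitWidth = 140, RegWidth = 32;
      assert((BitWidth + RegWidth - 1) / RegWidth == 5);
      return 0;
    }
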
1331
1332 /// Certain combinations of ABIs, Targets and features require that types
1333 /// are legal for some operations and not for other operations.
1334 /// For MIPS all vector types must be passed through the integer register set.
1335 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1336 CallingConv::ID CC, EVT VT) const {
1337 return getRegisterType(Context, VT);
1338 }
1339
1340 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1341 /// this occurs when a vector type is used, as vectors are passed through the
1342 /// integer register set.
1343 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1344 CallingConv::ID CC,
1345 EVT VT) const {
1346 return getNumRegisters(Context, VT);
1347 }
1348
1349 /// Certain targets have context-sensitive alignment requirements, where one
1350 /// type has the alignment requirement of another type.
1351 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1352 DataLayout DL) const {
1353 return Align(DL.getABITypeAlignment(ArgTy));
1354 }
1355
1356 /// If true, then instruction selection should seek to shrink the FP constant
1357 /// of the specified type to a smaller type in order to save space and / or
1358 /// reduce runtime.
1359 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1360
1361 /// Return true if it is profitable to reduce a load to a smaller type.
1362 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1363 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1364 EVT NewVT) const {
1365 // By default, assume that it is cheaper to extract a subvector from a wide
1366 // vector load rather than creating multiple narrow vector loads.
1367 if (NewVT.isVector() && !Load->hasOneUse())
1368 return false;
1369
1370 return true;
1371 }
1372
1373 /// When splitting a value of the specified type into parts, does the Lo
1374 /// or Hi part come first? This usually follows the endianness, except
1375 /// for ppcf128, where the Hi part always comes first.
1376 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1377 return DL.isBigEndian() || VT == MVT::ppcf128;
1378 }
1379
1380 /// If true, the target has custom DAG combine transformations that it can
1381 /// perform for the specified node.
1382 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1383 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1384 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1385 }
1386
1387 unsigned getGatherAllAliasesMaxDepth() const {
1388 return GatherAllAliasesMaxDepth;
1389 }
1390
1391 /// Returns the size of the platform's va_list object.
1392 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1393 return getPointerTy(DL).getSizeInBits();
1394 }
1395
1396 /// Get maximum # of store operations permitted for llvm.memset
1397 ///
1398 /// This function returns the maximum number of store operations permitted
1399 /// to replace a call to llvm.memset. The value is set by the target at the
1400 /// performance threshold for such a replacement. If OptSize is true,
1401 /// return the limit for functions that have OptSize attribute.
1402 unsigned getMaxStoresPerMemset(bool OptSize) const {
1403 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1404 }
1405
1406 /// Get maximum # of store operations permitted for llvm.memcpy
1407 ///
1408 /// This function returns the maximum number of store operations permitted
1409 /// to replace a call to llvm.memcpy. The value is set by the target at the
1410 /// performance threshold for such a replacement. If OptSize is true,
1411 /// return the limit for functions that have OptSize attribute.
1412 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1413 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1414 }
1415
1416 /// \brief Get maximum # of store operations to be glued together
1417 ///
1418 /// This function returns the maximum number of store operations permitted
1419 /// to glue together during lowering of llvm.memcpy. The value is set by
1420 /// the target at the performance threshold for such a replacement.
1421 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1422 return MaxGluedStoresPerMemcpy;
1423 }
1424
1425 /// Get maximum # of load operations permitted for memcmp
1426 ///
1427 /// This function returns the maximum number of load operations permitted
1428 /// to replace a call to memcmp. The value is set by the target at the
1429 /// performance threshold for such a replacement. If OptSize is true,
1430 /// return the limit for functions that have OptSize attribute.
1431 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1432 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1433 }
1434
1435 /// Get maximum # of store operations permitted for llvm.memmove
1436 ///
1437 /// This function returns the maximum number of store operations permitted
1438 /// to replace a call to llvm.memmove. The value is set by the target at the
1439 /// performance threshold for such a replacement. If OptSize is true,
1440 /// return the limit for functions that have OptSize attribute.
1441 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1442 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1443 }
1444
1445 /// Determine if the target supports unaligned memory accesses.
1446 ///
1447 /// This function returns true if the target allows unaligned memory accesses
1448 /// of the specified type in the given address space. If true, it also returns
1449 /// whether the unaligned memory access is "fast" in the last argument by
1450 /// reference. This is used, for example, in situations where an array
1451 /// copy/move/set is converted to a sequence of store operations. Its use
1452 /// helps to ensure that such replacements don't generate code that causes an
1453 /// alignment error (trap) on the target machine.
1454 virtual bool allowsMisalignedMemoryAccesses(
1455 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1456 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1457 bool * /*Fast*/ = nullptr) const {
1458 return false;
1459 }
1460
1461 /// LLT handling variant.
1462 virtual bool allowsMisalignedMemoryAccesses(
1463 LLT, unsigned AddrSpace = 0, unsigned Align = 1,
1464 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1465 bool * /*Fast*/ = nullptr) const {
1466 return false;
1467 }
1468
1469 /// This function returns true if the memory access is aligned or if the
1470 /// target allows this specific unaligned memory access. If the access is
1471 /// allowed, the optional final parameter returns if the access is also fast
1472 /// (as defined by the target).
1473 bool allowsMemoryAccessForAlignment(
1474 LLVMContext &Context, const DataLayout &DL, EVT VT,
1475 unsigned AddrSpace = 0, unsigned Alignment = 1,
1476 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1477 bool *Fast = nullptr) const;
1478
1479 /// Return true if the memory access of this type is aligned or if the target
1480 /// allows this specific unaligned access for the given MachineMemOperand.
1481 /// If the access is allowed, the optional final parameter returns if the
1482 /// access is also fast (as defined by the target).
1483 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1484 const DataLayout &DL, EVT VT,
1485 const MachineMemOperand &MMO,
1486 bool *Fast = nullptr) const;
1487
1488 /// Return true if the target supports a memory access of this type for the
1489 /// given address space and alignment. If the access is allowed, the optional
1490 /// final parameter returns if the access is also fast (as defined by the
1491 /// target).
1492 virtual bool
1493 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1494 unsigned AddrSpace = 0, unsigned Alignment = 1,
1495 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1496 bool *Fast = nullptr) const;
1497
1498 /// Return true if the target supports a memory access of this type for the
1499 /// given MachineMemOperand. If the access is allowed, the optional
1500 /// final parameter returns if the access is also fast (as defined by the
1501 /// target).
1502 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1503 const MachineMemOperand &MMO,
1504 bool *Fast = nullptr) const;
1505
1506 /// Returns the target specific optimal type for load and store operations as
1507 /// a result of memset, memcpy, and memmove lowering.
1508 ///
1509 /// If DstAlign is zero, it means the destination alignment can
1510 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
1511 /// a need to check it against alignment requirement, probably because the
1512 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
1513 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
1514 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
1515 /// does not need to be loaded. It returns EVT::Other if the type should be
1516 /// determined using generic target-independent logic.
1517 virtual EVT
1518 getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
1519 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1520 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1521 const AttributeList & /*FuncAttributes*/) const {
1522 return MVT::Other;
1523 }
1524
1525
1526 /// LLT returning variant.
1527 virtual LLT
1528 getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
1529 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1530 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1531 const AttributeList & /*FuncAttributes*/) const {
1532 return LLT();
1533 }
1534
1535 /// Returns true if it's safe to use load / store of the specified type to
1536 /// expand memcpy / memset inline.
1537 ///
1538 /// This is mostly true for all types except for some special cases. For
1539 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1540 /// fstpl which also does type conversion. Note the specified type doesn't
1541 /// have to be legal as the hook is used before type legalization.
1542 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1543
1544 /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp.
1545 bool usesUnderscoreSetJmp() const {
1546 return UseUnderscoreSetJmp;
1547 }
1548
1549 /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp.
1550 bool usesUnderscoreLongJmp() const {
1551 return UseUnderscoreLongJmp;
1552 }
1553
1554 /// Return lower limit for number of blocks in a jump table.
1555 virtual unsigned getMinimumJumpTableEntries() const;
1556
1557 /// Return lower limit of the density in a jump table.
1558 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1559
1560 /// Return upper limit for number of entries in a jump table.
1561 /// Zero if no limit.
1562 unsigned getMaximumJumpTableSize() const;
1563
1564 virtual bool isJumpTableRelative() const {
1565 return TM.isPositionIndependent();
1566 }
1567
1568 /// If a physical register, this specifies the register that
1569 /// llvm.savestack/llvm.restorestack should save and restore.
1570 unsigned getStackPointerRegisterToSaveRestore() const {
1571 return StackPointerRegisterToSaveRestore;
1572 }
1573
1574 /// If a physical register, this returns the register that receives the
1575 /// exception address on entry to an EH pad.
1576 virtual unsigned
1577 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1578 // 0 is guaranteed to be the NoRegister value on all targets
1579 return 0;
1580 }
1581
1582 /// If a physical register, this returns the register that receives the
1583 /// exception typeid on entry to a landing pad.
1584 virtual unsigned
1585 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1586 // 0 is guaranteed to be the NoRegister value on all targets
1587 return 0;
1588 }
1589
1590 virtual bool needsFixedCatchObjects() const {
1591 report_fatal_error("Funclet EH is not implemented for this target");
1592 }
1593
1594 /// Return the minimum stack alignment of an argument.
1595 Align getMinStackArgumentAlignment() const {
1596 return MinStackArgumentAlignment;
1597 }
1598
1599 /// Return the minimum function alignment.
1600 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1601
1602 /// Return the preferred function alignment.
1603 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1604
1605 /// Return the preferred loop alignment.
1606 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1607 return PrefLoopAlignment;
1608 }
1609
1610 /// Should loops be aligned even when the function is marked OptSize (but not
1611 /// MinSize).
1612 virtual bool alignLoopsWithOptSize() const {
1613 return false;
1614 }
1615
1616 /// If the target has a standard location for the stack protector guard,
1617 /// returns the address of that location. Otherwise, returns nullptr.
1618 /// DEPRECATED: please override useLoadStackGuardNode and customize
1619 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1620 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1621
1622 /// Inserts necessary declarations for SSP (stack protection) purpose.
1623 /// Should be used only when getIRStackGuard returns nullptr.
1624 virtual void insertSSPDeclarations(Module &M) const;
1625
1626 /// Return the variable that's previously inserted by insertSSPDeclarations,
1627 /// if any, otherwise return nullptr. Should be used only when
1628 /// getIRStackGuard returns nullptr.
1629 virtual Value *getSDagStackGuard(const Module &M) const;
1630
1631 /// If this function returns true, stack protection checks should XOR the
1632 /// frame pointer (or whichever pointer is used to address locals) into the
1633 /// stack guard value before checking it. getIRStackGuard must return nullptr
1634 /// if this returns true.
1635 virtual bool useStackGuardXorFP() const { return false; }
1636
1637 /// If the target has a standard stack protection check function that
1638 /// performs validation and error handling, returns the function. Otherwise,
1639 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1640 /// Should be used only when getIRStackGuard returns nullptr.
1641 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1642
1643protected:
1644 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1645 bool UseTLS) const;
1646
1647public:
1648 /// Returns the target-specific address of the unsafe stack pointer.
1649 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1650
1651 /// Returns the name of the symbol used to emit stack probes or the empty
1652 /// string if not applicable.
1653 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1654 return "";
1655 }
1656
1657 /// Returns true if a cast between SrcAS and DestAS is a noop.
1658 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1659 return false;
1660 }
1661
1662 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1663 /// are happy to sink it into basic blocks. A cast may be free, but not
1664 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
1665 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1666 return isNoopAddrSpaceCast(SrcAS, DestAS);
1667 }
1668
1669 /// Return true if the pointer arguments to CI should be aligned by aligning
1670 /// the object whose address is being passed. If so then MinSize is set to the
1671 /// minimum size the object must be to be aligned and PrefAlign is set to the
1672 /// preferred alignment.
1673 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1674 unsigned & /*PrefAlign*/) const {
1675 return false;
1676 }
1677
1678 //===--------------------------------------------------------------------===//
1679 /// \name Helpers for TargetTransformInfo implementations
1680 /// @{
1681
1682 /// Get the ISD node that corresponds to the Instruction class opcode.
1683 int InstructionOpcodeToISD(unsigned Opcode) const;
1684
1685 /// Estimate the cost of type-legalization and the legalized type.
1686 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1687 Type *Ty) const;
1688
1689 /// @}
1690
1691 //===--------------------------------------------------------------------===//
1692 /// \name Helpers for atomic expansion.
1693 /// @{
1694
1695 /// Returns the maximum atomic operation size (in bits) supported by
1696 /// the backend. Atomic operations greater than this size (as well
1697 /// as ones that are not naturally aligned), will be expanded by
1698 /// AtomicExpandPass into an __atomic_* library call.
1699 unsigned getMaxAtomicSizeInBitsSupported() const {
1700 return MaxAtomicSizeInBitsSupported;
1701 }
1702
1703 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1704 /// the backend supports. Any smaller operations are widened in
1705 /// AtomicExpandPass.
1706 ///
1707 /// Note that *unlike* operations above the maximum size, atomic ops
1708 /// are still natively supported below the minimum; they just
1709 /// require a more complex expansion.
1710 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1711
1712 /// Whether the target supports unaligned atomic operations.
1713 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1714
1715 /// Whether AtomicExpandPass should automatically insert fences and reduce
1716 /// ordering for this atomic. This should be true for most architectures with
1717 /// weak memory ordering. Defaults to false.
1718 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1719 return false;
1720 }
1721
1722 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1723 /// corresponding pointee type. This may entail some non-trivial operations to
1724 /// truncate or reconstruct types that will be illegal in the backend. See
1725 /// ARMISelLowering for an example implementation.
1726 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1727 AtomicOrdering Ord) const {
1728 llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1728)
;
1729 }
1730
1731 /// Perform a store-conditional operation to Addr. Return the status of the
1732 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1733 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1734 Value *Addr, AtomicOrdering Ord) const {
1735 llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1735)
;
1736 }
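// Illustrative sketch (not from this header): a target that exposes LL/SC via
// target intrinsics typically implements the two hooks above along these
// lines. "MyTargetLowering" and the intrinsic IDs my_ldxr/my_stxr are
// hypothetical stand-ins for a real target's exclusive load/store intrinsics
// (compare ARMISelLowering for a production implementation).
Value *MyTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                        AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getModule();
  Type *Tys[] = {Addr->getType()};
  // Hypothetical exclusive-load intrinsic returning i64.
  Function *Ldxr = Intrinsic::getDeclaration(M, Intrinsic::my_ldxr, Tys);
  Type *ValTy = cast<PointerType>(Addr->getType())->getElementType();
  return Builder.CreateTruncOrBitCast(Builder.CreateCall(Ldxr, Addr), ValTy);
}

Value *MyTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                              Value *Addr,
                                              AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getModule();
  Type *Tys[] = {Addr->getType()};
  // Hypothetical exclusive-store intrinsic: returns 0 on success, non-zero
  // otherwise, matching the contract documented above.
  Function *Stxr = Intrinsic::getDeclaration(M, Intrinsic::my_stxr, Tys);
  return Builder.CreateCall(
      Stxr, {Builder.CreateZExtOrBitCast(Val, Builder.getInt64Ty()), Addr});
}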
1737
1738 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1739 /// represents the core LL/SC loop which will be lowered at a late stage by
1740 /// the backend.
1741 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1742 AtomicRMWInst *AI,
1743 Value *AlignedAddr, Value *Incr,
1744 Value *Mask, Value *ShiftAmt,
1745 AtomicOrdering Ord) const {
1746 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1746)
;
1747 }
1748
1749 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1750 /// represents the core LL/SC loop which will be lowered at a late stage by
1751 /// the backend.
1752 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1753 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1754 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1755 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1755)
;
1756 }
1757
1758 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1759 /// It is called by AtomicExpandPass before expanding an
1760 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1761 /// if shouldInsertFencesForAtomic returns true.
1762 ///
1763 /// Inst is the original atomic instruction, prior to other expansions that
1764 /// may be performed.
1765 ///
1766 /// This function should either return a nullptr, or a pointer to an IR-level
1767 /// Instruction*. Even complex fence sequences can be represented by a
1768 /// single Instruction* through an intrinsic to be lowered later.
1769 /// Backends should override this method to produce target-specific intrinsic
1770 /// for their fences.
1771 /// FIXME: Please note that the default implementation here in terms of
1772 /// IR-level fences exists for historical/compatibility reasons and is
1773 /// *unsound* ! Fences cannot, in general, be used to restore sequential
1774 /// consistency. For example, consider the following example:
1775 /// atomic<int> x = y = 0;
1776 /// int r1, r2, r3, r4;
1777 /// Thread 0:
1778 /// x.store(1);
1779 /// Thread 1:
1780 /// y.store(1);
1781 /// Thread 2:
1782 /// r1 = x.load();
1783 /// r2 = y.load();
1784 /// Thread 3:
1785 /// r3 = y.load();
1786 /// r4 = x.load();
1787 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1788 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1789 /// IR-level fences can prevent it.
1790 /// @{
1791 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1792 AtomicOrdering Ord) const {
1793 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1794 return Builder.CreateFence(Ord);
1795 else
1796 return nullptr;
1797 }
1798
1799 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1800 Instruction *Inst,
1801 AtomicOrdering Ord) const {
1802 if (isAcquireOrStronger(Ord))
1803 return Builder.CreateFence(Ord);
1804 else
1805 return nullptr;
1806 }
1807 /// @}
1808
1809 // Emits code that executes when the comparison result in the ll/sc
1810 // expansion of a cmpxchg instruction is such that the store-conditional will
1811 // not execute. This makes it possible to balance out the load-linked with
1812 // a dedicated instruction, if desired.
1813 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1814 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
1815 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1816
1817 /// Returns true if the given (atomic) store should be expanded by the
1818 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1819 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1820 return false;
1821 }
1822
1823 /// Returns true if arguments should be sign-extended in lib calls.
1824 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1825 return IsSigned;
1826 }
1827
1828 /// Returns true if arguments should be extended in lib calls.
1829 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1830 return true;
1831 }
1832
1833 /// Returns how the given (atomic) load should be expanded by the
1834 /// IR-level AtomicExpand pass.
1835 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1836 return AtomicExpansionKind::None;
1837 }
1838
1839 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1840 /// AtomicExpand pass.
1841 virtual AtomicExpansionKind
1842 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1843 return AtomicExpansionKind::None;
1844 }
1845
1846 /// Returns how the IR-level AtomicExpand pass should expand the given
1847 /// AtomicRMW, if at all. Default is to never expand.
1848 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1849 return RMW->isFloatingPointOperation() ?
1850 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1851 }
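// Illustrative override sketch (not part of this header): an LL/SC-based
// target that only handles word-sized atomics natively might send narrower
// RMW operations through the IR-level LL/SC expansion. "MyTargetLowering" and
// the 32-bit threshold are hypothetical.
TargetLowering::AtomicExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  if (RMW->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg; // same as the default above
  unsigned Size = RMW->getType()->getPrimitiveSizeInBits();
  return Size < 32 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
}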
1852
1853 /// On some platforms, an AtomicRMW that never actually modifies the value
1854 /// (such as fetch_add of 0) can be turned into a fence followed by an
1855 /// atomic load. This may sound useless, but it makes it possible for the
1856 /// processor to keep the cacheline shared, dramatically improving
1857 /// performance. And such idempotent RMWs are useful for implementing some
1858 /// kinds of locks, see for example (justification + benchmarks):
1859 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1860 /// This method tries doing that transformation, returning the atomic load if
1861 /// it succeeds, and nullptr otherwise.
1862 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1863 /// another round of expansion.
1864 virtual LoadInst *
1865 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1866 return nullptr;
1867 }
1868
1869 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1870 /// SIGN_EXTEND, or ANY_EXTEND).
1871 virtual ISD::NodeType getExtendForAtomicOps() const {
1872 return ISD::ZERO_EXTEND;
1873 }
1874
1875 /// @}
1876
1877 /// Returns true if we should normalize
1878 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1879 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1880 /// that it saves us from materializing N0 and N1 in an integer register.
1881 /// Targets that are able to perform and/or on flags should return false here.
1882 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1883 EVT VT) const {
1884 // If a target has multiple condition registers, then it likely has logical
1885 // operations on those registers.
1886 if (hasMultipleConditionRegisters())
1887 return false;
1888 // Only do the transform if the value won't be split into multiple
1889 // registers.
1890 LegalizeTypeAction Action = getTypeAction(Context, VT);
1891 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1892 Action != TypeSplitVector;
1893 }
1894
1895 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
1896
1897 /// Return true if a select of constants (select Cond, C1, C2) should be
1898 /// transformed into simple math ops with the condition value. For example:
1899 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1900 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1901 return false;
1902 }
1903
1904 /// Return true if it is profitable to transform an integer
1905 /// multiplication-by-constant into simpler operations like shifts and adds.
1906 /// This may be true if the target does not directly support the
1907 /// multiplication operation for the specified type or the sequence of simpler
1908 /// ops is faster than the multiply.
1909 virtual bool decomposeMulByConstant(LLVMContext &Context,
1910 EVT VT, SDValue C) const {
1911 return false;
1912 }
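// Illustrative override sketch (not part of this header): a target with a slow
// integer multiplier might allow the combine for constants of the form
// 2^N +/- 1, so that e.g. (mul X, 9) becomes (add (shl X, 3), X).
// "MyTargetLowering" and the heuristic are hypothetical.
bool MyTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                              SDValue C) const {
  auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode());
  if (!ConstNode || !VT.isScalarInteger())
    return false;
  const APInt &Imm = ConstNode->getAPIntValue();
  return (Imm - 1).isPowerOf2() || (Imm + 1).isPowerOf2();
}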
1913
1914 /// Return true if it is more correct/profitable to use strict FP_TO_INT
1915 /// conversion operations - canonicalizing the FP source value instead of
1916 /// converting all cases and then selecting based on value.
1917 /// This may be true if the target throws exceptions for out of bounds
1918 /// conversions or has fast FP CMOV.
1919 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1920 bool IsSigned) const {
1921 return false;
1922 }
1923
1924 //===--------------------------------------------------------------------===//
1925 // TargetLowering Configuration Methods - These methods should be invoked by
1926 // the derived class constructor to configure this object for the target.
1927 //
1928protected:
1929 /// Specify how the target extends the result of integer and floating point
1930 /// boolean values from i1 to a wider type. See getBooleanContents.
1931 void setBooleanContents(BooleanContent Ty) {
1932 BooleanContents = Ty;
1933 BooleanFloatContents = Ty;
1934 }
1935
1936 /// Specify how the target extends the result of integer and floating point
1937 /// boolean values from i1 to a wider type. See getBooleanContents.
1938 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
1939 BooleanContents = IntTy;
1940 BooleanFloatContents = FloatTy;
1941 }
1942
1943 /// Specify how the target extends the result of a vector boolean value from a
1944 /// vector of i1 to a wider type. See getBooleanContents.
1945 void setBooleanVectorContents(BooleanContent Ty) {
1946 BooleanVectorContents = Ty;
1947 }
1948
1949 /// Specify the target scheduling preference.
1950 void setSchedulingPreference(Sched::Preference Pref) {
1951 SchedPreferenceInfo = Pref;
1952 }
1953
1954 /// Indicate whether this target prefers to use _setjmp to implement
1955 /// llvm.setjmp or the version without _. Defaults to false.
1956 void setUseUnderscoreSetJmp(bool Val) {
1957 UseUnderscoreSetJmp = Val;
1958 }
1959
1960 /// Indicate whether this target prefers to use _longjmp to implement
1961 /// llvm.longjmp or the version without _. Defaults to false.
1962 void setUseUnderscoreLongJmp(bool Val) {
1963 UseUnderscoreLongJmp = Val;
1964 }
1965
1966 /// Indicate the minimum number of blocks to generate jump tables.
1967 void setMinimumJumpTableEntries(unsigned Val);
1968
1969 /// Indicate the maximum number of entries in jump tables.
1970 /// Set to zero to generate unlimited jump tables.
1971 void setMaximumJumpTableSize(unsigned);
1972
1973 /// If set to a physical register, this specifies the register that
1974 /// llvm.savestack/llvm.restorestack should save and restore.
1975 void setStackPointerRegisterToSaveRestore(unsigned R) {
1976 StackPointerRegisterToSaveRestore = R;
1977 }
1978
1979 /// Tells the code generator that the target has multiple (allocatable)
1980 /// condition registers that can be used to store the results of comparisons
1981 /// for use by selects and conditional branches. With multiple condition
1982 /// registers, the code generator will not aggressively sink comparisons into
1983 /// the blocks of their users.
1984 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
1985 HasMultipleConditionRegisters = hasManyRegs;
1986 }
1987
1988 /// Tells the code generator that the target has BitExtract instructions.
1989 /// The code generator will aggressively sink "shift"s into the blocks of
1990 /// their users if the users will generate "and" instructions which can be
1991 /// combined with "shift" to BitExtract instructions.
1992 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
1993 HasExtractBitsInsn = hasExtractInsn;
1994 }
1995
1996 /// Tells the code generator not to expand logic operations on comparison
1997 /// predicates into separate sequences that increase the amount of flow
1998 /// control.
1999 void setJumpIsExpensive(bool isExpensive = true);
2000
2001 /// Tells the code generator which bitwidths to bypass.
2002 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2003 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2004 }
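// Hedged usage sketch (hypothetical target constructor): a target whose 64-bit
// divide is microcoded but whose 32-bit divide is fast can request the bypass;
// the code generator then guards 64-bit divisions with a runtime check and
// uses the 32-bit form when both operands fit. X86 does something similar for
// CPUs with slow 64-bit division.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  addBypassSlowDiv(/*SlowBitWidth=*/64, /*FastBitWidth=*/32);
}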
2005
2006 /// Add the specified register class as an available regclass for the
2007 /// specified value type. This indicates the selector can handle values of
2008 /// that class natively.
2009 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2010 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
2011 RegClassForVT[VT.SimpleTy] = RC;
2012 }
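// Hedged usage sketch: register classes are registered from the target's
// TargetLowering constructor, before computeRegisterProperties() is called.
// The register class names below are hypothetical.
//   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
//   addRegisterClass(MVT::i64, &MyTarget::GPR64RegClass);
//   addRegisterClass(MVT::f64, &MyTarget::FPR64RegClass);
//   computeRegisterProperties(Subtarget.getRegisterInfo());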
2013
2014 /// Return the largest legal super-reg register class of the register class
2015 /// for the specified type and its associated "cost".
2016 virtual std::pair<const TargetRegisterClass *, uint8_t>
2017 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2018
2019 /// Once all of the register classes are added, this allows us to compute
2020 /// derived properties we expose.
2021 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2022
2023 /// Indicate that the specified operation does not work with the specified
2024 /// type and indicate what to do about it. Note that VT may refer to either
2025 /// the type of a result or that of an operand of Op.
2026 void setOperationAction(unsigned Op, MVT VT,
2027 LegalizeAction Action) {
2028 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2029 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2030 }
2031
2032 /// Indicate that the specified load with extension does not work with the
2033 /// specified type and indicate what to do about it.
2034 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2035 LegalizeAction Action) {
2036 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2037 MemVT.isValid() && "Table isn't big enough!");
2038 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2039 unsigned Shift = 4 * ExtType;
2040 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2041 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2042 }
2043
2044 /// Indicate that the specified truncating store does not work with the
2045 /// specified type and indicate what to do about it.
2046 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2047 LegalizeAction Action) {
2048 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2049 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2050 }
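// Hedged usage sketch (hypothetical target constructor): the action tables
// above are usually populated together when a type is wired up, e.g.
//   setOperationAction(ISD::SDIV, MVT::i64, Expand);
//   setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i16, Expand);
//   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// Operations left at the default action (Legal) are assumed to be directly
// selectable by the target.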
2051
2052 /// Indicate that the specified indexed load does or does not work with the
2053 /// specified type and indicate what to do about it.
2054 ///
2055 /// NOTE: All indexed mode loads are initialized to Expand in
2056 /// TargetLowering.cpp
2057 void setIndexedLoadAction(unsigned IdxMode, MVT VT,
2058 LegalizeAction Action) {
2059 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2060 (unsigned)Action < 0xf && "Table isn't big enough!");
2061 // Load actions are kept in the upper half.
2062 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0;
2063 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4;
2064 }
2065
2066 /// Indicate that the specified indexed store does or does not work with the
2067 /// specified type and indicate what to do about it.
2068 ///
2069 /// NOTE: All indexed mode stores are initialized to Expand in
2070 /// TargetLowering.cpp
2071 void setIndexedStoreAction(unsigned IdxMode, MVT VT,
2072 LegalizeAction Action) {
2073 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
2074 (unsigned)Action < 0xf && "Table isn't big enough!");
2075 // Store actions are kept in the lower half.
2076 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f;
2077 IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action);
2078 }
2079
2080 /// Indicate that the specified condition code is or isn't supported on the
2081 /// target and indicate what to do about it.
2082 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2083 LegalizeAction Action) {
2084 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2085 "Table isn't big enough!");
2086 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2087 /// The lower 3 bits of the SimpleTy index select the Nth 4-bit group within
2088 /// the 32-bit value, and the upper 29 bits index into the second dimension of
2089 /// the array to select which 32-bit value to use.
2090 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2091 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2092 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2093 }
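// Worked example of the packing above (illustrative): for a type with
// VT.SimpleTy == 13, Shift = 4 * (13 & 0x7) = 20, so the 4-bit action lands in
// bits [23:20] of CondCodeActions[CC][13 >> 3], i.e. in the second 32-bit
// element (index 1) for that condition code.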
2094
2095 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2096 /// to trying a larger integer/fp until it can find one that works. If that
2097 /// default is insufficient, this method can be used by the target to override
2098 /// the default.
2099 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2100 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2101 }
2102
2103 /// Convenience method to set an operation to Promote and specify the type
2104 /// in a single call.
2105 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2106 setOperationAction(Opc, OrigVT, Promote);
2107 AddPromotedToType(Opc, OrigVT, DestVT);
2108 }
2109
2110 /// Targets should invoke this method for each target independent node that
2111 /// they want to provide a custom DAG combiner for by implementing the
2112 /// PerformDAGCombine virtual method.
2113 void setTargetDAGCombine(ISD::NodeType NT) {
2114 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2115 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2116 }
2117
2118 /// Set the target's minimum function alignment.
2119 void setMinFunctionAlignment(Align Alignment) {
2120 MinFunctionAlignment = Alignment;
2121 }
2122
2123 /// Set the target's preferred function alignment. This should be set if
2124 /// there is a performance benefit to higher-than-minimum alignment.
2125 void setPrefFunctionAlignment(Align Alignment) {
2126 PrefFunctionAlignment = Alignment;
2127 }
2128
2129 /// Set the target's preferred loop alignment. The default alignment of one
2130 /// means the target does not care about loop alignment. The target may also
2131 /// override getPrefLoopAlignment to provide per-loop values.
2132 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2133
2134 /// Set the minimum stack alignment of an argument.
2135 void setMinStackArgumentAlignment(Align Alignment) {
2136 MinStackArgumentAlignment = Alignment;
2137 }
2138
2139 /// Set the maximum atomic operation size supported by the
2140 /// backend. Atomic operations greater than this size (as well as
2141 /// ones that are not naturally aligned), will be expanded by
2142 /// AtomicExpandPass into an __atomic_* library call.
2143 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2144 MaxAtomicSizeInBitsSupported = SizeInBits;
2145 }
2146
2147 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2148 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2149 MinCmpXchgSizeInBits = SizeInBits;
2150 }
2151
2152 /// Sets whether unaligned atomic operations are supported.
2153 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2154 SupportsUnalignedAtomics = UnalignedSupported;
2155 }
2156
2157public:
2158 //===--------------------------------------------------------------------===//
2159 // Addressing mode description hooks (used by LSR etc).
2160 //
2161
2162 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2163 /// instructions reading the address. This allows as much computation as
2164 /// possible to be done in the address mode for that operand. This hook lets
2165 /// targets also pass back when this should be done on intrinsics which
2166 /// load/store.
2167 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2168 SmallVectorImpl<Value*> &/*Ops*/,
2169 Type *&/*AccessTy*/) const {
2170 return false;
2171 }
2172
2173 /// This represents an addressing mode of:
2174 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2175 /// If BaseGV is null, there is no BaseGV.
2176 /// If BaseOffs is zero, there is no base offset.
2177 /// If HasBaseReg is false, there is no base register.
2178 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2179 /// no scale.
2180 struct AddrMode {
2181 GlobalValue *BaseGV = nullptr;
2182 int64_t BaseOffs = 0;
2183 bool HasBaseReg = false;
2184 int64_t Scale = 0;
2185 AddrMode() = default;
2186 };
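// Illustrative mapping (not part of this header): an x86-style address such as
// GV + 16 + BaseReg + 4*IndexReg would be described to the hooks below as:
//   AddrMode AM;
//   AM.BaseGV = GV;        // the global, if any
//   AM.BaseOffs = 16;      // constant displacement
//   AM.HasBaseReg = true;  // a base register is present
//   AM.Scale = 4;          // scaled index register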
2187
2188 /// Return true if the addressing mode represented by AM is legal for this
2189 /// target, for a load/store of the specified type.
2190 ///
2191 /// The type may be VoidTy, in which case only return true if the addressing
2192 /// mode is legal for a load/store of any legal type. TODO: Handle
2193 /// pre/postinc as well.
2194 ///
2195 /// If the address space cannot be determined, it will be -1.
2196 ///
2197 /// TODO: Remove default argument
2198 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2199 Type *Ty, unsigned AddrSpace,
2200 Instruction *I = nullptr) const;
2201
2202 /// Return the cost of the scaling factor used in the addressing mode
2203 /// represented by AM for this target, for a load/store of the specified type.
2204 ///
2205 /// If the AM is supported, the return value must be >= 0.
2206 /// If the AM is not supported, it returns a negative value.
2207 /// TODO: Handle pre/postinc as well.
2208 /// TODO: Remove default argument
2209 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2210 Type *Ty, unsigned AS = 0) const {
2211 // Default: assume that any scaling factor used in a legal AM is free.
2212 if (isLegalAddressingMode(DL, AM, Ty, AS))
2213 return 0;
2214 return -1;
2215 }
2216
2217 /// Return true if the specified immediate is legal icmp immediate, that is
2218 /// the target has icmp instructions which can compare a register against the
2219 /// immediate without having to materialize the immediate into a register.
2220 virtual bool isLegalICmpImmediate(int64_t) const {
2221 return true;
2222 }
2223
2224 /// Return true if the specified immediate is legal add immediate, that is the
2225 /// target has add instructions which can add a register with the immediate
2226 /// without having to materialize the immediate into a register.
2227 virtual bool isLegalAddImmediate(int64_t) const {
2228 return true;
2229 }
2230
2231 /// Return true if the specified immediate is legal for the value input of a
2232 /// store instruction.
2233 virtual bool isLegalStoreImmediate(int64_t Value) const {
2234 // Default implementation assumes that at least 0 works since it is likely
2235 // that a zero register exists or a zero immediate is allowed.
2236 return Value == 0;
2237 }
2238
2239 /// Return true if it's significantly cheaper to shift a vector by a uniform
2240 /// scalar than by an amount which will vary across each lane. On x86, for
2241 /// example, there is a "psllw" instruction for the former case, but no simple
2242 /// instruction for a general "a << b" operation on vectors.
2243 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2244 return false;
2245 }
2246
2247 /// Returns true if the opcode is a commutative binary operation.
2248 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2249 // FIXME: This should get its info from the td file.
2250 switch (Opcode) {
2251 case ISD::ADD:
2252 case ISD::SMIN:
2253 case ISD::SMAX:
2254 case ISD::UMIN:
2255 case ISD::UMAX:
2256 case ISD::MUL:
2257 case ISD::MULHU:
2258 case ISD::MULHS:
2259 case ISD::SMUL_LOHI:
2260 case ISD::UMUL_LOHI:
2261 case ISD::FADD:
2262 case ISD::FMUL:
2263 case ISD::AND:
2264 case ISD::OR:
2265 case ISD::XOR:
2266 case ISD::SADDO:
2267 case ISD::UADDO:
2268 case ISD::ADDC:
2269 case ISD::ADDE:
2270 case ISD::SADDSAT:
2271 case ISD::UADDSAT:
2272 case ISD::FMINNUM:
2273 case ISD::FMAXNUM:
2274 case ISD::FMINNUM_IEEE:
2275 case ISD::FMAXNUM_IEEE:
2276 case ISD::FMINIMUM:
2277 case ISD::FMAXIMUM:
2278 return true;
2279 default: return false;
2280 }
2281 }
2282
2283 /// Return true if the node is a math/logic binary operator.
2284 virtual bool isBinOp(unsigned Opcode) const {
2285 // A commutative binop must be a binop.
2286 if (isCommutativeBinOp(Opcode))
2287 return true;
2288 // These are non-commutative binops.
2289 switch (Opcode) {
2290 case ISD::SUB:
2291 case ISD::SHL:
2292 case ISD::SRL:
2293 case ISD::SRA:
2294 case ISD::SDIV:
2295 case ISD::UDIV:
2296 case ISD::SREM:
2297 case ISD::UREM:
2298 case ISD::FSUB:
2299 case ISD::FDIV:
2300 case ISD::FREM:
2301 return true;
2302 default:
2303 return false;
2304 }
2305 }
2306
2307 /// Return true if it's free to truncate a value of type FromTy to type
2308 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2309 /// by referencing its sub-register AX.
2310 /// Targets must return false when FromTy <= ToTy.
2311 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2312 return false;
2313 }
2314
2315 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2316 /// whether a call is in tail position. Typically this means that both results
2317 /// would be assigned to the same register or stack slot, but it could mean
2318 /// the target performs adequate checks of its own before proceeding with the
2319 /// tail call. Targets must return false when FromTy <= ToTy.
2320 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2321 return false;
2322 }
2323
2324 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2325 return false;
2326 }
2327
2328 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2329
2330 /// Return true if the extension represented by \p I is free.
2331 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2332 /// this method can use the context provided by \p I to decide
2333 /// whether or not \p I is free.
2334 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2335 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2336 /// returns true as well. The converse is not true.
2337 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2338 /// \pre \p I must be a sign, zero, or fp extension.
2339 bool isExtFree(const Instruction *I) const {
2340 switch (I->getOpcode()) {
2341 case Instruction::FPExt:
2342 if (isFPExtFree(EVT::getEVT(I->getType()),
2343 EVT::getEVT(I->getOperand(0)->getType())))
2344 return true;
2345 break;
2346 case Instruction::ZExt:
2347 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2348 return true;
2349 break;
2350 case Instruction::SExt:
2351 break;
2352 default:
2353 llvm_unreachable("Instruction is not an extension")::llvm::llvm_unreachable_internal("Instruction is not an extension"
, "/build/llvm-toolchain-snapshot-10~+201911111502510600c19528f1809/llvm/include/llvm/CodeGen/TargetLowering.h"
, 2353)
;
2354 }
2355 return isExtFreeImpl(I);
2356 }
2357
2358 /// Return true if \p Load and \p Ext can form an ExtLoad.
2359 /// For example, in AArch64
2360 /// %L = load i8, i8* %ptr
2361 /// %E = zext i8 %L to i32
2362 /// can be lowered into one load instruction
2363 /// ldrb w0, [x0]
2364 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2365 const DataLayout &DL) const {
2366 EVT VT = getValueType(DL, Ext->getType());
2367 EVT LoadVT = getValueType(DL, Load->getType());
2368
2369 // If the load has other users and the truncate is not free, the ext
2370 // probably isn't free.
2371 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2372 !isTruncateFree(Ext->getType(), Load->getType()))
2373 return false;
2374
2375 // Check whether the target supports casts folded into loads.
2376