Bug Summary

File: llvm/include/llvm/CodeGen/TargetLowering.h
Warning: line 1237, column 41
Called C++ object pointer is null

The analyzer traces the null value out of AArch64TTIImpl::getCmpSelInstrCost, which passes a null 'CondTy' to TargetLoweringBase::getValueType (see the path annotations below); the flagged dereference of that null pointer is at TargetLowering.h:1237.

Annotated Source Code


clang -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mthread-model posix -mframe-pointer=none -fmath-errno -fno-rounding-math -masm-verbose -mconstructor-aliases -munwind-tables -target-cpu x86-64 -dwarf-column-info -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-10/lib/clang/10.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/include -I /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-10/lib/clang/10.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd=. -ferror-limit 19 -fmessage-length 0 -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -fobjc-runtime=gcc -fdiagnostics-show-option -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-01-13-084841-49055-1 -x c++ /build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsAArch64.h"
19#include "llvm/Support/Debug.h"
20#include <algorithm>
21using namespace llvm;
22
23#define DEBUG_TYPE "aarch64tti"
24
25static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
26 cl::init(true), cl::Hidden);
27
28bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
29 const Function *Callee) const {
30 const TargetMachine &TM = getTLI()->getTargetMachine();
31
32 const FeatureBitset &CallerBits =
33 TM.getSubtargetImpl(*Caller)->getFeatureBits();
34 const FeatureBitset &CalleeBits =
35 TM.getSubtargetImpl(*Callee)->getFeatureBits();
36
37 // Inline a callee if its target-features are a subset of the caller's
38 // target-features.
39 return (CallerBits & CalleeBits) == CalleeBits;
40}
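
For reference, a minimal standalone sketch (not part of the analyzed source; the masks and values are hypothetical) of the subset test used by areInlineCompatible above:

#include <cassert>
#include <cstdint>

int main() {
  // Hypothetical feature masks: the caller provides features {1,2,3}, the
  // callee requires features {1,2}.
  uint64_t CallerBits = 0b1110;
  uint64_t CalleeBits = 0b0110;
  // Same subset test as areInlineCompatible: every callee bit must also be
  // set in the caller, otherwise inlining could expose unsupported
  // instructions to the caller's subtarget.
  assert((CallerBits & CalleeBits) == CalleeBits);
  // A callee that additionally needed bit 4 (mask 0b10110) would fail the test.
  return 0;
}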
41
42/// Calculate the cost of materializing a 64-bit value. This helper
43/// method might only calculate a fraction of a larger immediate. Therefore it
44/// is valid to return a cost of ZERO.
45int AArch64TTIImpl::getIntImmCost(int64_t Val) {
46 // Check if the immediate can be encoded within an instruction.
47 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
48 return 0;
49
50 if (Val < 0)
51 Val = ~Val;
52
53 // Calculate how many moves we will need to materialize this constant.
54 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
55 AArch64_IMM::expandMOVImm(Val, 64, Insn);
56 return Insn.size();
57}
58
59/// Calculate the cost of materializing the given constant.
60int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
61 assert(Ty->isIntegerTy());
62
63 unsigned BitSize = Ty->getPrimitiveSizeInBits();
64 if (BitSize == 0)
65 return ~0U;
66
67 // Sign-extend all constants to a multiple of 64-bit.
68 APInt ImmVal = Imm;
69 if (BitSize & 0x3f)
70 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
71
72 // Split the constant into 64-bit chunks and calculate the cost for each
73 // chunk.
74 int Cost = 0;
75 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
76 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
77 int64_t Val = Tmp.getSExtValue();
78 Cost += getIntImmCost(Val);
79 }
80 // We need at least one instruction to materialize the constant.
81 return std::max(1, Cost);
82}
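
A small standalone sketch (assumed bit width, not from the report) of how getIntImmCost splits a wide immediate: a bit size that is not a multiple of 64 is first rounded up for sign-extension, then costed in 64-bit chunks:

#include <cassert>

int main() {
  unsigned BitSize = 96;                      // e.g. an i96 immediate
  unsigned Rounded = (BitSize + 63) & ~0x3fU; // sign-extension target: 128 bits
  assert(Rounded == 128);
  unsigned Chunks = 0;
  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64)
    ++Chunks;                                 // each 64-bit chunk is costed separately
  assert(Chunks == 2);                        // two chunks for an i96 value
  return 0;
}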
83
84int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
85 const APInt &Imm, Type *Ty) {
86 assert(Ty->isIntegerTy());
87
88 unsigned BitSize = Ty->getPrimitiveSizeInBits();
89 // There is no cost model for constants with a bit size of 0. Return TCC_Free
90 // here, so that constant hoisting will ignore this constant.
91 if (BitSize == 0)
92 return TTI::TCC_Free;
93
94 unsigned ImmIdx = ~0U;
95 switch (Opcode) {
96 default:
97 return TTI::TCC_Free;
98 case Instruction::GetElementPtr:
99 // Always hoist the base address of a GetElementPtr.
100 if (Idx == 0)
101 return 2 * TTI::TCC_Basic;
102 return TTI::TCC_Free;
103 case Instruction::Store:
104 ImmIdx = 0;
105 break;
106 case Instruction::Add:
107 case Instruction::Sub:
108 case Instruction::Mul:
109 case Instruction::UDiv:
110 case Instruction::SDiv:
111 case Instruction::URem:
112 case Instruction::SRem:
113 case Instruction::And:
114 case Instruction::Or:
115 case Instruction::Xor:
116 case Instruction::ICmp:
117 ImmIdx = 1;
118 break;
119 // Always return TCC_Free for the shift value of a shift instruction.
120 case Instruction::Shl:
121 case Instruction::LShr:
122 case Instruction::AShr:
123 if (Idx == 1)
124 return TTI::TCC_Free;
125 break;
126 case Instruction::Trunc:
127 case Instruction::ZExt:
128 case Instruction::SExt:
129 case Instruction::IntToPtr:
130 case Instruction::PtrToInt:
131 case Instruction::BitCast:
132 case Instruction::PHI:
133 case Instruction::Call:
134 case Instruction::Select:
135 case Instruction::Ret:
136 case Instruction::Load:
137 break;
138 }
139
140 if (Idx == ImmIdx) {
141 int NumConstants = (BitSize + 63) / 64;
142 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
143 return (Cost <= NumConstants * TTI::TCC_Basic)
144 ? static_cast<int>(TTI::TCC_Free)
145 : Cost;
146 }
147 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
148}
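
A worked sketch (assumed cost values, mirroring the comparison at the end of getIntImmCostInst above) of when an immediate is reported at full cost so that constant hoisting will consider it:

#include <cassert>

int main() {
  const int TCC_Free = 0, TCC_Basic = 1;   // TTI cost constants
  unsigned BitSize = 64;
  int NumConstants = (BitSize + 63) / 64;  // one 64-bit chunk
  int Cost = 3;                            // assume an immediate needing 3 MOVZ/MOVK moves
  int Reported = (Cost <= NumConstants * TCC_Basic) ? TCC_Free : Cost;
  assert(Reported == 3);                   // not free, so hoisting it can pay off
  return 0;
}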
149
150int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
151 const APInt &Imm, Type *Ty) {
152 assert(Ty->isIntegerTy());
153
154 unsigned BitSize = Ty->getPrimitiveSizeInBits();
155 // There is no cost model for constants with a bit size of 0. Return TCC_Free
156 // here, so that constant hoisting will ignore this constant.
157 if (BitSize == 0)
158 return TTI::TCC_Free;
159
160 // Most (all?) AArch64 intrinsics do not support folding immediates into the
161 // selected instruction, so we compute the materialization cost for the
162 // immediate directly.
163 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
164 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
165
166 switch (IID) {
167 default:
168 return TTI::TCC_Free;
169 case Intrinsic::sadd_with_overflow:
170 case Intrinsic::uadd_with_overflow:
171 case Intrinsic::ssub_with_overflow:
172 case Intrinsic::usub_with_overflow:
173 case Intrinsic::smul_with_overflow:
174 case Intrinsic::umul_with_overflow:
175 if (Idx == 1) {
176 int NumConstants = (BitSize + 63) / 64;
177 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty);
178 return (Cost <= NumConstants * TTI::TCC_Basic)
179 ? static_cast<int>(TTI::TCC_Free)
180 : Cost;
181 }
182 break;
183 case Intrinsic::experimental_stackmap:
184 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
185 return TTI::TCC_Free;
186 break;
187 case Intrinsic::experimental_patchpoint_void:
188 case Intrinsic::experimental_patchpoint_i64:
189 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
190 return TTI::TCC_Free;
191 break;
192 }
193 return AArch64TTIImpl::getIntImmCost(Imm, Ty);
194}
195
196TargetTransformInfo::PopcntSupportKind
197AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
198 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
199 if (TyWidth == 32 || TyWidth == 64)
200 return TTI::PSK_FastHardware;
201 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
202 return TTI::PSK_Software;
203}
204
205bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
206 ArrayRef<const Value *> Args) {
207
208 // A helper that returns a vector type from the given type. The number of
209 // elements in type Ty determine the vector width.
210 auto toVectorTy = [&](Type *ArgTy) {
211 return VectorType::get(ArgTy->getScalarType(),
212 DstTy->getVectorNumElements());
213 };
214
215 // Exit early if DstTy is not a vector type whose elements are at least
216 // 16-bits wide.
217 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
218 return false;
219
220 // Determine if the operation has a widening variant. We consider both the
221 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
222 // instructions.
223 //
224 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
225 // verify that their extending operands are eliminated during code
226 // generation.
227 switch (Opcode) {
228 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
229 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
230 break;
231 default:
232 return false;
233 }
234
235 // To be a widening instruction (either the "wide" or "long" versions), the
236 // second operand must be a sign- or zero extend having a single user. We
237 // only consider extends having a single user because they may otherwise not
238 // be eliminated.
239 if (Args.size() != 2 ||
240 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
241 !Args[1]->hasOneUse())
242 return false;
243 auto *Extend = cast<CastInst>(Args[1]);
244
245 // Legalize the destination type and ensure it can be used in a widening
246 // operation.
247 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
248 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
249 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
250 return false;
251
252 // Legalize the source type and ensure it can be used in a widening
253 // operation.
254 Type *SrcTy = toVectorTy(Extend->getSrcTy());
255 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
256 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
257 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
258 return false;
259
260 // Get the total number of vector elements in the legalized types.
261 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
262 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
263
264 // Return true if the legalized types have the same number of vector elements
265 // and the destination element type size is twice that of the source type.
266 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
267}
268
269int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
270 const Instruction *I) {
271 int ISD = TLI->InstructionOpcodeToISD(Opcode);
272 assert(ISD && "Invalid opcode");
273
274 // If the cast is observable, and it is used by a widening instruction (e.g.,
275 // uaddl, saddw, etc.), it may be free.
276 if (I && I->hasOneUse()) {
277 auto *SingleUser = cast<Instruction>(*I->user_begin());
278 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
279 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
280 // If the cast is the second operand, it is free. We will generate either
281 // a "wide" or "long" version of the widening instruction.
282 if (I == SingleUser->getOperand(1))
283 return 0;
284 // If the cast is not the second operand, it will be free if it looks the
285 // same as the second operand. In this case, we will generate a "long"
286 // version of the widening instruction.
287 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
288 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
289 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
290 return 0;
291 }
292 }
293
294 EVT SrcTy = TLI->getValueType(DL, Src);
295 EVT DstTy = TLI->getValueType(DL, Dst);
296
297 if (!SrcTy.isSimple() || !DstTy.isSimple())
298 return BaseT::getCastInstrCost(Opcode, Dst, Src);
299
300 static const TypeConversionCostTblEntry
301 ConversionTbl[] = {
302 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
303 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
304 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
305 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
306
307 // The number of shll instructions for the extension.
308 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
309 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
310 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
311 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
312 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
313 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
314 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
315 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
316 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
317 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
318 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
319 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
320 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
321 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
322 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
323 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
324
325 // LowerVectorINT_TO_FP:
326 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
327 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
328 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
329 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
330 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
331 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
332
333 // Complex: to v2f32
334 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
335 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
336 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
337 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
338 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
339 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
340
341 // Complex: to v4f32
342 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
343 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
344 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
345 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
346
347 // Complex: to v8f32
348 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
349 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
350 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
351 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
352
353 // Complex: to v16f32
354 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
355 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
356
357 // Complex: to v2f64
358 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
359 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
360 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
361 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
362 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
363 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
364
365
366 // LowerVectorFP_TO_INT
367 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
368 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
369 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
370 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
371 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
372 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
373
374 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
375 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
376 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
377 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
378 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
379 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
380 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
381
382 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
383 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
384 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
385 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
386 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
387
388 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
389 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
390 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
391 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
392 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
393 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
394 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
395 };
396
397 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
398 DstTy.getSimpleVT(),
399 SrcTy.getSimpleVT()))
400 return Entry->Cost;
401
402 return BaseT::getCastInstrCost(Opcode, Dst, Src);
403}
404
405int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
406 VectorType *VecTy,
407 unsigned Index) {
408
409 // Make sure we were given a valid extend opcode.
410 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
411 "Invalid opcode");
412
413 // We are extending an element we extract from a vector, so the source type
414 // of the extend is the element type of the vector.
415 auto *Src = VecTy->getElementType();
416
417 // Sign- and zero-extends are for integer types only.
418 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
419
420 // Get the cost for the extract. We compute the cost (if any) for the extend
421 // below.
422 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
423
424 // Legalize the types.
425 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
426 auto DstVT = TLI->getValueType(DL, Dst);
427 auto SrcVT = TLI->getValueType(DL, Src);
428
429 // If the resulting type is still a vector and the destination type is legal,
430 // we may get the extension for free. If not, get the default cost for the
431 // extend.
432 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
433 return Cost + getCastInstrCost(Opcode, Dst, Src);
434
435 // The destination type should be larger than the element type. If not, get
436 // the default cost for the extend.
437 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
438 return Cost + getCastInstrCost(Opcode, Dst, Src);
439
440 switch (Opcode) {
441 default:
442 llvm_unreachable("Opcode should be either SExt or ZExt");
443
444 // For sign-extends, we only need a smov, which performs the extension
445 // automatically.
446 case Instruction::SExt:
447 return Cost;
448
449 // For zero-extends, the extend is performed automatically by a umov unless
450 // the destination type is i64 and the element type is i8 or i16.
451 case Instruction::ZExt:
452 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
453 return Cost;
454 }
455
456 // If we are unable to perform the extend for free, get the default cost.
457 return Cost + getCastInstrCost(Opcode, Dst, Src);
458}
459
460int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
461 unsigned Index) {
462 assert(Val->isVectorTy() && "This must be a vector type");
463
464 if (Index != -1U) {
465 // Legalize the type.
466 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
467
468 // This type is legalized to a scalar type.
469 if (!LT.second.isVector())
470 return 0;
471
472 // The type may be split. Normalize the index to the new type.
473 unsigned Width = LT.second.getVectorNumElements();
474 Index = Index % Width;
475
476 // The element at index zero is already inside the vector.
477 if (Index == 0)
478 return 0;
479 }
480
481 // All other insert/extracts cost this much.
482 return ST->getVectorInsertExtractBaseCost();
483}
484
485int AArch64TTIImpl::getArithmeticInstrCost(
486 unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info,
487 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
488 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
489 const Instruction *CxtI) {
490 // Legalize the type.
491 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
492
493 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
494 // add in the widening overhead specified by the sub-target. Since the
495 // extends feeding widening instructions are performed automatically, they
496 // aren't present in the generated code and have a zero cost. By adding a
497 // widening overhead here, we attach the total cost of the combined operation
498 // to the widening instruction.
499 int Cost = 0;
500 if (isWideningInstruction(Ty, Opcode, Args))
501 Cost += ST->getWideningBaseCost();
502
503 int ISD = TLI->InstructionOpcodeToISD(Opcode);
504
505 switch (ISD) {
506 default:
507 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
508 Opd1PropInfo, Opd2PropInfo);
509 case ISD::SDIV:
510 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
511 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
512 // On AArch64, scalar signed division by a power-of-two constant is
513 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
514 // The OperandValue properties may not be the same as those of the
515 // previous operation; conservatively assume OP_None.
516 Cost += getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info, Opd2Info,
517 TargetTransformInfo::OP_None,
518 TargetTransformInfo::OP_None);
519 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, Opd1Info, Opd2Info,
520 TargetTransformInfo::OP_None,
521 TargetTransformInfo::OP_None);
522 Cost += getArithmeticInstrCost(Instruction::Select, Ty, Opd1Info, Opd2Info,
523 TargetTransformInfo::OP_None,
524 TargetTransformInfo::OP_None);
525 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info, Opd2Info,
526 TargetTransformInfo::OP_None,
527 TargetTransformInfo::OP_None);
528 return Cost;
529 }
530 LLVM_FALLTHROUGH;
531 case ISD::UDIV:
532 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
533 auto VT = TLI->getValueType(DL, Ty);
534 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
535 // Vector signed division by a constant is expanded to the
536 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
537 // to MULHS + SUB + SRL + ADD + SRL.
538 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, Opd1Info,
539 Opd2Info,
540 TargetTransformInfo::OP_None,
541 TargetTransformInfo::OP_None);
542 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, Opd1Info,
543 Opd2Info,
544 TargetTransformInfo::OP_None,
545 TargetTransformInfo::OP_None);
546 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, Opd1Info,
547 Opd2Info,
548 TargetTransformInfo::OP_None,
549 TargetTransformInfo::OP_None);
550 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
551 }
552 }
553
554 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
555 Opd1PropInfo, Opd2PropInfo);
556 if (Ty->isVectorTy()) {
557 // On AArch64, vector divisions are not supported natively and are
558 // expanded into scalar divisions of each pair of elements.
559 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, Opd1Info,
560 Opd2Info, Opd1PropInfo, Opd2PropInfo);
561 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, Opd1Info,
562 Opd2Info, Opd1PropInfo, Opd2PropInfo);
563 // TODO: if one of the arguments is scalar, then it's not necessary to
564 // double the cost of handling the vector elements.
565 Cost += Cost;
566 }
567 return Cost;
568
569 case ISD::ADD:
570 case ISD::MUL:
571 case ISD::XOR:
572 case ISD::OR:
573 case ISD::AND:
574 // These nodes are marked as 'custom' for combining purposes only.
575 // We know that they are legal. See LowerAdd in ISelLowering.
576 return (Cost + 1) * LT.first;
577 }
578}
579
580int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
581 const SCEV *Ptr) {
582 // Address computations in vectorized code with non-consecutive addresses will
583 // likely result in more instructions compared to scalar code where the
584 // computation can more often be merged into the index mode. The resulting
585 // extra micro-ops can significantly decrease throughput.
586 unsigned NumVectorInstToHideOverhead = 10;
587 int MaxMergeDistance = 64;
588
589 if (Ty->isVectorTy() && SE &&
590 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
591 return NumVectorInstToHideOverhead;
592
593 // In many cases the address computation is not merged into the instruction
594 // addressing mode.
595 return 1;
596}
597
598int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
599 Type *CondTy, const Instruction *I) {
600
601 int ISD = TLI->InstructionOpcodeToISD(Opcode);
602 // We don't lower some vector selects that are wider than the register
603 // width very well.
604 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
 Step 1: Calling 'Type::isVectorTy'
 Step 4: Returning from 'Type::isVectorTy'
 Step 20: Calling 'Type::isVectorTy'
 Step 23: Returning from 'Type::isVectorTy'
 Step 24: Assuming 'ISD' is equal to SELECT
 Step 25: Taking true branch
605 // We would need this many instructions to hide the scalarization happening.
606 const int AmortizationCost = 20;
607 static const TypeConversionCostTblEntry
608 VectorSelectTbl[] = {
609 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
610 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
611 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
612 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
613 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
614 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
615 };
616
617 EVT SelCondTy = TLI->getValueType(DL, CondTy);
 Step 26: Passing null pointer value via 2nd parameter 'Ty'
 Step 27: Calling 'TargetLoweringBase::getValueType'
618 EVT SelValTy = TLI->getValueType(DL, ValTy);
619 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
620 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
621 SelCondTy.getSimpleVT(),
622 SelValTy.getSimpleVT()))
623 return Entry->Cost;
624 }
625 }
626 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
 Step 5: Passing value via 3rd parameter 'CondTy'
 Step 6: Calling 'BasicTTIImplBase::getCmpSelInstrCost'
627}
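
The path above shows 'CondTy' arriving as a null pointer (step 26) and being handed to TargetLoweringBase::getValueType, which then dereferences it at the flagged location TargetLowering.h:1237. Below is a sketch of one possible defensive guard inside getCmpSelInstrCost; it is only an illustration under the assumption that callers may legitimately pass a null CondTy, and is not necessarily the upstream fix:

// Hypothetical guard: fall back to the base implementation before touching the
// vector-select cost table whenever CondTy is null, so TLI->getValueType() is
// never called with a null Type*.
if (ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) {
  EVT SelCondTy = TLI->getValueType(DL, CondTy); // CondTy known non-null here
  EVT SelValTy = TLI->getValueType(DL, ValTy);
  // ... VectorSelectTbl lookup as in the function above ...
}
return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);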
628
629AArch64TTIImpl::TTI::MemCmpExpansionOptions
630AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
631 TTI::MemCmpExpansionOptions Options;
632 Options.AllowOverlappingLoads = !ST->requiresStrictAlign();
633 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
634 Options.NumLoadsPerBlock = Options.MaxNumLoads;
635 // TODO: Though vector loads usually perform well on AArch64, in some targets
636 // they may wake up the FP unit, which raises the power consumption. Perhaps
637 // they could be used with no holds barred (-O3).
638 Options.LoadSizes = {8, 4, 2, 1};
639 return Options;
640}
641
642int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
643 MaybeAlign Alignment, unsigned AddressSpace,
644 const Instruction *I) {
645 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
646
647 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
648 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
649 // Unaligned stores are extremely inefficient. We don't split all
650 // unaligned 128-bit stores because the negative impact that has shown in
651 // practice on inlined block copy code.
652 // We make such stores expensive so that we will only vectorize if there
653 // are 6 other instructions getting vectorized.
654 const int AmortizationCost = 6;
655
656 return LT.first * 2 * AmortizationCost;
657 }
658
659 if (Ty->isVectorTy() && Ty->getVectorElementType()->isIntegerTy(8)) {
660 unsigned ProfitableNumElements;
661 if (Opcode == Instruction::Store)
662 // We use a custom trunc store lowering so v.4b should be profitable.
663 ProfitableNumElements = 4;
664 else
665 // We scalarize the loads because there is no v.4b register and we
666 // have to promote the elements to v.2.
667 ProfitableNumElements = 8;
668
669 if (Ty->getVectorNumElements() < ProfitableNumElements) {
670 unsigned NumVecElts = Ty->getVectorNumElements();
671 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
672 // We generate 2 instructions per vector element.
673 return NumVectorizableInstsToAmortize * NumVecElts * 2;
674 }
675 }
676
677 return LT.first;
678}
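
A short worked sketch (assumed inputs) of the two special-case costs computed above:

#include <cassert>

int main() {
  // Slow misaligned 128-bit store on an affected subtarget: LT.first * 2 * 6.
  int LTFirst = 1, AmortizationCost = 6;
  assert(LTFirst * 2 * AmortizationCost == 12);   // deliberately expensive
  // Load of a small i8 vector, e.g. <4 x i8> (below the profitable 8 elements):
  unsigned NumVecElts = 4;
  unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
  assert(NumVectorizableInstsToAmortize * NumVecElts * 2 == 64);
  return 0;
}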
679
680int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
681 unsigned Factor,
682 ArrayRef<unsigned> Indices,
683 unsigned Alignment,
684 unsigned AddressSpace,
685 bool UseMaskForCond,
686 bool UseMaskForGaps) {
687 assert(Factor >= 2 && "Invalid interleave factor");
688 assert(isa<VectorType>(VecTy) && "Expect a vector type");
689
690 if (!UseMaskForCond && !UseMaskForGaps &&
691 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
692 unsigned NumElts = VecTy->getVectorNumElements();
693 auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
694
695 // ldN/stN only support legal vector types of size 64 or 128 in bits.
696 // Accesses having vector types that are a multiple of 128 bits can be
697 // matched to more than one ldN/stN instruction.
698 if (NumElts % Factor == 0 &&
699 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
700 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
701 }
702
703 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
704 Alignment, AddressSpace,
705 UseMaskForCond, UseMaskForGaps);
706}
707
708int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
709 int Cost = 0;
710 for (auto *I : Tys) {
711 if (!I->isVectorTy())
712 continue;
713 if (I->getScalarSizeInBits() * I->getVectorNumElements() == 128)
714 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0) +
715 getMemoryOpCost(Instruction::Load, I, Align(128), 0);
716 }
717 return Cost;
718}
719
720unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
721 return ST->getMaxInterleaveFactor();
722}
723
724// For Falkor, we want to avoid having too many strided loads in a loop since
725// that can exhaust the HW prefetcher resources. We adjust the unroller
726// MaxCount preference below to attempt to ensure unrolling doesn't create too
727// many strided loads.
728static void
729getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
730 TargetTransformInfo::UnrollingPreferences &UP) {
731 enum { MaxStridedLoads = 7 };
732 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
733 int StridedLoads = 0;
734 // FIXME? We could make this more precise by looking at the CFG and
735 // e.g. not counting loads in each side of an if-then-else diamond.
736 for (const auto BB : L->blocks()) {
737 for (auto &I : *BB) {
738 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
739 if (!LMemI)
740 continue;
741
742 Value *PtrValue = LMemI->getPointerOperand();
743 if (L->isLoopInvariant(PtrValue))
744 continue;
745
746 const SCEV *LSCEV = SE.getSCEV(PtrValue);
747 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
748 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
749 continue;
750
751 // FIXME? We could take pairing of unrolled load copies into account
752 // by looking at the AddRec, but we would probably have to limit this
753 // to loops with no stores or other memory optimization barriers.
754 ++StridedLoads;
755 // We've seen enough strided loads that seeing more won't make a
756 // difference.
757 if (StridedLoads > MaxStridedLoads / 2)
758 return StridedLoads;
759 }
760 }
761 return StridedLoads;
762 };
763
764 int StridedLoads = countStridedLoads(L, SE);
765 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
766 << " strided loads\n");
767 // Pick the largest power of 2 unroll count that won't result in too many
768 // strided loads.
769 if (StridedLoads) {
770 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
771 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
772 << UP.MaxCount << '\n');
773 }
774}
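
A standalone sketch (assumed load count; Log2_32 modelled locally as floor-of-log2) of how the Falkor heuristic above caps the unroll count:

#include <cassert>

// Local stand-in for llvm::Log2_32 (floor of log2) for small positive values.
static unsigned Log2_32(unsigned V) {
  unsigned L = 0;
  while (V >>= 1)
    ++L;
  return L;
}

int main() {
  const int MaxStridedLoads = 7;
  int StridedLoads = 3;                            // assume three strided loads found
  unsigned MaxCount = 1u << Log2_32(MaxStridedLoads / StridedLoads);
  assert(MaxCount == 2);                           // 2x unrolling stays within budget
  return 0;
}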
775
776void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
777 TTI::UnrollingPreferences &UP) {
778 // Enable partial unrolling and runtime unrolling.
779 BaseT::getUnrollingPreferences(L, SE, UP);
780
781 // For inner loop, it is more likely to be a hot one, and the runtime check
782 // can be promoted out from LICM pass, so the overhead is less, let's try
783 // a larger threshold to unroll more loops.
784 if (L->getLoopDepth() > 1)
785 UP.PartialThreshold *= 2;
786
787 // Disable partial & runtime unrolling on -Os.
788 UP.PartialOptSizeThreshold = 0;
789
790 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
791 EnableFalkorHWPFUnrollFix)
792 getFalkorUnrollingPreferences(L, SE, UP);
793}
794
795Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
796 Type *ExpectedType) {
797 switch (Inst->getIntrinsicID()) {
798 default:
799 return nullptr;
800 case Intrinsic::aarch64_neon_st2:
801 case Intrinsic::aarch64_neon_st3:
802 case Intrinsic::aarch64_neon_st4: {
803 // Create a struct type
804 StructType *ST = dyn_cast<StructType>(ExpectedType);
805 if (!ST)
806 return nullptr;
807 unsigned NumElts = Inst->getNumArgOperands() - 1;
808 if (ST->getNumElements() != NumElts)
809 return nullptr;
810 for (unsigned i = 0, e = NumElts; i != e; ++i) {
811 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
812 return nullptr;
813 }
814 Value *Res = UndefValue::get(ExpectedType);
815 IRBuilder<> Builder(Inst);
816 for (unsigned i = 0, e = NumElts; i != e; ++i) {
817 Value *L = Inst->getArgOperand(i);
818 Res = Builder.CreateInsertValue(Res, L, i);
819 }
820 return Res;
821 }
822 case Intrinsic::aarch64_neon_ld2:
823 case Intrinsic::aarch64_neon_ld3:
824 case Intrinsic::aarch64_neon_ld4:
825 if (Inst->getType() == ExpectedType)
826 return Inst;
827 return nullptr;
828 }
829}
830
831bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
832 MemIntrinsicInfo &Info) {
833 switch (Inst->getIntrinsicID()) {
834 default:
835 break;
836 case Intrinsic::aarch64_neon_ld2:
837 case Intrinsic::aarch64_neon_ld3:
838 case Intrinsic::aarch64_neon_ld4:
839 Info.ReadMem = true;
840 Info.WriteMem = false;
841 Info.PtrVal = Inst->getArgOperand(0);
842 break;
843 case Intrinsic::aarch64_neon_st2:
844 case Intrinsic::aarch64_neon_st3:
845 case Intrinsic::aarch64_neon_st4:
846 Info.ReadMem = false;
847 Info.WriteMem = true;
848 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
849 break;
850 }
851
852 switch (Inst->getIntrinsicID()) {
853 default:
854 return false;
855 case Intrinsic::aarch64_neon_ld2:
856 case Intrinsic::aarch64_neon_st2:
857 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
858 break;
859 case Intrinsic::aarch64_neon_ld3:
860 case Intrinsic::aarch64_neon_st3:
861 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
862 break;
863 case Intrinsic::aarch64_neon_ld4:
864 case Intrinsic::aarch64_neon_st4:
865 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
866 break;
867 }
868 return true;
869}
870
871/// See if \p I should be considered for address type promotion. We check if \p
872/// I is a sext with the right type and used in memory accesses. If it is used in a
873/// "complex" getelementptr, we allow it to be promoted without finding other
874/// sext instructions that sign extended the same initial value. A getelementptr
875/// is considered as "complex" if it has more than 2 operands.
876bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
877 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
878 bool Considerable = false;
879 AllowPromotionWithoutCommonHeader = false;
880 if (!isa<SExtInst>(&I))
881 return false;
882 Type *ConsideredSExtType =
883 Type::getInt64Ty(I.getParent()->getParent()->getContext());
884 if (I.getType() != ConsideredSExtType)
885 return false;
886 // See if the sext is the one with the right type and used in at least one
887 // GetElementPtrInst.
888 for (const User *U : I.users()) {
889 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
890 Considerable = true;
891 // A getelementptr is considered as "complex" if it has more than 2
892 // operands. We will promote a SExt used in such complex GEP as we
893 // expect some computation to be merged if they are done on 64 bits.
894 if (GEPInst->getNumOperands() > 2) {
895 AllowPromotionWithoutCommonHeader = true;
896 break;
897 }
898 }
899 }
900 return Considerable;
901}
902
903bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
904 TTI::ReductionFlags Flags) const {
905 assert(isa<VectorType>(Ty) && "Expected Ty to be a vector type");
906 unsigned ScalarBits = Ty->getScalarSizeInBits();
907 switch (Opcode) {
908 case Instruction::FAdd:
909 case Instruction::FMul:
910 case Instruction::And:
911 case Instruction::Or:
912 case Instruction::Xor:
913 case Instruction::Mul:
914 return false;
915 case Instruction::Add:
916 return ScalarBits * Ty->getVectorNumElements() >= 128;
917 case Instruction::ICmp:
918 return (ScalarBits < 64) &&
919 (ScalarBits * Ty->getVectorNumElements() >= 128);
920 case Instruction::FCmp:
921 return Flags.NoNaN;
922 default:
923 llvm_unreachable("Unhandled reduction opcode");
924 }
925 return false;
926}
927
928int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy,
929 bool IsPairwiseForm) {
930
931 if (IsPairwiseForm)
932 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
933
934 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
935 MVT MTy = LT.second;
936 int ISD = TLI->InstructionOpcodeToISD(Opcode);
937 assert(ISD && "Invalid opcode");
938
939 // Horizontal adds can use the 'addv' instruction. We model the cost of these
940 // instructions as normal vector adds. This is the only arithmetic vector
941 // reduction operation for which we have an instruction.
942 static const CostTblEntry CostTblNoPairwise[]{
943 {ISD::ADD, MVT::v8i8, 1},
944 {ISD::ADD, MVT::v16i8, 1},
945 {ISD::ADD, MVT::v4i16, 1},
946 {ISD::ADD, MVT::v8i16, 1},
947 {ISD::ADD, MVT::v4i32, 1},
948 };
949
950 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
951 return LT.first * Entry->Cost;
952
953 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm);
954}
955
956int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
957 Type *SubTp) {
958 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
959 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
960 static const CostTblEntry ShuffleTbl[] = {
961 // Broadcast shuffle kinds can be performed with 'dup'.
962 { TTI::SK_Broadcast, MVT::v8i8, 1 },
963 { TTI::SK_Broadcast, MVT::v16i8, 1 },
964 { TTI::SK_Broadcast, MVT::v4i16, 1 },
965 { TTI::SK_Broadcast, MVT::v8i16, 1 },
966 { TTI::SK_Broadcast, MVT::v2i32, 1 },
967 { TTI::SK_Broadcast, MVT::v4i32, 1 },
968 { TTI::SK_Broadcast, MVT::v2i64, 1 },
969 { TTI::SK_Broadcast, MVT::v2f32, 1 },
970 { TTI::SK_Broadcast, MVT::v4f32, 1 },
971 { TTI::SK_Broadcast, MVT::v2f64, 1 },
972 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
973 // 'zip1/zip2' instructions.
974 { TTI::SK_Transpose, MVT::v8i8, 1 },
975 { TTI::SK_Transpose, MVT::v16i8, 1 },
976 { TTI::SK_Transpose, MVT::v4i16, 1 },
977 { TTI::SK_Transpose, MVT::v8i16, 1 },
978 { TTI::SK_Transpose, MVT::v2i32, 1 },
979 { TTI::SK_Transpose, MVT::v4i32, 1 },
980 { TTI::SK_Transpose, MVT::v2i64, 1 },
981 { TTI::SK_Transpose, MVT::v2f32, 1 },
982 { TTI::SK_Transpose, MVT::v4f32, 1 },
983 { TTI::SK_Transpose, MVT::v2f64, 1 },
984 // Select shuffle kinds.
985 // TODO: handle vXi8/vXi16.
986 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
987 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
988 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
989 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
990 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
991 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
992 // PermuteSingleSrc shuffle kinds.
993 // TODO: handle vXi8/vXi16.
994 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
995 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
996 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
997 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
998 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
999 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
1000 };
1001 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
1002 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
1003 return LT.first * Entry->Cost;
1004 }
1005
1006 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1007}

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/IR/Type.h

1//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the Type class. For more "Type"
10// stuff, look in DerivedTypes.h.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_TYPE_H
15#define LLVM_IR_TYPE_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/CBindingWrapping.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TypeSize.h"
25#include <cassert>
26#include <cstdint>
27#include <iterator>
28
29namespace llvm {
30
31template<class GraphType> struct GraphTraits;
32class IntegerType;
33class LLVMContext;
34class PointerType;
35class raw_ostream;
36class StringRef;
37
38/// The instances of the Type class are immutable: once they are created,
39/// they are never changed. Also note that only one instance of a particular
40/// type is ever created. Thus seeing if two types are equal is a matter of
41/// doing a trivial pointer comparison. To enforce that no two equal instances
42/// are created, Type instances can only be created via static factory methods
43/// in class Type and in derived classes. Once allocated, Types are never
44/// free'd.
45///
46class Type {
47public:
48 //===--------------------------------------------------------------------===//
49 /// Definitions of all of the base types for the Type system. Based on this
50 /// value, you can cast to a class defined in DerivedTypes.h.
51 /// Note: If you add an element to this, you need to add an element to the
52 /// Type::getPrimitiveType function, or else things will break!
53 /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
54 ///
55 enum TypeID {
56 // PrimitiveTypes - make sure LastPrimitiveTyID stays up to date.
57 VoidTyID = 0, ///< 0: type with no size
58 HalfTyID, ///< 1: 16-bit floating point type
59 FloatTyID, ///< 2: 32-bit floating point type
60 DoubleTyID, ///< 3: 64-bit floating point type
61 X86_FP80TyID, ///< 4: 80-bit floating point type (X87)
62 FP128TyID, ///< 5: 128-bit floating point type (112-bit mantissa)
63 PPC_FP128TyID, ///< 6: 128-bit floating point type (two 64-bits, PowerPC)
64 LabelTyID, ///< 7: Labels
65 MetadataTyID, ///< 8: Metadata
66 X86_MMXTyID, ///< 9: MMX vectors (64 bits, X86 specific)
67 TokenTyID, ///< 10: Tokens
68
69 // Derived types... see DerivedTypes.h file.
70 // Make sure FirstDerivedTyID stays up to date!
71 IntegerTyID, ///< 11: Arbitrary bit width integers
72 FunctionTyID, ///< 12: Functions
73 StructTyID, ///< 13: Structures
74 ArrayTyID, ///< 14: Arrays
75 PointerTyID, ///< 15: Pointers
76 VectorTyID ///< 16: SIMD 'packed' format, or other vector type
77 };
78
79private:
80 /// This refers to the LLVMContext in which this type was uniqued.
81 LLVMContext &Context;
82
83 TypeID ID : 8; // The current base type of this type.
84 unsigned SubclassData : 24; // Space for subclasses to store data.
85 // Note that this should be synchronized with
86 // MAX_INT_BITS value in IntegerType class.
87
88protected:
89 friend class LLVMContextImpl;
90
91 explicit Type(LLVMContext &C, TypeID tid)
92 : Context(C), ID(tid), SubclassData(0) {}
93 ~Type() = default;
94
95 unsigned getSubclassData() const { return SubclassData; }
96
97 void setSubclassData(unsigned val) {
98 SubclassData = val;
99 // Ensure we don't have any accidental truncation.
100 assert(getSubclassData() == val && "Subclass data too large for field");
101 }
102
103 /// Keeps track of how many Type*'s there are in the ContainedTys list.
104 unsigned NumContainedTys = 0;
105
106 /// A pointer to the array of Types contained by this Type. For example, this
107 /// includes the arguments of a function type, the elements of a structure,
108 /// the pointee of a pointer, the element type of an array, etc. This pointer
109 /// may be 0 for types that don't contain other types (Integer, Double,
110 /// Float).
111 Type * const *ContainedTys = nullptr;
112
113 static bool isSequentialType(TypeID TyID) {
114 return TyID == ArrayTyID || TyID == VectorTyID;
115 }
116
117public:
118 /// Print the current type.
119 /// Omit the type details if \p NoDetails == true.
120 /// E.g., let %st = type { i32, i16 }
121 /// When \p NoDetails is true, we only print %st.
122 /// Put differently, \p NoDetails prints the type as if
123 /// inlined with the operands when printing an instruction.
124 void print(raw_ostream &O, bool IsForDebug = false,
125 bool NoDetails = false) const;
126
127 void dump() const;
128
129 /// Return the LLVMContext in which this type was uniqued.
130 LLVMContext &getContext() const { return Context; }
131
132 //===--------------------------------------------------------------------===//
133 // Accessors for working with types.
134 //
135
136 /// Return the type id for the type. This will return one of the TypeID enum
137 /// elements defined above.
138 TypeID getTypeID() const { return ID; }
139
140 /// Return true if this is 'void'.
141 bool isVoidTy() const { return getTypeID() == VoidTyID; }
142
143 /// Return true if this is 'half', a 16-bit IEEE fp type.
144 bool isHalfTy() const { return getTypeID() == HalfTyID; }
145
146 /// Return true if this is 'float', a 32-bit IEEE fp type.
147 bool isFloatTy() const { return getTypeID() == FloatTyID; }
148
149 /// Return true if this is 'double', a 64-bit IEEE fp type.
150 bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
151
152 /// Return true if this is x86 long double.
153 bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
154
155 /// Return true if this is 'fp128'.
156 bool isFP128Ty() const { return getTypeID() == FP128TyID; }
157
158 /// Return true if this is powerpc long double.
159 bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
160
161 /// Return true if this is one of the six floating-point types
162 bool isFloatingPointTy() const {
163 return getTypeID() == HalfTyID || getTypeID() == FloatTyID ||
164 getTypeID() == DoubleTyID ||
165 getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
166 getTypeID() == PPC_FP128TyID;
167 }
168
169 const fltSemantics &getFltSemantics() const {
170 switch (getTypeID()) {
171 case HalfTyID: return APFloat::IEEEhalf();
172 case FloatTyID: return APFloat::IEEEsingle();
173 case DoubleTyID: return APFloat::IEEEdouble();
174 case X86_FP80TyID: return APFloat::x87DoubleExtended();
175 case FP128TyID: return APFloat::IEEEquad();
176 case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
177 default: llvm_unreachable("Invalid floating type");
178 }
179 }
180
181 /// Return true if this is X86 MMX.
182 bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
183
184 /// Return true if this is a FP type or a vector of FP.
185 bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
186
187 /// Return true if this is 'label'.
188 bool isLabelTy() const { return getTypeID() == LabelTyID; }
189
190 /// Return true if this is 'metadata'.
191 bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
192
193 /// Return true if this is 'token'.
194 bool isTokenTy() const { return getTypeID() == TokenTyID; }
195
196 /// True if this is an instance of IntegerType.
197 bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
198
199 /// Return true if this is an IntegerType of the given width.
200 bool isIntegerTy(unsigned Bitwidth) const;
201
202 /// Return true if this is an integer type or a vector of integer types.
203 bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
204
205 /// Return true if this is an integer type or a vector of integer types of
206 /// the given width.
207 bool isIntOrIntVectorTy(unsigned BitWidth) const {
208 return getScalarType()->isIntegerTy(BitWidth);
209 }
210
211 /// Return true if this is an integer type or a pointer type.
212 bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
213
214 /// True if this is an instance of FunctionType.
215 bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
216
217 /// True if this is an instance of StructType.
218 bool isStructTy() const { return getTypeID() == StructTyID; }
219
220 /// True if this is an instance of ArrayType.
221 bool isArrayTy() const { return getTypeID() == ArrayTyID; }
222
223 /// True if this is an instance of PointerType.
224 bool isPointerTy() const { return getTypeID() == PointerTyID; }
225
226 /// Return true if this is a pointer type or a vector of pointer types.
227 bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
228
229 /// True if this is an instance of VectorType.
230 bool isVectorTy() const { return getTypeID() == VectorTyID; }
2. Assuming the condition is false
3. Returning zero, which participates in a condition later
13. Returning the value 1, which participates in a condition later
21. Assuming the condition is true
22. Returning the value 1, which participates in a condition later
231
232 /// Return true if this type could be converted with a lossless BitCast to
233 /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
234 /// same size only where no re-interpretation of the bits is done.
235 /// Determine if this type could be losslessly bitcast to Ty
236 bool canLosslesslyBitCastTo(Type *Ty) const;
237
238 /// Return true if this type is empty, that is, it has no elements or all of
239 /// its elements are empty.
240 bool isEmptyTy() const;
241
242 /// Return true if the type is "first class", meaning it is a valid type for a
243 /// Value.
244 bool isFirstClassType() const {
245 return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
246 }
247
248 /// Return true if the type is a valid type for a register in codegen. This
249 /// includes all first-class types except struct and array types.
250 bool isSingleValueType() const {
251 return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
252 isPointerTy() || isVectorTy();
253 }
254
255 /// Return true if the type is an aggregate type. This means it is valid as
256 /// the first operand of an insertvalue or extractvalue instruction. This
257 /// includes struct and array types, but does not include vector types.
258 bool isAggregateType() const {
259 return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
260 }
261
262 /// Return true if it makes sense to take the size of this type. To get the
263 /// actual size for a particular target, it is reasonable to use the
264 /// DataLayout subsystem to do this.
265 bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
266 // If it's a primitive, it is always sized.
267 if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
268 getTypeID() == PointerTyID ||
269 getTypeID() == X86_MMXTyID)
270 return true;
271 // If it is not something that can have a size (e.g. a function or label),
272 // it doesn't have a size.
273 if (getTypeID() != StructTyID && getTypeID() != ArrayTyID &&
274 getTypeID() != VectorTyID)
275 return false;
276 // Otherwise we have to try harder to decide.
277 return isSizedDerivedType(Visited);
278 }
279
280 /// Return the basic size of this type if it is a primitive type. These are
281 /// fixed by LLVM and are not target-dependent.
282 /// This will return zero if the type does not have a size or is not a
283 /// primitive type.
284 ///
285 /// If this is a scalable vector type, the scalable property will be set and
286 /// the runtime size will be a positive integer multiple of the base size.
287 ///
288 /// Note that this may not reflect the size of memory allocated for an
289 /// instance of the type or the number of bytes that are written when an
290 /// instance of the type is stored to memory. The DataLayout class provides
291 /// additional query functions to provide this information.
292 ///
293  TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
294
295 /// If this is a vector type, return the getPrimitiveSizeInBits value for the
296 /// element type. Otherwise return the getPrimitiveSizeInBits value for this
297 /// type.
298  unsigned getScalarSizeInBits() const LLVM_READONLY;
299
300 /// Return the width of the mantissa of this type. This is only valid on
301 /// floating-point types. If the FP type does not have a stable mantissa (e.g.
302 /// ppc long double), this method returns -1.
303 int getFPMantissaWidth() const;
304
305 /// If this is a vector type, return the element type, otherwise return
306 /// 'this'.
307 Type *getScalarType() const {
308 if (isVectorTy())
309 return getVectorElementType();
310 return const_cast<Type*>(this);
311 }
312
313 //===--------------------------------------------------------------------===//
314 // Type Iteration support.
315 //
316 using subtype_iterator = Type * const *;
317
318 subtype_iterator subtype_begin() const { return ContainedTys; }
319 subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
320 ArrayRef<Type*> subtypes() const {
321 return makeArrayRef(subtype_begin(), subtype_end());
322 }
323
324 using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>;
325
326 subtype_reverse_iterator subtype_rbegin() const {
327 return subtype_reverse_iterator(subtype_end());
328 }
329 subtype_reverse_iterator subtype_rend() const {
330 return subtype_reverse_iterator(subtype_begin());
331 }
332
333 /// This method is used to implement the type iterator (defined at the end of
334 /// the file). For derived types, this returns the types 'contained' in the
335 /// derived type.
336 Type *getContainedType(unsigned i) const {
337    assert(i < NumContainedTys && "Index out of range!");
338 return ContainedTys[i];
339 }
340
341 /// Return the number of types in the derived type.
342 unsigned getNumContainedTypes() const { return NumContainedTys; }
343
344 //===--------------------------------------------------------------------===//
345 // Helper methods corresponding to subclass methods. This forces a cast to
346 // the specified subclass and calls its accessor. "getVectorNumElements" (for
347 // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
348 // only intended to cover the core methods that are frequently used, helper
349 // methods should not be added here.
350
351 inline unsigned getIntegerBitWidth() const;
352
353 inline Type *getFunctionParamType(unsigned i) const;
354 inline unsigned getFunctionNumParams() const;
355 inline bool isFunctionVarArg() const;
356
357 inline StringRef getStructName() const;
358 inline unsigned getStructNumElements() const;
359 inline Type *getStructElementType(unsigned N) const;
360
361 inline Type *getSequentialElementType() const {
362    assert(isSequentialType(getTypeID()) && "Not a sequential type!");
363 return ContainedTys[0];
364 }
365
366 inline uint64_t getArrayNumElements() const;
367
368 Type *getArrayElementType() const {
369    assert(getTypeID() == ArrayTyID);
370 return ContainedTys[0];
371 }
372
373 inline bool getVectorIsScalable() const;
374 inline unsigned getVectorNumElements() const;
375 inline ElementCount getVectorElementCount() const;
376 Type *getVectorElementType() const {
377    assert(getTypeID() == VectorTyID);
378 return ContainedTys[0];
379 }
380
381 Type *getPointerElementType() const {
382    assert(getTypeID() == PointerTyID);
383 return ContainedTys[0];
384 }
385
386 /// Given an integer or vector type, change the lane bitwidth to NewBitwidth,
387 /// whilst keeping the old number of lanes.
388 inline Type *getWithNewBitWidth(unsigned NewBitWidth) const;
389
390 /// Given scalar/vector integer type, returns a type with elements twice as
391 /// wide as in the original type. For vectors, preserves element count.
392 inline Type *getExtendedType() const;
393
394 /// Get the address space of this pointer or pointer vector type.
395 inline unsigned getPointerAddressSpace() const;
396
397 //===--------------------------------------------------------------------===//
398 // Static members exported by the Type class itself. Useful for getting
399 // instances of Type.
400 //
401
402 /// Return a type based on an identifier.
403 static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
404
405 //===--------------------------------------------------------------------===//
406 // These are the builtin types that are always available.
407 //
408 static Type *getVoidTy(LLVMContext &C);
409 static Type *getLabelTy(LLVMContext &C);
410 static Type *getHalfTy(LLVMContext &C);
411 static Type *getFloatTy(LLVMContext &C);
412 static Type *getDoubleTy(LLVMContext &C);
413 static Type *getMetadataTy(LLVMContext &C);
414 static Type *getX86_FP80Ty(LLVMContext &C);
415 static Type *getFP128Ty(LLVMContext &C);
416 static Type *getPPC_FP128Ty(LLVMContext &C);
417 static Type *getX86_MMXTy(LLVMContext &C);
418 static Type *getTokenTy(LLVMContext &C);
419 static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
420 static IntegerType *getInt1Ty(LLVMContext &C);
421 static IntegerType *getInt8Ty(LLVMContext &C);
422 static IntegerType *getInt16Ty(LLVMContext &C);
423 static IntegerType *getInt32Ty(LLVMContext &C);
424 static IntegerType *getInt64Ty(LLVMContext &C);
425 static IntegerType *getInt128Ty(LLVMContext &C);
426 template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
427    int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
428 if (std::is_integral<ScalarTy>::value) {
429 return (Type*) Type::getIntNTy(C, noOfBits);
430 } else if (std::is_floating_point<ScalarTy>::value) {
431 switch (noOfBits) {
432 case 32:
433 return Type::getFloatTy(C);
434 case 64:
435 return Type::getDoubleTy(C);
436 }
437 }
438 llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/IR/Type.h"
, 438)
;
439 }
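
For reference, a minimal usage sketch of Type::getScalarTy (not taken from the listing above; it assumes an existing LLVMContext named Ctx and the <cstdint> fixed-width typedefs):

  Type *I32 = Type::getScalarTy<int32_t>(Ctx); // sizeof * CHAR_BIT = 32 -> getIntNTy(Ctx, 32)
  Type *F32 = Type::getScalarTy<float>(Ctx);   // 32-bit floating point  -> getFloatTy(Ctx)
  Type *F64 = Type::getScalarTy<double>(Ctx);  // 64-bit floating point  -> getDoubleTy(Ctx)
  // Any other floating-point width falls through to the llvm_unreachable above.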
440
441 //===--------------------------------------------------------------------===//
442 // Convenience methods for getting pointer types with one of the above builtin
443 // types as pointee.
444 //
445 static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
446 static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
447 static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
448 static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
449 static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
450 static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
451 static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
452 static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
453 static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
454 static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
455 static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
456 static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
457 static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
458
459 /// Return a pointer to the current type. This is equivalent to
460 /// PointerType::get(Foo, AddrSpace).
461 PointerType *getPointerTo(unsigned AddrSpace = 0) const;
462
463private:
464 /// Derived types like structures and arrays are sized iff all of the members
465 /// of the type are sized as well. Since asking for their size is relatively
466 /// uncommon, move this operation out-of-line.
467 bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
468};
469
470// Printing of types.
471inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
472 T.print(OS);
473 return OS;
474}
475
476// allow isa<PointerType>(x) to work without DerivedTypes.h included.
477template <> struct isa_impl<PointerType, Type> {
478 static inline bool doit(const Type &Ty) {
479 return Ty.getTypeID() == Type::PointerTyID;
480 }
481};
482
483// Create wrappers for C Binding types (see CBindingWrapping.h).
484 DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
485
486/* Specialized opaque type conversions.
487 */
488inline Type **unwrap(LLVMTypeRef* Tys) {
489 return reinterpret_cast<Type**>(Tys);
490}
491
492inline LLVMTypeRef *wrap(Type **Tys) {
493 return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
494}
495
496} // end namespace llvm
497
498#endif // LLVM_IR_TYPE_H

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CallSite.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/DerivedTypes.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/Value.h"
44#include "llvm/MC/MCSchedule.h"
45#include "llvm/Support/Casting.h"
46#include "llvm/Support/CommandLine.h"
47#include "llvm/Support/ErrorHandling.h"
48#include "llvm/Support/MachineValueType.h"
49#include "llvm/Support/MathExtras.h"
50#include <algorithm>
51#include <cassert>
52#include <cstdint>
53#include <limits>
54#include <utility>
55
56namespace llvm {
57
58class Function;
59class GlobalValue;
60class LLVMContext;
61class ScalarEvolution;
62class SCEV;
63class TargetMachine;
64
65extern cl::opt<unsigned> PartialUnrollingThreshold;
66
67/// Base class which can be used to help build a TTI implementation.
68///
69/// This class provides as much implementation of the TTI interface as is
70/// possible using the target independent parts of the code generator.
71///
72/// In order to subclass it, your class must implement a getST() method to
73/// return the subtarget, and a getTLI() method to return the target lowering.
74/// We need these methods implemented in the derived class so that this class
75/// doesn't have to duplicate storage for them.
76template <typename T>
77class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78private:
79 using BaseT = TargetTransformInfoImplCRTPBase<T>;
80 using TTI = TargetTransformInfo;
81
82 /// Estimate a cost of Broadcast as an extract and sequence of insert
83 /// operations.
84 unsigned getBroadcastShuffleOverhead(Type *Ty) {
85     assert(Ty->isVectorTy() && "Can only shuffle vectors");
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += static_cast<T *>(this)->getVectorInstrCost(
90 Instruction::ExtractElement, Ty, 0);
91
92 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93 Cost += static_cast<T *>(this)->getVectorInstrCost(
94 Instruction::InsertElement, Ty, i);
95 }
96 return Cost;
97 }
98
99 /// Estimate a cost of shuffle as a sequence of extract and insert
100 /// operations.
101 unsigned getPermuteShuffleOverhead(Type *Ty) {
102    assert(Ty->isVectorTy() && "Can only shuffle vectors");
103 unsigned Cost = 0;
104    // Shuffle cost is equal to the cost of extracting elements from its argument
105    // plus the cost of inserting them into the result vector.
106
107    // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
108    // index 0 of first vector, index 1 of second vector, index 2 of first
109 // vector and finally index 3 of second vector and insert them at index
110 // <0,1,2,3> of result vector.
111 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112 Cost += static_cast<T *>(this)
113 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114 Cost += static_cast<T *>(this)
115 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116 }
117 return Cost;
118 }
119
120 /// Estimate a cost of subvector extraction as a sequence of extract and
121 /// insert operations.
122 unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124           "Can only extract subvectors from vectors");
125 int NumSubElts = SubTy->getVectorNumElements();
126    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127           "SK_ExtractSubvector index out of range");
128
129 unsigned Cost = 0;
130 // Subvector extraction cost is equal to the cost of extracting element from
131 // the source type plus the cost of inserting them into the result vector
132 // type.
133 for (int i = 0; i != NumSubElts; ++i) {
134 Cost += static_cast<T *>(this)->getVectorInstrCost(
135 Instruction::ExtractElement, Ty, i + Index);
136 Cost += static_cast<T *>(this)->getVectorInstrCost(
137 Instruction::InsertElement, SubTy, i);
138 }
139 return Cost;
140 }
141
142 /// Estimate a cost of subvector insertion as a sequence of extract and
143 /// insert operations.
144 unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145    assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146           "Can only insert subvectors into vectors");
147 int NumSubElts = SubTy->getVectorNumElements();
148    assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149           "SK_InsertSubvector index out of range");
150
151 unsigned Cost = 0;
152 // Subvector insertion cost is equal to the cost of extracting element from
153 // the source type plus the cost of inserting them into the result vector
154 // type.
155 for (int i = 0; i != NumSubElts; ++i) {
156 Cost += static_cast<T *>(this)->getVectorInstrCost(
157 Instruction::ExtractElement, SubTy, i);
158 Cost += static_cast<T *>(this)->getVectorInstrCost(
159 Instruction::InsertElement, Ty, i + Index);
160 }
161 return Cost;
162 }
163
164 /// Local query method delegates up to T which *must* implement this!
165 const TargetSubtargetInfo *getST() const {
166 return static_cast<const T *>(this)->getST();
167 }
168
169 /// Local query method delegates up to T which *must* implement this!
170 const TargetLoweringBase *getTLI() const {
171 return static_cast<const T *>(this)->getTLI();
172 }
173
174 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175 switch (M) {
176 case TTI::MIM_Unindexed:
177 return ISD::UNINDEXED;
178 case TTI::MIM_PreInc:
179 return ISD::PRE_INC;
180 case TTI::MIM_PreDec:
181 return ISD::PRE_DEC;
182 case TTI::MIM_PostInc:
183 return ISD::POST_INC;
184 case TTI::MIM_PostDec:
185 return ISD::POST_DEC;
186 }
187 llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 187)
;
188 }
189
190protected:
191 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192 : BaseT(DL) {}
193 virtual ~BasicTTIImplBase() = default;
194
195 using TargetTransformInfoImplBase::DL;
196
197public:
198 /// \name Scalar TTI Implementations
199 /// @{
200 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
201 unsigned AddressSpace, unsigned Alignment,
202 bool *Fast) const {
203 EVT E = EVT::getIntegerVT(Context, BitWidth);
204 return getTLI()->allowsMisalignedMemoryAccesses(
205 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
206 }
207
208 bool hasBranchDivergence() { return false; }
209
210 bool isSourceOfDivergence(const Value *V) { return false; }
211
212 bool isAlwaysUniform(const Value *V) { return false; }
213
214 unsigned getFlatAddressSpace() {
215 // Return an invalid address space.
216 return -1;
217 }
218
219 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
220 Intrinsic::ID IID) const {
221 return false;
222 }
223
224 bool rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
225 Value *OldV, Value *NewV) const {
226 return false;
227 }
228
229 bool isLegalAddImmediate(int64_t imm) {
230 return getTLI()->isLegalAddImmediate(imm);
231 }
232
233 bool isLegalICmpImmediate(int64_t imm) {
234 return getTLI()->isLegalICmpImmediate(imm);
235 }
236
237 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
238 bool HasBaseReg, int64_t Scale,
239 unsigned AddrSpace, Instruction *I = nullptr) {
240 TargetLoweringBase::AddrMode AM;
241 AM.BaseGV = BaseGV;
242 AM.BaseOffs = BaseOffset;
243 AM.HasBaseReg = HasBaseReg;
244 AM.Scale = Scale;
245 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
246 }
247
248 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
249 const DataLayout &DL) const {
250 EVT VT = getTLI()->getValueType(DL, Ty);
251 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
252 }
253
254 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
255 const DataLayout &DL) const {
256 EVT VT = getTLI()->getValueType(DL, Ty);
257 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
258 }
259
260 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
261 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
262 }
263
264 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
265 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
266 TargetLoweringBase::AddrMode AM;
267 AM.BaseGV = BaseGV;
268 AM.BaseOffs = BaseOffset;
269 AM.HasBaseReg = HasBaseReg;
270 AM.Scale = Scale;
271 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
272 }
273
274 bool isTruncateFree(Type *Ty1, Type *Ty2) {
275 return getTLI()->isTruncateFree(Ty1, Ty2);
276 }
277
278 bool isProfitableToHoist(Instruction *I) {
279 return getTLI()->isProfitableToHoist(I);
280 }
281
282 bool useAA() const { return getST()->useAA(); }
283
284 bool isTypeLegal(Type *Ty) {
285 EVT VT = getTLI()->getValueType(DL, Ty);
286 return getTLI()->isTypeLegal(VT);
287 }
288
289 int getGEPCost(Type *PointeeType, const Value *Ptr,
290 ArrayRef<const Value *> Operands) {
291 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
292 }
293
294 int getExtCost(const Instruction *I, const Value *Src) {
295 if (getTLI()->isExtFree(I))
296 return TargetTransformInfo::TCC_Free;
297
298 if (isa<ZExtInst>(I) || isa<SExtInst>(I))
299 if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
300 if (getTLI()->isExtLoad(LI, I, DL))
301 return TargetTransformInfo::TCC_Free;
302
303 return TargetTransformInfo::TCC_Basic;
304 }
305
306 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
307 ArrayRef<const Value *> Arguments, const User *U) {
308 return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
309 }
310
311 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
312 ArrayRef<Type *> ParamTys, const User *U) {
313 if (IID == Intrinsic::cttz) {
314 if (getTLI()->isCheapToSpeculateCttz())
315 return TargetTransformInfo::TCC_Basic;
316 return TargetTransformInfo::TCC_Expensive;
317 }
318
319 if (IID == Intrinsic::ctlz) {
320 if (getTLI()->isCheapToSpeculateCtlz())
321 return TargetTransformInfo::TCC_Basic;
322 return TargetTransformInfo::TCC_Expensive;
323 }
324
325 return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
326 }
327
328 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
329 unsigned &JumpTableSize,
330 ProfileSummaryInfo *PSI,
331 BlockFrequencyInfo *BFI) {
332 /// Try to find the estimated number of clusters. Note that the number of
333 /// clusters identified in this function could be different from the actual
334    /// numbers found in lowering. This function ignores switches that are
335    /// lowered with a mix of jump table / bit test / BTree. This function was
336    /// initially intended to be used when estimating the cost of a switch in
337    /// the inline cost heuristic, but it's a generic cost model to be used in other
338 /// places (e.g., in loop unrolling).
339 unsigned N = SI.getNumCases();
340 const TargetLoweringBase *TLI = getTLI();
341 const DataLayout &DL = this->getDataLayout();
342
343 JumpTableSize = 0;
344 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
345
346 // Early exit if both a jump table and bit test are not allowed.
347 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
348 return N;
349
350 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
351 APInt MinCaseVal = MaxCaseVal;
352 for (auto CI : SI.cases()) {
353 const APInt &CaseVal = CI.getCaseValue()->getValue();
354 if (CaseVal.sgt(MaxCaseVal))
355 MaxCaseVal = CaseVal;
356 if (CaseVal.slt(MinCaseVal))
357 MinCaseVal = CaseVal;
358 }
359
360 // Check if suitable for a bit test
361 if (N <= DL.getIndexSizeInBits(0u)) {
362 SmallPtrSet<const BasicBlock *, 4> Dests;
363 for (auto I : SI.cases())
364 Dests.insert(I.getCaseSuccessor());
365
366 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
367 DL))
368 return 1;
369 }
370
371 // Check if suitable for a jump table.
372 if (IsJTAllowed) {
373 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
374 return N;
375 uint64_t Range =
376 (MaxCaseVal - MinCaseVal)
377 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
378 // Check whether a range of clusters is dense enough for a jump table
379 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
380 JumpTableSize = Range;
381 return 1;
382 }
383 }
384 return N;
385 }
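
As a worked illustration of the clustering heuristic above (a sketch only; the real outcome depends on the target's isSuitableForBitTests and isSuitableForJumpTable answers):

  // switch over i32 with cases {0, 5, 7, 9}, each with its own successor:
  //   N = 4, MinCaseVal = 0, MaxCaseVal = 9, Range = 9 - 0 + 1 = 10
  //   - if the four destinations pass isSuitableForBitTests -> return 1 (JumpTableSize stays 0)
  //   - else, if jump tables are allowed and N >= getMinimumJumpTableEntries(),
  //     a dense enough Range makes isSuitableForJumpTable succeed -> JumpTableSize = 10, return 1
  //   - otherwise -> return N = 4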
386
387 bool shouldBuildLookupTables() {
388 const TargetLoweringBase *TLI = getTLI();
389 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
390 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
391 }
392
393 bool haveFastSqrt(Type *Ty) {
394 const TargetLoweringBase *TLI = getTLI();
395 EVT VT = TLI->getValueType(DL, Ty);
396 return TLI->isTypeLegal(VT) &&
397 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
398 }
399
400 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
401 return true;
402 }
403
404 unsigned getFPOpCost(Type *Ty) {
405 // Check whether FADD is available, as a proxy for floating-point in
406 // general.
407 const TargetLoweringBase *TLI = getTLI();
408 EVT VT = TLI->getValueType(DL, Ty);
409 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
410 return TargetTransformInfo::TCC_Basic;
411 return TargetTransformInfo::TCC_Expensive;
412 }
413
414 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
415 const TargetLoweringBase *TLI = getTLI();
416 switch (Opcode) {
417 default: break;
418 case Instruction::Trunc:
419 if (TLI->isTruncateFree(OpTy, Ty))
420 return TargetTransformInfo::TCC_Free;
421 return TargetTransformInfo::TCC_Basic;
422 case Instruction::ZExt:
423 if (TLI->isZExtFree(OpTy, Ty))
424 return TargetTransformInfo::TCC_Free;
425 return TargetTransformInfo::TCC_Basic;
426
427 case Instruction::AddrSpaceCast:
428 if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
429 Ty->getPointerAddressSpace()))
430 return TargetTransformInfo::TCC_Free;
431 return TargetTransformInfo::TCC_Basic;
432 }
433
434 return BaseT::getOperationCost(Opcode, Ty, OpTy);
435 }
436
437 unsigned getInliningThresholdMultiplier() { return 1; }
438
439 int getInlinerVectorBonusPercent() { return 150; }
440
441 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
442 TTI::UnrollingPreferences &UP) {
443 // This unrolling functionality is target independent, but to provide some
444 // motivation for its intended use, for x86:
445
446 // According to the Intel 64 and IA-32 Architectures Optimization Reference
447 // Manual, Intel Core models and later have a loop stream detector (and
448 // associated uop queue) that can benefit from partial unrolling.
449 // The relevant requirements are:
450 // - The loop must have no more than 4 (8 for Nehalem and later) branches
451 // taken, and none of them may be calls.
452 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
453
454 // According to the Software Optimization Guide for AMD Family 15h
455 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
456 // and loop buffer which can benefit from partial unrolling.
457 // The relevant requirements are:
458 // - The loop must have fewer than 16 branches
459 // - The loop must have less than 40 uops in all executed loop branches
460
461 // The number of taken branches in a loop is hard to estimate here, and
462 // benchmarking has revealed that it is better not to be conservative when
463 // estimating the branch count. As a result, we'll ignore the branch limits
464 // until someone finds a case where it matters in practice.
465
466 unsigned MaxOps;
467 const TargetSubtargetInfo *ST = getST();
468 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
469 MaxOps = PartialUnrollingThreshold;
470 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
471 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
472 else
473 return;
474
475 // Scan the loop: don't unroll loops with calls.
476 for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
477 ++I) {
478 BasicBlock *BB = *I;
479
480 for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
481 if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
482 ImmutableCallSite CS(&*J);
483 if (const Function *F = CS.getCalledFunction()) {
484 if (!static_cast<T *>(this)->isLoweredToCall(F))
485 continue;
486 }
487
488 return;
489 }
490 }
491
492 // Enable runtime and partial unrolling up to the specified size.
493 // Enable using trip count upper bound to unroll loops.
494 UP.Partial = UP.Runtime = UP.UpperBound = true;
495 UP.PartialThreshold = MaxOps;
496
497 // Avoid unrolling when optimizing for size.
498 UP.OptSizeThreshold = 0;
499 UP.PartialOptSizeThreshold = 0;
500
501 // Set number of instructions optimized when "back edge"
502 // becomes "fall through" to default value of 2.
503 UP.BEInsns = 2;
504 }
505
506 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
507 AssumptionCache &AC,
508 TargetLibraryInfo *LibInfo,
509 HardwareLoopInfo &HWLoopInfo) {
510 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
511 }
512
513 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
514 AssumptionCache &AC, TargetLibraryInfo *TLI,
515 DominatorTree *DT,
516 const LoopAccessInfo *LAI) {
517 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
518 }
519
520 int getInstructionLatency(const Instruction *I) {
521 if (isa<LoadInst>(I))
522 return getST()->getSchedModel().DefaultLoadLatency;
523
524 return BaseT::getInstructionLatency(I);
525 }
526
527 virtual Optional<unsigned>
528 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
529 return Optional<unsigned>(
530 getST()->getCacheSize(static_cast<unsigned>(Level)));
531 }
532
533 virtual Optional<unsigned>
534 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
535 Optional<unsigned> TargetResult =
536 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
537
538 if (TargetResult)
539 return TargetResult;
540
541 return BaseT::getCacheAssociativity(Level);
542 }
543
544 virtual unsigned getCacheLineSize() const {
545 return getST()->getCacheLineSize();
546 }
547
548 virtual unsigned getPrefetchDistance() const {
549 return getST()->getPrefetchDistance();
550 }
551
552 virtual unsigned getMinPrefetchStride() const {
553 return getST()->getMinPrefetchStride();
554 }
555
556 virtual unsigned getMaxPrefetchIterationsAhead() const {
557 return getST()->getMaxPrefetchIterationsAhead();
558 }
559
560 /// @}
561
562 /// \name Vector TTI Implementations
563 /// @{
564
565 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
566
567 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
568 /// are set if the result needs to be inserted and/or extracted from vectors.
569 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
570    assert(Ty->isVectorTy() && "Can only scalarize vectors");
571 unsigned Cost = 0;
572
573 for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
574 if (Insert)
575 Cost += static_cast<T *>(this)
576 ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
577 if (Extract)
578 Cost += static_cast<T *>(this)
579 ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
580 }
581
582 return Cost;
583 }
584
585  /// Estimate the overhead of scalarizing an instruction's unique
586  /// non-constant operands. The types of the arguments are ordinarily
587  /// scalar, in which case the costs are multiplied by VF.
588 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
589 unsigned VF) {
590 unsigned Cost = 0;
591 SmallPtrSet<const Value*, 4> UniqueOperands;
592 for (const Value *A : Args) {
593 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
594 Type *VecTy = nullptr;
595 if (A->getType()->isVectorTy()) {
596 VecTy = A->getType();
597 // If A is a vector operand, VF should be 1 or correspond to A.
598          assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
599                 "Vector argument does not match VF");
600 }
601 else
602 VecTy = VectorType::get(A->getType(), VF);
603
604 Cost += getScalarizationOverhead(VecTy, false, true);
605 }
606 }
607
608 return Cost;
609 }
610
611 unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
612    assert(VecTy->isVectorTy());
613
614 unsigned Cost = 0;
615
616 Cost += getScalarizationOverhead(VecTy, true, false);
617 if (!Args.empty())
618 Cost += getOperandsScalarizationOverhead(Args,
619 VecTy->getVectorNumElements());
620 else
621 // When no information on arguments is provided, we add the cost
622 // associated with one argument as a heuristic.
623 Cost += getScalarizationOverhead(VecTy, false, true);
624
625 return Cost;
626 }
627
628 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
629
630 unsigned getArithmeticInstrCost(
631 unsigned Opcode, Type *Ty,
632 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
633 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
634 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
635 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
636 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
637 const Instruction *CxtI = nullptr) {
638 // Check if any of the operands are vector operands.
639 const TargetLoweringBase *TLI = getTLI();
640 int ISD = TLI->InstructionOpcodeToISD(Opcode);
641    assert(ISD && "Invalid opcode");
642
643 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
644
645 bool IsFloat = Ty->isFPOrFPVectorTy();
646 // Assume that floating point arithmetic operations cost twice as much as
647 // integer operations.
648 unsigned OpCost = (IsFloat ? 2 : 1);
649
650 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
651 // The operation is legal. Assume it costs 1.
652 // TODO: Once we have extract/insert subvector cost we need to use them.
653 return LT.first * OpCost;
654 }
655
656 if (!TLI->isOperationExpand(ISD, LT.second)) {
657 // If the operation is custom lowered, then assume that the code is twice
658 // as expensive.
659 return LT.first * 2 * OpCost;
660 }
661
662 // Else, assume that we need to scalarize this op.
663 // TODO: If one of the types get legalized by splitting, handle this
664 // similarly to what getCastInstrCost() does.
665 if (Ty->isVectorTy()) {
666 unsigned Num = Ty->getVectorNumElements();
667 unsigned Cost = static_cast<T *>(this)
668 ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
669 // Return the cost of multiple scalar invocation plus the cost of
670 // inserting and extracting the values.
671 return getScalarizationOverhead(Ty, Args) + Num * Cost;
672 }
673
674 // We don't know anything about this scalar instruction.
675 return OpCost;
676 }
677
678 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
679 Type *SubTp) {
680 switch (Kind) {
681 case TTI::SK_Broadcast:
682 return getBroadcastShuffleOverhead(Tp);
683 case TTI::SK_Select:
684 case TTI::SK_Reverse:
685 case TTI::SK_Transpose:
686 case TTI::SK_PermuteSingleSrc:
687 case TTI::SK_PermuteTwoSrc:
688 return getPermuteShuffleOverhead(Tp);
689 case TTI::SK_ExtractSubvector:
690 return getExtractSubvectorOverhead(Tp, Index, SubTp);
691 case TTI::SK_InsertSubvector:
692 return getInsertSubvectorOverhead(Tp, Index, SubTp);
693 }
694 llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind",
"/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 694)
;
695 }
696
697 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
698 const Instruction *I = nullptr) {
699 const TargetLoweringBase *TLI = getTLI();
700 int ISD = TLI->InstructionOpcodeToISD(Opcode);
701    assert(ISD && "Invalid opcode");
702 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
703 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
704
705 // Check for NOOP conversions.
706 if (SrcLT.first == DstLT.first &&
707 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
708
709 // Bitcast between types that are legalized to the same type are free.
710 if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
711 return 0;
712 }
713
714 if (Opcode == Instruction::Trunc &&
715 TLI->isTruncateFree(SrcLT.second, DstLT.second))
716 return 0;
717
718 if (Opcode == Instruction::ZExt &&
719 TLI->isZExtFree(SrcLT.second, DstLT.second))
720 return 0;
721
722 if (Opcode == Instruction::AddrSpaceCast &&
723 TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
724 Dst->getPointerAddressSpace()))
725 return 0;
726
727 // If this is a zext/sext of a load, return 0 if the corresponding
728 // extending load exists on target.
729 if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
730 I && isa<LoadInst>(I->getOperand(0))) {
731 EVT ExtVT = EVT::getEVT(Dst);
732 EVT LoadVT = EVT::getEVT(Src);
733 unsigned LType =
734 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
735 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
736 return 0;
737 }
738
739 // If the cast is marked as legal (or promote) then assume low cost.
740 if (SrcLT.first == DstLT.first &&
741 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
742 return 1;
743
744 // Handle scalar conversions.
745 if (!Src->isVectorTy() && !Dst->isVectorTy()) {
746 // Scalar bitcasts are usually free.
747 if (Opcode == Instruction::BitCast)
748 return 0;
749
750 // Just check the op cost. If the operation is legal then assume it costs
751 // 1.
752 if (!TLI->isOperationExpand(ISD, DstLT.second))
753 return 1;
754
755 // Assume that illegal scalar instruction are expensive.
756 return 4;
757 }
758
759 // Check vector-to-vector casts.
760 if (Dst->isVectorTy() && Src->isVectorTy()) {
761 // If the cast is between same-sized registers, then the check is simple.
762 if (SrcLT.first == DstLT.first &&
763 SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
764
765 // Assume that Zext is done using AND.
766 if (Opcode == Instruction::ZExt)
767 return 1;
768
769 // Assume that sext is done using SHL and SRA.
770 if (Opcode == Instruction::SExt)
771 return 2;
772
773 // Just check the op cost. If the operation is legal then assume it
774 // costs
775 // 1 and multiply by the type-legalization overhead.
776 if (!TLI->isOperationExpand(ISD, DstLT.second))
777 return SrcLT.first * 1;
778 }
779
780 // If we are legalizing by splitting, query the concrete TTI for the cost
781 // of casting the original vector twice. We also need to factor in the
782 // cost of the split itself. Count that as 1, to be consistent with
783 // TLI->getTypeLegalizationCost().
784 if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
785 TargetLowering::TypeSplitVector ||
786 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
787 TargetLowering::TypeSplitVector) &&
788 Src->getVectorNumElements() > 1 && Dst->getVectorNumElements() > 1) {
789 Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
790 Dst->getVectorNumElements() / 2);
791 Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
792 Src->getVectorNumElements() / 2);
793 T *TTI = static_cast<T *>(this);
794 return TTI->getVectorSplitCost() +
795 (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
796 }
797
798 // In other cases where the source or destination are illegal, assume
799 // the operation will get scalarized.
800 unsigned Num = Dst->getVectorNumElements();
801 unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
802 Opcode, Dst->getScalarType(), Src->getScalarType(), I);
803
804 // Return the cost of multiple scalar invocation plus the cost of
805 // inserting and extracting the values.
806 return getScalarizationOverhead(Dst, true, true) + Num * Cost;
807 }
808
809 // We already handled vector-to-vector and scalar-to-scalar conversions.
810 // This
811 // is where we handle bitcast between vectors and scalars. We need to assume
812 // that the conversion is scalarized in one way or another.
813 if (Opcode == Instruction::BitCast)
814 // Illegal bitcasts are done by storing and loading from a stack slot.
815 return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
816 : 0) +
817 (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
818 : 0);
819
820 llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 820)
;
821 }
822
823 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
824 VectorType *VecTy, unsigned Index) {
825 return static_cast<T *>(this)->getVectorInstrCost(
826 Instruction::ExtractElement, VecTy, Index) +
827 static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
828 VecTy->getElementType());
829 }
830
831 unsigned getCFInstrCost(unsigned Opcode) {
832 // Branches are assumed to be predicted.
833 return 0;
834 }
835
836 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
837 const Instruction *I) {
838 const TargetLoweringBase *TLI = getTLI();
839 int ISD = TLI->InstructionOpcodeToISD(Opcode);
840    assert(ISD && "Invalid opcode");
7. Assuming 'ISD' is not equal to 0
8. '?' condition is true
841
842 // Selects on vectors are actually vector selects.
843 if (ISD == ISD::SELECT) {
9. Assuming 'ISD' is not equal to SELECT
10. Taking false branch
844      assert(CondTy && "CondTy must exist");
845 if (CondTy->isVectorTy())
846 ISD = ISD::VSELECT;
847 }
848 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
849
850 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
11. Taking false branch
851 !TLI->isOperationExpand(ISD, LT.second)) {
852 // The operation is legal. Assume it costs 1. Multiply
853 // by the type-legalization overhead.
854 return LT.first * 1;
855 }
856
857 // Otherwise, assume that the cast is scalarized.
858 // TODO: If one of the types get legalized by splitting, handle this
859 // similarly to what getCastInstrCost() does.
860 if (ValTy->isVectorTy()) {
12. Calling 'Type::isVectorTy'
14. Returning from 'Type::isVectorTy'
15. Taking true branch
861 unsigned Num = ValTy->getVectorNumElements();
862 if (CondTy)
16. Assuming 'CondTy' is null
17. Taking false branch
863 CondTy = CondTy->getScalarType();
864 unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
19. Calling 'AArch64TTIImpl::getCmpSelInstrCost'
865 Opcode, ValTy->getScalarType(), CondTy, I);
18. Passing null pointer value via 3rd parameter 'CondTy'
866
867 // Return the cost of multiple scalar invocation plus the cost of
868 // inserting and extracting the values.
869 return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
870 }
871
872 // Unknown scalar opcode.
873 return 1;
874 }
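
Condensed, the path recorded in the annotations above (steps 7 through 19) enters the scalarization branch with a vector ValTy and a null CondTy, so the recursive call on source lines 864-865 forwards that null into AArch64TTIImpl::getCmpSelInstrCost. A sketch of the call shape with the analyzer's value written in explicitly:

  // CondTy is found to be null at step 16, the if (CondTy) guard at line 862 skips
  // line 863 (step 17), and the null is passed as the 3rd argument (steps 18-19):
  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
      Opcode, ValTy->getScalarType(), /*CondTy=*/nullptr, I);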
875
876 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
877 std::pair<unsigned, MVT> LT =
878 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
879
880 return LT.first;
881 }
882
883 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
884 unsigned AddressSpace,
885 const Instruction *I = nullptr) {
886    assert(!Src->isVoidTy() && "Invalid type");
887 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
888
889 // Assuming that all loads of legal types cost 1.
890 unsigned Cost = LT.first;
891
892 if (Src->isVectorTy() &&
893 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
894 // This is a vector load that legalizes to a larger type than the vector
895 // itself. Unless the corresponding extending load or truncating store is
896 // legal, then this will scalarize.
897 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
898 EVT MemVT = getTLI()->getValueType(DL, Src);
899 if (Opcode == Instruction::Store)
900 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
901 else
902 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
903
904 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
905 // This is a vector load/store for some illegal type that is scalarized.
906 // We must account for the cost of building or decomposing the vector.
907 Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
908 Opcode == Instruction::Store);
909 }
910 }
911
912 return Cost;
913 }
914
915 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
916 unsigned Factor,
917 ArrayRef<unsigned> Indices,
918 unsigned Alignment, unsigned AddressSpace,
919 bool UseMaskForCond = false,
920 bool UseMaskForGaps = false) {
921 VectorType *VT = dyn_cast<VectorType>(VecTy);
922    assert(VT && "Expect a vector type for interleaved memory op");
923
924 unsigned NumElts = VT->getNumElements();
925    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
926
927 unsigned NumSubElts = NumElts / Factor;
928 VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
929
930 // Firstly, the cost of load/store operation.
931 unsigned Cost;
932 if (UseMaskForCond || UseMaskForGaps)
933 Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
934 Opcode, VecTy, Alignment, AddressSpace);
935 else
936 Cost = static_cast<T *>(this)->getMemoryOpCost(
937 Opcode, VecTy, MaybeAlign(Alignment), AddressSpace);
938
939 // Legalize the vector type, and get the legalized and unlegalized type
940 // sizes.
941 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
942 unsigned VecTySize =
943 static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
944 unsigned VecTyLTSize = VecTyLT.getStoreSize();
945
946 // Return the ceiling of dividing A by B.
947 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
948
949 // Scale the cost of the memory operation by the fraction of legalized
950 // instructions that will actually be used. We shouldn't account for the
951 // cost of dead instructions since they will be removed.
952 //
953 // E.g., An interleaved load of factor 8:
954 // %vec = load <16 x i64>, <16 x i64>* %ptr
955 // %v0 = shufflevector %vec, undef, <0, 8>
956 //
957 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
958 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
959 // type). The other loads are unused.
960 //
961 // We only scale the cost of loads since interleaved store groups aren't
962 // allowed to have gaps.
963 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
964 // The number of loads of a legal type it will take to represent a load
965 // of the unlegalized vector type.
966 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
967
968 // The number of elements of the unlegalized type that correspond to a
969 // single legal instruction.
970 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
971
972 // Determine which legal instructions will be used.
973 BitVector UsedInsts(NumLegalInsts, false);
974 for (unsigned Index : Indices)
975 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
976 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
977
978 // Scale the cost of the load by the fraction of legal instructions that
979 // will be used.
980 Cost *= UsedInsts.count() / NumLegalInsts;
981 }
982
983 // Then plus the cost of interleave operation.
984 if (Opcode == Instruction::Load) {
985 // The interleave cost is similar to extract sub vectors' elements
986 // from the wide vector, and insert them into sub vectors.
987 //
988 // E.g. An interleaved load of factor 2 (with one member of index 0):
989 // %vec = load <8 x i32>, <8 x i32>* %ptr
990 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
991 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
992 // <8 x i32> vector and insert them into a <4 x i32> vector.
993
994 assert(Indices.size() <= Factor &&
995        "Interleaved memory op has too many members");
996
997 for (unsigned Index : Indices) {
998 assert(Index < Factor && "Invalid index for interleaved memory op");
999
1000 // Extract elements from loaded vector for each sub vector.
1001 for (unsigned i = 0; i < NumSubElts; i++)
1002 Cost += static_cast<T *>(this)->getVectorInstrCost(
1003 Instruction::ExtractElement, VT, Index + i * Factor);
1004 }
1005
1006 unsigned InsSubCost = 0;
1007 for (unsigned i = 0; i < NumSubElts; i++)
1008 InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
1009 Instruction::InsertElement, SubVT, i);
1010
1011 Cost += Indices.size() * InsSubCost;
1012 } else {
1013 // The interleave cost is that of extracting all elements from the
1014 // sub-vectors and inserting them into the wide vector.
1015 //
1016 // E.g. An interleaved store of factor 2:
1017 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1018 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1019 // The cost is estimated as extract all elements from both <4 x i32>
1020 // vectors and insert into the <8 x i32> vector.
1021
1022 unsigned ExtSubCost = 0;
1023 for (unsigned i = 0; i < NumSubElts; i++)
1024 ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
1025 Instruction::ExtractElement, SubVT, i);
1026 Cost += ExtSubCost * Factor;
1027
1028 for (unsigned i = 0; i < NumElts; i++)
1029 Cost += static_cast<T *>(this)
1030 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1031 }
1032
1033 if (!UseMaskForCond)
1034 return Cost;
1035
1036 Type *I8Type = Type::getInt8Ty(VT->getContext());
1037 VectorType *MaskVT = VectorType::get(I8Type, NumElts);
1038 SubVT = VectorType::get(I8Type, NumSubElts);
1039
1040 // The Mask shuffling cost is that of extracting all the elements of the
1041 // Mask and inserting each of them Factor times into the wide vector:
1042 //
1043 // E.g. an interleaved group with factor 3:
1044 // %mask = icmp ult <8 x i32> %vec1, %vec2
1045 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1046 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1047 // The cost is estimated as extract all mask elements from the <8xi1> mask
1048 // vector and insert them factor times into the <24xi1> shuffled mask
1049 // vector.
1050 for (unsigned i = 0; i < NumSubElts; i++)
1051 Cost += static_cast<T *>(this)->getVectorInstrCost(
1052 Instruction::ExtractElement, SubVT, i);
1053
1054 for (unsigned i = 0; i < NumElts; i++)
1055 Cost += static_cast<T *>(this)->getVectorInstrCost(
1056 Instruction::InsertElement, MaskVT, i);
1057
1058 // The Gaps mask is invariant and created outside the loop, therefore the
1059 // cost of creating it is not accounted for here. However if we have both
1060 // a MaskForGaps and some other mask that guards the execution of the
1061 // memory access, we need to account for the cost of And-ing the two masks
1062 // inside the loop.
1063 if (UseMaskForGaps)
1064 Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1065 BinaryOperator::And, MaskVT);
1066
1067 return Cost;
1068 }
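// As a rough standalone sketch of the used-instruction scaling above, here is
// the factor-8 example from the comments worked out with plain integer
// arithmetic. The element width (i64) and the legal register width (v2i64,
// i.e. 16 bytes) are assumptions made only for this illustration; the real
// numbers come from the target's type legalization.

#include <cstdio>
#include <vector>

int main() {
  // %vec = load <16 x i64> ; %v0 = shufflevector %vec, undef, <0, 8>
  unsigned NumElts = 16, Factor = 8;
  unsigned NumSubElts = NumElts / Factor;              // 2
  unsigned VecTySize = 16 * 8, VecTyLTSize = 2 * 8;    // store sizes in bytes
  auto Ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
  unsigned NumLegalInsts = Ceil(VecTySize, VecTyLTSize);        // 8 v2i64 loads
  unsigned NumEltsPerLegalInst = Ceil(NumElts, NumLegalInsts);  // 2 elements each
  std::vector<bool> UsedInsts(NumLegalInsts, false);
  for (unsigned Index : {0u})                          // a single member, index 0
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      UsedInsts[(Index + Elt * Factor) / NumEltsPerLegalInst] = true;
  unsigned Used = 0;
  for (bool B : UsedInsts)
    Used += B;
  // Prints "2 of 8": only the loads covering elements [0:1] and [8:9] survive.
  std::printf("%u of %u legalized loads are used\n", Used, NumLegalInsts);
}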
1069
1070 /// Get intrinsic cost based on arguments.
1071 unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1072 ArrayRef<Value *> Args, FastMathFlags FMF,
1073 unsigned VF = 1) {
1074 unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1075 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1076 auto *ConcreteTTI = static_cast<T *>(this);
1077
1078 switch (IID) {
1079 default: {
1080 // Assume that we need to scalarize this intrinsic.
1081 SmallVector<Type *, 4> Types;
1082 for (Value *Op : Args) {
1083 Type *OpTy = Op->getType();
1084 assert(VF == 1 || !OpTy->isVectorTy());
1085 Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1086 }
1087
1088 if (VF > 1 && !RetTy->isVoidTy())
1089 RetTy = VectorType::get(RetTy, VF);
1090
1091 // Compute the scalarization overhead based on Args for a vector
1092 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1093 // CostModel will pass a vector RetTy and VF is 1.
1094 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1095 if (RetVF > 1 || VF > 1) {
1096 ScalarizationCost = 0;
1097 if (!RetTy->isVoidTy())
1098 ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1099 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1100 }
1101
1102 return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1103 ScalarizationCost);
1104 }
1105 case Intrinsic::masked_scatter: {
1106 assert(VF == 1 && "Can't vectorize types here.");
1107 Value *Mask = Args[3];
1108 bool VarMask = !isa<Constant>(Mask);
1109 unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1110 return ConcreteTTI->getGatherScatterOpCost(
1111 Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1112 }
1113 case Intrinsic::masked_gather: {
1114 assert(VF == 1 && "Can't vectorize types here.");
1115 Value *Mask = Args[2];
1116 bool VarMask = !isa<Constant>(Mask);
1117 unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1118 return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1119 Args[0], VarMask, Alignment);
1120 }
1121 case Intrinsic::experimental_vector_reduce_add:
1122 case Intrinsic::experimental_vector_reduce_mul:
1123 case Intrinsic::experimental_vector_reduce_and:
1124 case Intrinsic::experimental_vector_reduce_or:
1125 case Intrinsic::experimental_vector_reduce_xor:
1126 case Intrinsic::experimental_vector_reduce_v2_fadd:
1127 case Intrinsic::experimental_vector_reduce_v2_fmul:
1128 case Intrinsic::experimental_vector_reduce_smax:
1129 case Intrinsic::experimental_vector_reduce_smin:
1130 case Intrinsic::experimental_vector_reduce_fmax:
1131 case Intrinsic::experimental_vector_reduce_fmin:
1132 case Intrinsic::experimental_vector_reduce_umax:
1133 case Intrinsic::experimental_vector_reduce_umin:
1134 return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1135 case Intrinsic::fshl:
1136 case Intrinsic::fshr: {
1137 Value *X = Args[0];
1138 Value *Y = Args[1];
1139 Value *Z = Args[2];
1140 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1141 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1142 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1143 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1144 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1145 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1146 : TTI::OP_None;
1147 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1148 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1149 unsigned Cost = 0;
1150 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1151 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1152 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1153 OpKindX, OpKindZ, OpPropsX);
1154 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1155 OpKindY, OpKindZ, OpPropsY);
1156 // Non-constant shift amounts require a modulo.
1157 if (OpKindZ != TTI::OK_UniformConstantValue &&
1158 OpKindZ != TTI::OK_NonUniformConstantValue)
1159 Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1160 OpKindZ, OpKindBW, OpPropsZ,
1161 OpPropsBW);
1162 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1163 if (X != Y) {
1164 Type *CondTy = RetTy->getWithNewBitWidth(1);
1165 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1166 CondTy, nullptr);
1167 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1168 CondTy, nullptr);
1169 }
1170 return Cost;
1171 }
1172 }
1173 }
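// A small standalone tally of the fshl/fshr expansion costed in the case above,
// under the assumption that every arithmetic, compare and select step costs one
// unit (an illustrative unit-cost model, not any real target's numbers).

#include <cstdio>

int main() {
  const unsigned UnitCost = 1;      // assumed per-operation cost
  bool ConstantShiftAmount = false; // shift amount Z is not a constant
  bool IsRotate = false;            // X == Y would make the funnel shift a rotate
  unsigned Cost = 4 * UnitCost;     // or + sub + shl + lshr
  if (!ConstantShiftAmount)
    Cost += UnitCost;               // urem to reduce Z modulo the bit width
  if (!IsRotate)
    Cost += 2 * UnitCost;           // icmp + select for shift-by-zero handling
  std::printf("estimated fshl cost: %u\n", Cost); // 7 under these assumptions
}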
1174
1175 /// Get intrinsic cost based on argument types.
1176 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1177 /// cost of scalarizing the arguments and the return value will be computed
1178 /// based on types.
1179 unsigned getIntrinsicInstrCost(
1180 Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1181 unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1182 auto *ConcreteTTI = static_cast<T *>(this);
1183
1184 SmallVector<unsigned, 2> ISDs;
1185 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1186 switch (IID) {
1187 default: {
1188 // Assume that we need to scalarize this intrinsic.
1189 unsigned ScalarizationCost = ScalarizationCostPassed;
1190 unsigned ScalarCalls = 1;
1191 Type *ScalarRetTy = RetTy;
1192 if (RetTy->isVectorTy()) {
1193 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1194 ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1195 ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1196 ScalarRetTy = RetTy->getScalarType();
1197 }
1198 SmallVector<Type *, 4> ScalarTys;
1199 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1200 Type *Ty = Tys[i];
1201 if (Ty->isVectorTy()) {
1202 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1203 ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1204 ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1205 Ty = Ty->getScalarType();
1206 }
1207 ScalarTys.push_back(Ty);
1208 }
1209 if (ScalarCalls == 1)
1210 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1211
1212 unsigned ScalarCost =
1213 ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1214
1215 return ScalarCalls * ScalarCost + ScalarizationCost;
1216 }
1217 // Look for intrinsics that can be lowered directly or turned into a scalar
1218 // intrinsic call.
1219 case Intrinsic::sqrt:
1220 ISDs.push_back(ISD::FSQRT);
1221 break;
1222 case Intrinsic::sin:
1223 ISDs.push_back(ISD::FSIN);
1224 break;
1225 case Intrinsic::cos:
1226 ISDs.push_back(ISD::FCOS);
1227 break;
1228 case Intrinsic::exp:
1229 ISDs.push_back(ISD::FEXP);
1230 break;
1231 case Intrinsic::exp2:
1232 ISDs.push_back(ISD::FEXP2);
1233 break;
1234 case Intrinsic::log:
1235 ISDs.push_back(ISD::FLOG);
1236 break;
1237 case Intrinsic::log10:
1238 ISDs.push_back(ISD::FLOG10);
1239 break;
1240 case Intrinsic::log2:
1241 ISDs.push_back(ISD::FLOG2);
1242 break;
1243 case Intrinsic::fabs:
1244 ISDs.push_back(ISD::FABS);
1245 break;
1246 case Intrinsic::canonicalize:
1247 ISDs.push_back(ISD::FCANONICALIZE);
1248 break;
1249 case Intrinsic::minnum:
1250 ISDs.push_back(ISD::FMINNUM);
1251 if (FMF.noNaNs())
1252 ISDs.push_back(ISD::FMINIMUM);
1253 break;
1254 case Intrinsic::maxnum:
1255 ISDs.push_back(ISD::FMAXNUM);
1256 if (FMF.noNaNs())
1257 ISDs.push_back(ISD::FMAXIMUM);
1258 break;
1259 case Intrinsic::copysign:
1260 ISDs.push_back(ISD::FCOPYSIGN);
1261 break;
1262 case Intrinsic::floor:
1263 ISDs.push_back(ISD::FFLOOR);
1264 break;
1265 case Intrinsic::ceil:
1266 ISDs.push_back(ISD::FCEIL);
1267 break;
1268 case Intrinsic::trunc:
1269 ISDs.push_back(ISD::FTRUNC);
1270 break;
1271 case Intrinsic::nearbyint:
1272 ISDs.push_back(ISD::FNEARBYINT);
1273 break;
1274 case Intrinsic::rint:
1275 ISDs.push_back(ISD::FRINT);
1276 break;
1277 case Intrinsic::round:
1278 ISDs.push_back(ISD::FROUND);
1279 break;
1280 case Intrinsic::pow:
1281 ISDs.push_back(ISD::FPOW);
1282 break;
1283 case Intrinsic::fma:
1284 ISDs.push_back(ISD::FMA);
1285 break;
1286 case Intrinsic::fmuladd:
1287 ISDs.push_back(ISD::FMA);
1288 break;
1289 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1290 case Intrinsic::lifetime_start:
1291 case Intrinsic::lifetime_end:
1292 case Intrinsic::sideeffect:
1293 return 0;
1294 case Intrinsic::masked_store:
1295 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1296 0);
1297 case Intrinsic::masked_load:
1298 return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1299 case Intrinsic::experimental_vector_reduce_add:
1300 return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1301 /*IsPairwiseForm=*/false);
1302 case Intrinsic::experimental_vector_reduce_mul:
1303 return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1304 /*IsPairwiseForm=*/false);
1305 case Intrinsic::experimental_vector_reduce_and:
1306 return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1307 /*IsPairwiseForm=*/false);
1308 case Intrinsic::experimental_vector_reduce_or:
1309 return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1310 /*IsPairwiseForm=*/false);
1311 case Intrinsic::experimental_vector_reduce_xor:
1312 return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1313 /*IsPairwiseForm=*/false);
1314 case Intrinsic::experimental_vector_reduce_v2_fadd:
1315 return ConcreteTTI->getArithmeticReductionCost(
1316 Instruction::FAdd, Tys[0],
1317 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1318 // reductions.
1319 case Intrinsic::experimental_vector_reduce_v2_fmul:
1320 return ConcreteTTI->getArithmeticReductionCost(
1321 Instruction::FMul, Tys[0],
1322 /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1323 // reductions.
1324 case Intrinsic::experimental_vector_reduce_smax:
1325 case Intrinsic::experimental_vector_reduce_smin:
1326 case Intrinsic::experimental_vector_reduce_fmax:
1327 case Intrinsic::experimental_vector_reduce_fmin:
1328 return ConcreteTTI->getMinMaxReductionCost(
1329 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1330 /*IsUnsigned=*/false);
1331 case Intrinsic::experimental_vector_reduce_umax:
1332 case Intrinsic::experimental_vector_reduce_umin:
1333 return ConcreteTTI->getMinMaxReductionCost(
1334 Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1335 /*IsUnsigned=*/true);
1336 case Intrinsic::sadd_sat:
1337 case Intrinsic::ssub_sat: {
1338 Type *CondTy = RetTy->getWithNewBitWidth(1);
1339
1340 Type *OpTy = StructType::create({RetTy, CondTy});
1341 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1342 ? Intrinsic::sadd_with_overflow
1343 : Intrinsic::ssub_with_overflow;
1344
1345 // SatMax -> Overflow && SumDiff < 0
1346 // SatMin -> Overflow && SumDiff >= 0
1347 unsigned Cost = 0;
1348 Cost += ConcreteTTI->getIntrinsicInstrCost(
1349 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1350 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1351 CondTy, nullptr);
1352 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1353 CondTy, nullptr);
1354 return Cost;
1355 }
1356 case Intrinsic::uadd_sat:
1357 case Intrinsic::usub_sat: {
1358 Type *CondTy = RetTy->getWithNewBitWidth(1);
1359
1360 Type *OpTy = StructType::create({RetTy, CondTy});
1361 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1362 ? Intrinsic::uadd_with_overflow
1363 : Intrinsic::usub_with_overflow;
1364
1365 unsigned Cost = 0;
1366 Cost += ConcreteTTI->getIntrinsicInstrCost(
1367 OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1368 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1369 CondTy, nullptr);
1370 return Cost;
1371 }
1372 case Intrinsic::smul_fix:
1373 case Intrinsic::umul_fix: {
1374 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1375 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1376
1377 unsigned ExtOp =
1378 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1379
1380 unsigned Cost = 0;
1381 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1382 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1383 Cost +=
1384 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1385 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1386 TTI::OK_AnyValue,
1387 TTI::OK_UniformConstantValue);
1388 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1389 TTI::OK_AnyValue,
1390 TTI::OK_UniformConstantValue);
1391 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1392 return Cost;
1393 }
1394 case Intrinsic::sadd_with_overflow:
1395 case Intrinsic::ssub_with_overflow: {
1396 Type *SumTy = RetTy->getContainedType(0);
1397 Type *OverflowTy = RetTy->getContainedType(1);
1398 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1399 ? BinaryOperator::Add
1400 : BinaryOperator::Sub;
1401
1402 // LHSSign -> LHS >= 0
1403 // RHSSign -> RHS >= 0
1404 // SumSign -> Sum >= 0
1405 //
1406 // Add:
1407 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1408 // Sub:
1409 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1410 unsigned Cost = 0;
1411 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1412 Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1413 OverflowTy, nullptr);
1414 Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1415 BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1416 Cost +=
1417 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1418 return Cost;
1419 }
1420 case Intrinsic::uadd_with_overflow:
1421 case Intrinsic::usub_with_overflow: {
1422 Type *SumTy = RetTy->getContainedType(0);
1423 Type *OverflowTy = RetTy->getContainedType(1);
1424 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1425 ? BinaryOperator::Add
1426 : BinaryOperator::Sub;
1427
1428 unsigned Cost = 0;
1429 Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1430 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1431 OverflowTy, nullptr);
1432 return Cost;
1433 }
1434 case Intrinsic::smul_with_overflow:
1435 case Intrinsic::umul_with_overflow: {
1436 Type *MulTy = RetTy->getContainedType(0);
1437 Type *OverflowTy = RetTy->getContainedType(1);
1438 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1439 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1440
1441 unsigned ExtOp = IID == Intrinsic::smul_with_overflow
1442 ? Instruction::SExt : Instruction::ZExt;
1443
1444 unsigned Cost = 0;
1445 Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1446 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1447 Cost +=
1448 2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1449 Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1450 TTI::OK_AnyValue,
1451 TTI::OK_UniformConstantValue);
1452
1453 if (IID == Intrinsic::smul_with_overflow)
1454 Cost += ConcreteTTI->getArithmeticInstrCost(
1455 Instruction::AShr, MulTy, TTI::OK_AnyValue,
1456 TTI::OK_UniformConstantValue);
1457
1458 Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1459 OverflowTy, nullptr);
1460 return Cost;
1461 }
1462 case Intrinsic::ctpop:
1463 ISDs.push_back(ISD::CTPOP);
1464 // In case of legalization use TCC_Expensive. This is cheaper than a
1465 // library call but still not a cheap instruction.
1466 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1467 break;
1468 // FIXME: ctlz, cttz, ...
1469 }
1470
1471 const TargetLoweringBase *TLI = getTLI();
1472 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1473
1474 SmallVector<unsigned, 2> LegalCost;
1475 SmallVector<unsigned, 2> CustomCost;
1476 for (unsigned ISD : ISDs) {
1477 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1478 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1479 TLI->isFAbsFree(LT.second)) {
1480 return 0;
1481 }
1482
1483 // The operation is legal. Assume it costs 1.
1484 // If the type is split to multiple registers, assume that there is some
1485 // overhead to this.
1486 // TODO: Once we have extract/insert subvector cost we need to use them.
1487 if (LT.first > 1)
1488 LegalCost.push_back(LT.first * 2);
1489 else
1490 LegalCost.push_back(LT.first * 1);
1491 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1492 // If the operation is custom lowered then assume
1493 // that the code is twice as expensive.
1494 CustomCost.push_back(LT.first * 2);
1495 }
1496 }
1497
1498 auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1499 if (MinLegalCostI != LegalCost.end())
1500 return *MinLegalCostI;
1501
1502 auto MinCustomCostI =
1503 std::min_element(CustomCost.begin(), CustomCost.end());
1504 if (MinCustomCostI != CustomCost.end())
1505 return *MinCustomCostI;
1506
1507 // If we can't lower fmuladd into an FMA, estimate the cost as a floating
1508 // point mul followed by an add.
1509 if (IID == Intrinsic::fmuladd)
1510 return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1511 ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1512
1513 // Else, assume that we need to scalarize this intrinsic. For math builtins
1514 // this will emit a costly libcall, adding call overhead and spills. Make it
1515 // very expensive.
1516 if (RetTy->isVectorTy()) {
1517 unsigned ScalarizationCost =
1518 ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1519 ? ScalarizationCostPassed
1520 : getScalarizationOverhead(RetTy, true, false));
1521 unsigned ScalarCalls = RetTy->getVectorNumElements();
1522 SmallVector<Type *, 4> ScalarTys;
1523 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1524 Type *Ty = Tys[i];
1525 if (Ty->isVectorTy())
1526 Ty = Ty->getScalarType();
1527 ScalarTys.push_back(Ty);
1528 }
1529 unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1530 IID, RetTy->getScalarType(), ScalarTys, FMF);
1531 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1532 if (Tys[i]->isVectorTy()) {
1533 if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1534 ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1535 ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1536 }
1537 }
1538
1539 return ScalarCalls * ScalarCost + ScalarizationCost;
1540 }
1541
1542 // This is going to be turned into a library call; make it expensive.
1543 return SingleCallCost;
1544 }
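// The scalarization fallback at the end of this function charges one scalar
// call per vector lane plus the cost of moving lanes between vectors and
// scalars: ScalarCalls * ScalarCost + ScalarizationCost. A minimal sketch with
// assumed numbers (4 lanes, the library-call cost of 10 used above, and an
// arbitrary extract/insert overhead of 8):

#include <cstdio>

int main() {
  unsigned ScalarCalls = 4;        // lanes of the vector intrinsic (assumed)
  unsigned ScalarCost = 10;        // cost of one scalar library call
  unsigned ScalarizationCost = 8;  // assumed insert/extract overhead
  std::printf("scalarized cost: %u\n",
              ScalarCalls * ScalarCost + ScalarizationCost); // 48
}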
1545
1546 /// Compute a cost of the given call instruction.
1547 ///
1548 /// Compute the cost of calling function F with return type RetTy and
1549 /// argument types Tys. F might be nullptr, in this case the cost of an
1550 /// arbitrary call with the specified signature will be returned.
1551 /// This is used, for instance, when we estimate call of a vector
1552 /// counterpart of the given function.
1553 /// \param F Called function, might be nullptr.
1554 /// \param RetTy Return value types.
1555 /// \param Tys Argument types.
1556 /// \returns The cost of Call instruction.
1557 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1558 return 10;
1559 }
1560
1561 unsigned getNumberOfParts(Type *Tp) {
1562 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1563 return LT.first;
1564 }
1565
1566 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1567 const SCEV *) {
1568 return 0;
1569 }
1570
1571 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1572 /// We're assuming that reduction operations are performed in the following way:
1573 /// 1. Non-pairwise reduction
1574 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1575 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1576 /// \----------------v-------------/ \----------v------------/
1577 /// n/2 elements n/2 elements
1578 /// %red1 = op <n x t> %val, <n x t> val1
1579 /// After this operation we have a vector %red1 where only the first n/2
1580 /// elements are meaningful, the second n/2 elements are undefined and can be
1581 /// dropped. All other operations are actually working with the vector of
1582 /// length n/2, not n, though the real vector length is still n.
1583 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1584 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1585 /// \----------------v-------------/ \----------v------------/
1586 /// n/4 elements 3*n/4 elements
1587 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1588 /// length n/2, the resulting vector has length n/4 etc.
1589 /// 2. Pairwise reduction:
1590 /// Everything is the same except for an additional shuffle operation which
1591 /// is used to produce operands for pairwise kind of reductions.
1592 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1593 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1594 /// \-------------v----------/ \----------v------------/
1595 /// n/2 elements n/2 elements
1596 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1597 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1598 /// \-------------v----------/ \----------v------------/
1599 /// n/2 elements n/2 elements
1600 /// %red1 = op <n x t> %val1, <n x t> val2
1601 /// Again, the operation is performed on <n x t> vector, but the resulting
1602 /// vector %red1 is <n/2 x t> vector.
1603 ///
1604 /// The cost model should take into account that the actual length of the
1605 /// vector is reduced on each iteration.
1606 unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1607 bool IsPairwise) {
1608 assert(Ty->isVectorTy() && "Expect a vector type");
1609 Type *ScalarTy = Ty->getVectorElementType();
1610 unsigned NumVecElts = Ty->getVectorNumElements();
1611 unsigned NumReduxLevels = Log2_32(NumVecElts);
1612 unsigned ArithCost = 0;
1613 unsigned ShuffleCost = 0;
1614 auto *ConcreteTTI = static_cast<T *>(this);
1615 std::pair<unsigned, MVT> LT =
1616 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1617 unsigned LongVectorCount = 0;
1618 unsigned MVTLen =
1619 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1620 while (NumVecElts > MVTLen) {
1621 NumVecElts /= 2;
1622 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1623 // Assume the pairwise shuffles add a cost.
1624 ShuffleCost += (IsPairwise + 1) *
1625 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1626 NumVecElts, SubTy);
1627 ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1628 Ty = SubTy;
1629 ++LongVectorCount;
1630 }
1631
1632 NumReduxLevels -= LongVectorCount;
1633
1634 // The minimal length of the vector is limited by the real length of vector
1635 // operations performed on the current platform. That's why several final
1636 // reduction operations are performed on the vectors with the same
1637 // architecture-dependent length.
1638
1639 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1640 // reductions need two shuffles on every level except the last one. On that
1641 // level one of the shuffles is <0, u, u, ...> which is the identity.
1642 unsigned NumShuffles = NumReduxLevels;
1643 if (IsPairwise && NumReduxLevels >= 1)
1644 NumShuffles += NumReduxLevels - 1;
1645 ShuffleCost += NumShuffles *
1646 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1647 0, Ty);
1648 ArithCost += NumReduxLevels *
1649 ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1650 return ShuffleCost + ArithCost +
1651 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1652 }
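// A standalone sketch of the counting above for an 8-element reduction whose
// widest legal vector holds 4 elements. The widths and the unit costs for
// shuffles, vector ops and the final extract are assumptions for illustration.

#include <cstdio>

int main() {
  bool IsPairwise = false;
  unsigned NumVecElts = 8, MVTLen = 4;     // assumed vector shapes
  unsigned NumReduxLevels = 3;             // log2(8)
  unsigned ShuffleCost = 0, ArithCost = 0, LongVectorCount = 0;
  while (NumVecElts > MVTLen) {            // split until the type is legal
    NumVecElts /= 2;
    ShuffleCost += (IsPairwise + 1) * 1;   // extract-subvector shuffle(s)
    ArithCost += 1;                        // op on the half-width vector
    ++LongVectorCount;
  }
  NumReduxLevels -= LongVectorCount;
  unsigned NumShuffles = NumReduxLevels;
  if (IsPairwise && NumReduxLevels >= 1)
    NumShuffles += NumReduxLevels - 1;
  ShuffleCost += NumShuffles * 1;          // permute shuffles on legal vectors
  ArithCost += NumReduxLevels * 1;
  unsigned Total = ShuffleCost + ArithCost + 1; // + final extractelement
  std::printf("estimated reduction cost: %u\n", Total); // 7 for this shape
}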
1653
1654 /// Try to calculate op costs for min/max reduction operations.
1655 /// \param CondTy Conditional type for the Select instruction.
1656 unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1657 bool) {
1658 assert(Ty->isVectorTy() && "Expect a vector type");
1659 Type *ScalarTy = Ty->getVectorElementType();
1660 Type *ScalarCondTy = CondTy->getVectorElementType();
1661 unsigned NumVecElts = Ty->getVectorNumElements();
1662 unsigned NumReduxLevels = Log2_32(NumVecElts);
1663 unsigned CmpOpcode;
1664 if (Ty->isFPOrFPVectorTy()) {
1665 CmpOpcode = Instruction::FCmp;
1666 } else {
1667 assert(Ty->isIntOrIntVectorTy() &&
1668        "expecting floating point or integer type for min/max reduction");
1669 CmpOpcode = Instruction::ICmp;
1670 }
1671 unsigned MinMaxCost = 0;
1672 unsigned ShuffleCost = 0;
1673 auto *ConcreteTTI = static_cast<T *>(this);
1674 std::pair<unsigned, MVT> LT =
1675 ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1676 unsigned LongVectorCount = 0;
1677 unsigned MVTLen =
1678 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1679 while (NumVecElts > MVTLen) {
1680 NumVecElts /= 2;
1681 Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1682 CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1683
1684 // Assume the pairwise shuffles add a cost.
1685 ShuffleCost += (IsPairwise + 1) *
1686 ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1687 NumVecElts, SubTy);
1688 MinMaxCost +=
1689 ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1690 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1691 nullptr);
1692 Ty = SubTy;
1693 ++LongVectorCount;
1694 }
1695
1696 NumReduxLevels -= LongVectorCount;
1697
1698 // The minimal length of the vector is limited by the real length of vector
1699 // operations performed on the current platform. That's why several final
1700 // reduction operations are performed on the vectors with the same
1701 // architecture-dependent length.
1702
1703 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1704 // reductions need two shuffles on every level except the last one. On that
1705 // level one of the shuffles is <0, u, u, ...> which is the identity.
1706 unsigned NumShuffles = NumReduxLevels;
1707 if (IsPairwise && NumReduxLevels >= 1)
1708 NumShuffles += NumReduxLevels - 1;
1709 ShuffleCost += NumShuffles *
1710 ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1711 0, Ty);
1712 MinMaxCost +=
1713 NumReduxLevels *
1714 (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1715 ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1716 nullptr));
1717 // The last min/max should be in vector registers and we counted it above.
1718 // So we just need a single extractelement.
1719 return ShuffleCost + MinMaxCost +
1720 ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1721 }
1722
1723 unsigned getVectorSplitCost() { return 1; }
1724
1725 /// @}
1726};
1727
1728/// Concrete BasicTTIImpl that can be used if no further customization
1729/// is needed.
1730class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1731 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1732
1733 friend class BasicTTIImplBase<BasicTTIImpl>;
1734
1735 const TargetSubtargetInfo *ST;
1736 const TargetLoweringBase *TLI;
1737
1738 const TargetSubtargetInfo *getST() const { return ST; }
1739 const TargetLoweringBase *getTLI() const { return TLI; }
1740
1741public:
1742 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1743};
1744
1745} // end namespace llvm
1746
1747#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has three
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/Analysis/ProfileSummaryInfo.h"
32#include "llvm/CodeGen/DAGCombine.h"
33#include "llvm/CodeGen/ISDOpcodes.h"
34#include "llvm/CodeGen/RuntimeLibcalls.h"
35#include "llvm/CodeGen/SelectionDAG.h"
36#include "llvm/CodeGen/SelectionDAGNodes.h"
37#include "llvm/CodeGen/TargetCallingConv.h"
38#include "llvm/CodeGen/ValueTypes.h"
39#include "llvm/IR/Attributes.h"
40#include "llvm/IR/CallSite.h"
41#include "llvm/IR/CallingConv.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/IRBuilder.h"
46#include "llvm/IR/InlineAsm.h"
47#include "llvm/IR/Instruction.h"
48#include "llvm/IR/Instructions.h"
49#include "llvm/IR/Type.h"
50#include "llvm/MC/MCRegisterInfo.h"
51#include "llvm/Support/Alignment.h"
52#include "llvm/Support/AtomicOrdering.h"
53#include "llvm/Support/Casting.h"
54#include "llvm/Support/ErrorHandling.h"
55#include "llvm/Support/MachineValueType.h"
56#include "llvm/Target/TargetMachine.h"
57#include "llvm/Transforms/Utils/SizeOpts.h"
58#include <algorithm>
59#include <cassert>
60#include <climits>
61#include <cstdint>
62#include <iterator>
63#include <map>
64#include <string>
65#include <utility>
66#include <vector>
67
68namespace llvm {
69
70class BranchProbability;
71class CCState;
72class CCValAssign;
73class Constant;
74class FastISel;
75class FunctionLoweringInfo;
76class GlobalValue;
77class GISelKnownBits;
78class IntrinsicInst;
79struct KnownBits;
80class LegacyDivergenceAnalysis;
81class LLVMContext;
82class MachineBasicBlock;
83class MachineFunction;
84class MachineInstr;
85class MachineJumpTableInfo;
86class MachineLoop;
87class MachineRegisterInfo;
88class MCContext;
89class MCExpr;
90class Module;
91class TargetRegisterClass;
92class TargetLibraryInfo;
93class TargetRegisterInfo;
94class Value;
95
96namespace Sched {
97
98 enum Preference {
99 None, // No preference
100 Source, // Follow source order.
101 RegPressure, // Scheduling for lowest register pressure.
102 Hybrid, // Scheduling for both latency and register pressure.
103 ILP, // Scheduling for ILP in low register pressure mode.
104 VLIW // Scheduling for VLIW targets.
105 };
106
107} // end namespace Sched
108
109/// This base class for TargetLowering contains the SelectionDAG-independent
110/// parts that can be used from the rest of CodeGen.
111class TargetLoweringBase {
112public:
113 /// This enum indicates whether operations are valid for a target, and if not,
114 /// what action should be used to make them valid.
115 enum LegalizeAction : uint8_t {
116 Legal, // The target natively supports this operation.
117 Promote, // This operation should be executed in a larger type.
118 Expand, // Try to expand this to other ops, otherwise use a libcall.
119 LibCall, // Don't try to expand this to other ops, always use a libcall.
120 Custom // Use the LowerOperation hook to implement custom lowering.
121 };
122
123 /// This enum indicates whether types are legal for a target, and if not,
124 /// what action should be used to make them valid.
125 enum LegalizeTypeAction : uint8_t {
126 TypeLegal, // The target natively supports this type.
127 TypePromoteInteger, // Replace this integer with a larger one.
128 TypeExpandInteger, // Split this integer into two of half the size.
129 TypeSoftenFloat, // Convert this float to a same size integer type.
130 TypeExpandFloat, // Split this float into two of half the size.
131 TypeScalarizeVector, // Replace this one-element vector with its element.
132 TypeSplitVector, // Split this vector into two of half the size.
133 TypeWidenVector, // This vector should be widened into a larger vector.
134 TypePromoteFloat // Replace this float with a larger one.
135 };
136
137 /// LegalizeKind holds the legalization kind that needs to happen to EVT
138 /// in order to type-legalize it.
139 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
140
141 /// Enum that describes how the target represents true/false values.
142 enum BooleanContent {
143 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
144 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
145 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
146 };
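// For illustration only, this is what "true" could look like as an 8-bit value
// under each convention; the concrete bit patterns below are made up, a target
// simply promises which shape its comparison results have.

#include <cstdio>

int main() {
  unsigned ZeroOrOneTrue = 0x01;    // ZeroOrOneBooleanContent: only bit 0 set
  unsigned ZeroOrNegOneTrue = 0xFF; // ZeroOrNegativeOneBooleanContent: every bit
                                    // copies bit 0
  unsigned UndefinedTrue = 0xA5;    // UndefinedBooleanContent: bit 0 is set, the
                                    // remaining bits are garbage
  std::printf("%02X %02X %02X\n", ZeroOrOneTrue, ZeroOrNegOneTrue, UndefinedTrue);
}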
147
148 /// Enum that describes what type of support for selects the target has.
149 enum SelectSupportKind {
150 ScalarValSelect, // The target supports scalar selects (ex: cmov).
151 ScalarCondVectorVal, // The target supports selects with a scalar condition
152 // and vector values (ex: cmov).
153 VectorMaskSelect // The target supports vector selects with a vector
154 // mask (ex: x86 blends).
155 };
156
157 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
158 /// to, if at all. Exists because different targets have different levels of
159 /// support for these atomic instructions, and also have different options
160 /// w.r.t. what they should expand to.
161 enum class AtomicExpansionKind {
162 None, // Don't expand the instruction.
163 LLSC, // Expand the instruction into loadlinked/storeconditional; used
164 // by ARM/AArch64.
165 LLOnly, // Expand the (load) instruction into just a load-linked, which has
166 // greater atomic guarantees than a normal load.
167 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
168 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
169 };
170
171 /// Enum that specifies when a multiplication should be expanded.
172 enum class MulExpansionKind {
173 Always, // Always expand the instruction.
174 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
175 // or custom.
176 };
177
178 class ArgListEntry {
179 public:
180 Value *Val = nullptr;
181 SDValue Node = SDValue();
182 Type *Ty = nullptr;
183 bool IsSExt : 1;
184 bool IsZExt : 1;
185 bool IsInReg : 1;
186 bool IsSRet : 1;
187 bool IsNest : 1;
188 bool IsByVal : 1;
189 bool IsInAlloca : 1;
190 bool IsReturned : 1;
191 bool IsSwiftSelf : 1;
192 bool IsSwiftError : 1;
193 bool IsCFGuardTarget : 1;
194 uint16_t Alignment = 0;
195 Type *ByValType = nullptr;
196
197 ArgListEntry()
198 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
199 IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
200 IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
201
202 void setAttributes(const CallBase *Call, unsigned ArgIdx);
203
204 void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
205 return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
206 }
207 };
208 using ArgListTy = std::vector<ArgListEntry>;
209
210 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
211 ArgListTy &Args) const {};
212
213 static ISD::NodeType getExtendForContent(BooleanContent Content) {
214 switch (Content) {
215 case UndefinedBooleanContent:
216 // Extend by adding rubbish bits.
217 return ISD::ANY_EXTEND;
218 case ZeroOrOneBooleanContent:
219 // Extend by adding zero bits.
220 return ISD::ZERO_EXTEND;
221 case ZeroOrNegativeOneBooleanContent:
222 // Extend by copying the sign bit.
223 return ISD::SIGN_EXTEND;
224 }
225 llvm_unreachable("Invalid content kind")::llvm::llvm_unreachable_internal("Invalid content kind", "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 225)
;
226 }
227
228 explicit TargetLoweringBase(const TargetMachine &TM);
229 TargetLoweringBase(const TargetLoweringBase &) = delete;
230 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
231 virtual ~TargetLoweringBase() = default;
232
233 /// Return true if the target supports strict float operations.
234 bool isStrictFPEnabled() const {
235 return IsStrictFPEnabled;
236 }
237
238protected:
239 /// Initialize all of the actions to default values.
240 void initActions();
241
242public:
243 const TargetMachine &getTargetMachine() const { return TM; }
244
245 virtual bool useSoftFloat() const { return false; }
246
247 /// Return the pointer type for the given address space, defaults to
248 /// the pointer type from the data layout.
249 /// FIXME: The default needs to be removed once all the code is updated.
250 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
251 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
252 }
253
254 /// Return the in-memory pointer type for the given address space, defaults to
255 /// the pointer type from the data layout. FIXME: The default needs to be
256 /// removed once all the code is updated.
257 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
258 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
259 }
260
261 /// Return the type for frame index, which is determined by
262 /// the alloca address space specified through the data layout.
263 MVT getFrameIndexTy(const DataLayout &DL) const {
264 return getPointerTy(DL, DL.getAllocaAddrSpace());
265 }
266
267 /// Return the type for operands of fence.
268 /// TODO: Let fence operands be of i32 type and remove this.
269 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
270 return getPointerTy(DL);
271 }
272
273 /// EVT is not used in-tree, but is used by out-of-tree targets.
274 /// A documentation for this function would be nice...
275 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
276
277 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
278 bool LegalTypes = true) const;
279
280 /// Returns the type to be used for the index operand of:
281 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
282 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
283 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
284 return getPointerTy(DL);
285 }
286
287 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
288 return true;
289 }
290
291 /// Return true if it is profitable to convert a select of FP constants into
292 /// a constant pool load whose address depends on the select condition. The
293 /// parameter may be used to differentiate a select with FP compare from
294 /// integer compare.
295 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
296 return true;
297 }
298
299 /// Return true if multiple condition registers are available.
300 bool hasMultipleConditionRegisters() const {
301 return HasMultipleConditionRegisters;
302 }
303
304 /// Return true if the target has BitExtract instructions.
305 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
306
307 /// Return the preferred vector type legalization action.
308 virtual TargetLoweringBase::LegalizeTypeAction
309 getPreferredVectorAction(MVT VT) const {
310 // The default action for one element vectors is to scalarize
311 if (VT.getVectorNumElements() == 1)
312 return TypeScalarizeVector;
313 // The default action for an odd-width vector is to widen.
314 if (!VT.isPow2VectorType())
315 return TypeWidenVector;
316 // The default action for other vectors is to promote
317 return TypePromoteInteger;
318 }
319
320 // There are two general methods for expanding a BUILD_VECTOR node:
321 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
322 // them together.
323 // 2. Build the vector on the stack and then load it.
324 // If this function returns true, then method (1) will be used, subject to
325 // the constraint that all of the necessary shuffles are legal (as determined
326 // by isShuffleMaskLegal). If this function returns false, then method (2) is
327 // always used. The vector type, and the number of defined values, are
328 // provided.
329 virtual bool
330 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
331 unsigned DefinedValues) const {
332 return DefinedValues < 3;
333 }
334
335 /// Return true if integer divide is usually cheaper than a sequence of
336 /// several shifts, adds, and multiplies for this target.
337 /// The definition of "cheaper" may depend on whether we're optimizing
338 /// for speed or for size.
339 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
340
341 /// Return true if the target can handle a standalone remainder operation.
342 virtual bool hasStandaloneRem(EVT VT) const {
343 return true;
344 }
345
346 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
347 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
348 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
349 return false;
350 }
351
352 /// Reciprocal estimate status values used by the functions below.
353 enum ReciprocalEstimate : int {
354 Unspecified = -1,
355 Disabled = 0,
356 Enabled = 1
357 };
358
359 /// Return a ReciprocalEstimate enum value for a square root of the given type
360 /// based on the function's attributes. If the operation is not overridden by
361 /// the function's attributes, "Unspecified" is returned and target defaults
362 /// are expected to be used for instruction selection.
363 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
364
365 /// Return a ReciprocalEstimate enum value for a division of the given type
366 /// based on the function's attributes. If the operation is not overridden by
367 /// the function's attributes, "Unspecified" is returned and target defaults
368 /// are expected to be used for instruction selection.
369 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
370
371 /// Return the refinement step count for a square root of the given type based
372 /// on the function's attributes. If the operation is not overridden by
373 /// the function's attributes, "Unspecified" is returned and target defaults
374 /// are expected to be used for instruction selection.
375 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
376
377 /// Return the refinement step count for a division of the given type based
378 /// on the function's attributes. If the operation is not overridden by
379 /// the function's attributes, "Unspecified" is returned and target defaults
380 /// are expected to be used for instruction selection.
381 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
382
383 /// Returns true if target has indicated at least one type should be bypassed.
384 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
385
386 /// Returns map of slow types for division or remainder with corresponding
387 /// fast types
388 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
389 return BypassSlowDivWidths;
390 }
391
392 /// Return true if Flow Control is an expensive operation that should be
393 /// avoided.
394 bool isJumpExpensive() const { return JumpIsExpensive; }
395
396 /// Return true if selects are only cheaper than branches if the branch is
397 /// unlikely to be predicted right.
398 bool isPredictableSelectExpensive() const {
399 return PredictableSelectIsExpensive;
400 }
401
402 /// If a branch or a select condition is skewed in one direction by more than
403 /// this factor, it is very likely to be predicted correctly.
404 virtual BranchProbability getPredictableBranchThreshold() const;
405
406 /// Return true if the following transform is beneficial:
407 /// fold (conv (load x)) -> (load (conv*)x)
408 /// On architectures that don't natively support some vector loads
409 /// efficiently, casting the load to a smaller vector of larger types and
410 /// loading is more efficient, however, this can be undone by optimizations in
411 /// dag combiner.
412 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
413 const SelectionDAG &DAG,
414 const MachineMemOperand &MMO) const {
415 // Don't do this if we could do an indexed load on the original type, but not on
416 // the new one.
417 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
418 return true;
419
420 MVT LoadMVT = LoadVT.getSimpleVT();
421
422 // Don't bother doing this if it's just going to be promoted again later, as
423 // doing so might interfere with other combines.
424 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
425 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
426 return false;
427
428 bool Fast = false;
429 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
430 MMO, &Fast) && Fast;
431 }
432
433 /// Return true if the following transform is beneficial:
434 /// (store (y (conv x)), y*)) -> (store x, (x*))
435 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
436 const SelectionDAG &DAG,
437 const MachineMemOperand &MMO) const {
438 // Default to the same logic as loads.
439 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
440 }
441
442 /// Return true if it is expected to be cheaper to do a store of a non-zero
443 /// vector constant with the given size and type for the address space than to
444 /// store the individual scalar element constants.
445 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
446 unsigned NumElem,
447 unsigned AddrSpace) const {
448 return false;
449 }
450
451 /// Allow store merging for the specified type after legalization in addition
452 /// to before legalization. This may transform stores that do not exist
453 /// earlier (for example, stores created from intrinsics).
454 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
455 return true;
456 }
457
458 /// Returns true if it's reasonable to merge stores to MemVT size.
459 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
460 const SelectionDAG &DAG) const {
461 return true;
462 }
463
464 /// Return true if it is cheap to speculate a call to intrinsic cttz.
465 virtual bool isCheapToSpeculateCttz() const {
466 return false;
467 }
468
469 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
470 virtual bool isCheapToSpeculateCtlz() const {
471 return false;
472 }
473
474 /// Return true if ctlz instruction is fast.
475 virtual bool isCtlzFast() const {
476 return false;
477 }
478
479 /// Return true if instruction generated for equality comparison is folded
480 /// with instruction generated for signed comparison.
481 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
482
483 /// Return true if it is safe to transform an integer-domain bitwise operation
484 /// into the equivalent floating-point operation. This should be set to true
485 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
486 /// type.
487 virtual bool hasBitPreservingFPLogic(EVT VT) const {
488 return false;
489 }
490
491 /// Return true if it is cheaper to split the store of a merged int val
492 /// from a pair of smaller values into multiple stores.
493 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
494 return false;
495 }
496
497 /// Return true if the target supports combining a
498 /// chain like:
499 /// \code
500 /// %andResult = and %val1, #mask
501 /// %icmpResult = icmp %andResult, 0
502 /// \endcode
503 /// into a single machine instruction of a form like:
504 /// \code
505 /// cc = test %register, #mask
506 /// \endcode
507 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
508 return false;
509 }
510
511 /// Use bitwise logic to make pairs of compares more efficient. For example:
512 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
513 /// This should be true when it takes more than one instruction to lower
514 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
515 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
516 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
517 return false;
518 }
519
520 /// Return the preferred operand type if the target has a quick way to compare
521 /// integer values of the given size. Assume that any legal integer type can
522 /// be compared efficiently. Targets may override this to allow illegal wide
523 /// types to return a vector type if there is support to compare that type.
524 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
525 MVT VT = MVT::getIntegerVT(NumBits);
526 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
527 }
528
529 /// Return true if the target should transform:
530 /// (X & Y) == Y ---> (~X & Y) == 0
531 /// (X & Y) != Y ---> (~X & Y) != 0
532 ///
533 /// This may be profitable if the target has a bitwise and-not operation that
534 /// sets comparison flags. A target may want to limit the transformation based
535 /// on the type of Y or if Y is a constant.
536 ///
537 /// Note that the transform will not occur if Y is known to be a power-of-2
538 /// because a mask and compare of a single bit can be handled by inverting the
539 /// predicate, for example:
540 /// (X & 8) == 8 ---> (X & 8) != 0
541 virtual bool hasAndNotCompare(SDValue Y) const {
542 return false;
543 }
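// The rewrite described above rests on a simple identity that can be checked
// directly: (X & Y) == Y holds exactly when (~X & Y) == 0. A standalone check
// with arbitrarily chosen sample values:

#include <cstdint>
#include <cstdio>

int main() {
  uint32_t Samples[][2] = {{0xF0F0, 0x00F0}, {0x0F00, 0x00F0}, {0xFFFF, 0x1234}};
  for (auto &S : Samples) {
    uint32_t X = S[0], Y = S[1];
    bool MaskEqualsY = (X & Y) == Y;
    bool AndNotIsZero = (~X & Y) == 0;
    // The two columns always agree.
    std::printf("X=%04X Y=%04X: %d %d\n", X, Y, MaskEqualsY, AndNotIsZero);
  }
}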
544
545 /// Return true if the target has a bitwise and-not operation:
546 /// X = ~A & B
547 /// This can be used to simplify select or other instructions.
548 virtual bool hasAndNot(SDValue X) const {
549 // If the target has the more complex version of this operation, assume that
550 // it has this operation too.
551 return hasAndNotCompare(X);
552 }
553
554 /// Return true if the target has a bit-test instruction:
555 /// (X & (1 << Y)) ==/!= 0
556 /// This knowledge can be used to prevent breaking the pattern,
557 /// or creating it if it could be recognized.
558 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
559
560 /// There are two ways to clear extreme bits (either low or high):
561 /// Mask: x & (-1 << y) (the instcombine canonical form)
562 /// Shifts: x >> y << y
563 /// Return true if the variant with 2 variable shifts is preferred.
564 /// Return false if there is no preference.
565 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
566 // By default, let's assume that no one prefers shifts.
567 return false;
568 }
569
570 /// Return true if it is profitable to fold a pair of shifts into a mask.
571 /// This is usually true on most targets. But some targets, like Thumb1,
572 /// have immediate shift instructions, but no immediate "and" instruction;
573 /// this makes the fold unprofitable.
574 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
575 CombineLevel Level) const {
576 return true;
577 }
578
579 /// Should we transform the IR-optimal check for whether the given truncation
580 /// down into KeptBits would be truncating or not:
581 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
582 /// Into its more traditional form:
583 /// ((%x << C) a>> C) dstcond %x
584 /// Return true if we should transform.
585 /// Return false if there is no preference.
586 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
587 unsigned KeptBits) const {
588 // By default, let's assume that no one prefers shifts.
589 return false;
590 }
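A worked instance of the two equivalent checks, assuming a 32-bit %x and KeptBits == 8; the concrete constants and the standalone helpers below are derived for illustration, not part of the header:

// For KeptBits == 8 on an i32 value, both forms test "fits in signed i8":
//   IR-canonical:  (add %x, 128) u< 256        // 128 == 1 << (KeptBits-1), 256 == 1 << KeptBits
//   traditional:   ((%x << 24) a>> 24) == %x   // 24 == 32 - KeptBits
#include <cstdint>
static bool canonicalForm(uint32_t X)   { return (X + 128u) < 256u; }
static bool traditionalForm(uint32_t X) { return ((int32_t)(X << 24) >> 24) == (int32_t)X; }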
591
592 /// Given the pattern
593 /// (X & (C l>>/<< Y)) ==/!= 0
594 /// return true if it should be transformed into:
595 /// ((X <</l>> Y) & C) ==/!= 0
596 /// WARNING: if 'X' is a constant, the fold may deadlock!
597 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
598 /// here because it can end up being not linked in.
599 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
600 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
601 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
602 SelectionDAG &DAG) const {
603 if (hasBitTest(X, Y)) {
604 // One interesting pattern that we'd want to form is 'bit test':
605 // ((1 << Y) & C) ==/!= 0
606 // But we also need to be careful not to try to reverse that fold.
607
608 // Is this '1 << Y' ?
609 if (OldShiftOpcode == ISD::SHL && CC->isOne())
610 return false; // Keep the 'bit test' pattern.
611
612 // Will it be '1 << Y' after the transform ?
613 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
614 return true; // Do form the 'bit test' pattern.
615 }
616
617 // If 'X' is a constant, and we transform, then we will immediately
618 // try to undo the fold, thus causing endless combine loop.
619 // So by default, let's assume everyone prefers the fold
620 // iff 'X' is not a constant.
621 return !XC;
622 }
623
624 /// These two forms are equivalent:
625 /// sub %y, (xor %x, -1)
626 /// add (add %x, 1), %y
627 /// The variant with two add's is IR-canonical.
628 /// Some targets may prefer one to the other.
629 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
630 // By default, let's assume that everyone prefers the form with two add's.
631 return true;
632 }
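For clarity, a one-line derivation of why the two forms agree (standard two's-complement identity, added here as a note):

// In two's complement, ~x == -x - 1, so:
//   sub %y, (xor %x, -1)  ==  y - (~x)  ==  y - (-x - 1)  ==  (x + 1) + y
// which is exactly add (add %x, 1), %y.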
633
634 /// Return true if the target wants to use the optimization that
635 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
636 /// promotedInst1(...(promotedInstN(ext(load)))).
637 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
638
639 /// Return true if the target can combine store(extractelement VectorTy,
640 /// Idx).
641 /// \p Cost[out] gives the cost of that transformation when this is true.
642 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
643 unsigned &Cost) const {
644 return false;
645 }
646
647 /// Return true if inserting a scalar into a variable element of an undef
648 /// vector is more efficiently handled by splatting the scalar instead.
649 virtual bool shouldSplatInsEltVarIndex(EVT) const {
650 return false;
651 }
652
653 /// Return true if the target always benefits from combining into FMA for a
654 /// given value type. This must typically return false on targets where FMA
655 /// takes more cycles to execute than FADD.
656 virtual bool enableAggressiveFMAFusion(EVT VT) const {
657 return false;
658 }
659
660 /// Return the ValueType of the result of SETCC operations.
661 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
662 EVT VT) const;
663
664 /// Return the ValueType for comparison libcalls. Comparison libcalls include
665 /// floating point comparison calls, and Ordered/Unordered check calls on
666 /// floating point numbers.
667 virtual
668 MVT::SimpleValueType getCmpLibcallReturnType() const;
669
670 /// For targets without i1 registers, this gives the nature of the high-bits
671 /// of boolean values held in types wider than i1.
672 ///
673 /// "Boolean values" are special true/false values produced by nodes like
674 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
675 /// Not to be confused with general values promoted from i1. Some cpus
676 /// distinguish between vectors of boolean and scalars; the isVec parameter
677 /// selects between the two kinds. For example on X86 a scalar boolean should
678 /// be zero extended from i1, while the elements of a vector of booleans
679 /// should be sign extended from i1.
680 ///
681 /// Some cpus also treat floating point types the same way as they treat
682 /// vectors instead of the way they treat scalars.
683 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
684 if (isVec)
685 return BooleanVectorContents;
686 return isFloat ? BooleanFloatContents : BooleanContents;
687 }
688
689 BooleanContent getBooleanContents(EVT Type) const {
690 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
691 }
692
693 /// Return target scheduling preference.
694 Sched::Preference getSchedulingPreference() const {
695 return SchedPreferenceInfo;
696 }
697
698 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
699 /// for different nodes. This function returns the preference (or none) for
700 /// the given node.
701 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
702 return Sched::None;
703 }
704
705 /// Return the register class that should be used for the specified value
706 /// type.
707 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
708 (void)isDivergent;
709 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
710 assert(RC && "This value type is not natively supported!")((RC && "This value type is not natively supported!")
? static_cast<void> (0) : __assert_fail ("RC && \"This value type is not natively supported!\""
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 710, __PRETTY_FUNCTION__))
;
711 return RC;
712 }
713
714 /// Allows target to decide about the register class of the
715 /// specific value that is live outside the defining block.
716 /// Returns true if the value needs uniform register class.
717 virtual bool requiresUniformRegister(MachineFunction &MF,
718 const Value *) const {
719 return false;
720 }
721
722 /// Return the 'representative' register class for the specified value
723 /// type.
724 ///
725 /// The 'representative' register class is the largest legal super-reg
726 /// register class for the register class of the value type. For example, on
727 /// i386 the rep register class for i8, i16, and i32 is GR32; on x86_64 the rep
728 /// register class is GR64.
729 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
730 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
731 return RC;
732 }
733
734 /// Return the cost of the 'representative' register class for the specified
735 /// value type.
736 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
737 return RepRegClassCostForVT[VT.SimpleTy];
738 }
739
740 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
741 /// instructions, and false if a library call is preferred (e.g. for code-size
742 /// reasons).
743 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
744 return true;
745 }
746
747 /// Return true if the target has native support for the specified value type.
748 /// This means that it has a register that directly holds it without
749 /// promotions or expansions.
750 bool isTypeLegal(EVT VT) const {
751 assert(!VT.isSimple() ||
752        (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
753 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
754 }
755
756 class ValueTypeActionImpl {
757 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
758 /// that indicates how instruction selection should deal with the type.
759 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
760
761 public:
762 ValueTypeActionImpl() {
763 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
764 TypeLegal);
765 }
766
767 LegalizeTypeAction getTypeAction(MVT VT) const {
768 return ValueTypeActions[VT.SimpleTy];
769 }
770
771 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
772 ValueTypeActions[VT.SimpleTy] = Action;
773 }
774 };
775
776 const ValueTypeActionImpl &getValueTypeActions() const {
777 return ValueTypeActions;
778 }
779
780 /// Return how we should legalize values of this type, either it is already
781 /// legal (return 'Legal') or we need to promote it to a larger type (return
782 /// 'Promote'), or we need to expand it into multiple registers of smaller
783 /// integer type (return 'Expand'). 'Custom' is not an option.
784 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
785 return getTypeConversion(Context, VT).first;
786 }
787 LegalizeTypeAction getTypeAction(MVT VT) const {
788 return ValueTypeActions.getTypeAction(VT);
789 }
790
791 /// For types supported by the target, this is an identity function. For
792 /// types that must be promoted to larger types, this returns the larger type
793 /// to promote to. For integer types that are larger than the largest integer
794 /// register, this contains one step in the expansion to get to the smaller
795 /// register. For illegal floating point types, this returns the integer type
796 /// to transform to.
797 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
798 return getTypeConversion(Context, VT).second;
799 }
800
801 /// For types supported by the target, this is an identity function. For
802 /// types that must be expanded (i.e. integer types that are larger than the
803 /// largest integer register or illegal floating point types), this returns
804 /// the largest legal type it will be expanded to.
805 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
806 assert(!VT.isVector());
807 while (true) {
808 switch (getTypeAction(Context, VT)) {
809 case TypeLegal:
810 return VT;
811 case TypeExpandInteger:
812 VT = getTypeToTransformTo(Context, VT);
813 break;
814 default:
815 llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 815)
;
816 }
817 }
818 }
819
820 /// Vector types are broken down into some number of legal first class types.
821 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
822 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
823 /// turns into 4 EVT::i32 values with both PPC and X86.
824 ///
825 /// This method returns the number of registers needed, and the VT for each
826 /// register. It also returns the VT and quantity of the intermediate values
827 /// before they are promoted/expanded.
828 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
829 EVT &IntermediateVT,
830 unsigned &NumIntermediates,
831 MVT &RegisterVT) const;
832
833 /// Certain targets such as MIPS require that some types such as vectors are
834 /// always broken down into scalars in some contexts. This occurs even if the
835 /// vector type is legal.
836 virtual unsigned getVectorTypeBreakdownForCallingConv(
837 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
838 unsigned &NumIntermediates, MVT &RegisterVT) const {
839 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
840 RegisterVT);
841 }
842
843 struct IntrinsicInfo {
844 unsigned opc = 0; // target opcode
845 EVT memVT; // memory VT
846
847 // value representing memory location
848 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
849
850 int offset = 0; // offset off of ptrVal
851 uint64_t size = 0; // the size of the memory location
852 // (taken from memVT if zero)
853 MaybeAlign align = Align::None(); // alignment
854
855 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
856 IntrinsicInfo() = default;
857 };
858
859 /// Given an intrinsic, checks if on the target the intrinsic will need to map
860 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
861 /// true and stores the intrinsic information into the IntrinsicInfo that was
862 /// passed to the function.
863 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
864 MachineFunction &,
865 unsigned /*Intrinsic*/) const {
866 return false;
867 }
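A hedged sketch of a target override filling in IntrinsicInfo for a memory-touching intrinsic; the class name and the intrinsic ID are hypothetical placeholders, not real LLVM identifiers:

bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
                                          MachineFunction &MF,
                                          unsigned Intrinsic) const {
  if (Intrinsic != Intrinsic::mytgt_vload) // hypothetical intrinsic ID
    return false;
  Info.opc = ISD::INTRINSIC_W_CHAIN;  // the load produces a value and a chain
  Info.memVT = MVT::v4i32;            // type actually read from memory
  Info.ptrVal = I.getArgOperand(0);   // the pointer operand of the call
  Info.offset = 0;
  Info.align = Align(16);
  Info.flags = MachineMemOperand::MOLoad;
  return true;
}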
868
869 /// Returns true if the target can instruction select the specified FP
870 /// immediate natively. If false, the legalizer will materialize the FP
871 /// immediate as a load from a constant pool.
872 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
873 bool ForCodeSize = false) const {
874 return false;
875 }
876
877 /// Targets can use this to indicate that they only support *some*
878 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
879 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
880 /// legal.
881 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
882 return true;
883 }
884
885 /// Returns true if the operation can trap for the value type.
886 ///
887 /// VT must be a legal type. By default, we optimistically assume most
888 /// operations don't trap except for integer divide and remainder.
889 virtual bool canOpTrap(unsigned Op, EVT VT) const;
890
891 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
892 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
893 /// constant pool entry.
894 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
895 EVT /*VT*/) const {
896 return false;
897 }
898
899 /// Return how this operation should be treated: either it is legal, needs to
900 /// be promoted to a larger size, needs to be expanded to some other code
901 /// sequence, or the target has a custom expander for it.
902 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
903 if (VT.isExtended()) return Expand;
904 // If a target-specific SDNode requires legalization, require the target
905 // to provide custom legalization for it.
906 if (Op >= array_lengthof(OpActions[0])) return Custom;
907 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
908 }
909
910 /// Custom method defined by each target to indicate if an operation which
911 /// may require a scale is supported natively by the target.
912 /// If not, the operation is illegal.
913 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
914 unsigned Scale) const {
915 return false;
916 }
917
918 /// Some fixed point operations may be natively supported by the target but
919 /// only for specific scales. This method allows for checking
920 /// if the width is supported by the target for a given operation that may
921 /// depend on scale.
922 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
923 unsigned Scale) const {
924 auto Action = getOperationAction(Op, VT);
925 if (Action != Legal)
926 return Action;
927
928 // This operation is supported in this type but may only work on specific
929 // scales.
930 bool Supported;
931 switch (Op) {
932 default:
933 llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation."
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 933)
;
934 case ISD::SMULFIX:
935 case ISD::SMULFIXSAT:
936 case ISD::UMULFIX:
937 case ISD::UMULFIXSAT:
938 case ISD::SDIVFIX:
939 case ISD::UDIVFIX:
940 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
941 break;
942 }
943
944 return Supported ? Action : Expand;
945 }
946
947 // If Op is a strict floating-point operation, return the result
948 // of getOperationAction for the equivalent non-strict operation.
949 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
950 unsigned EqOpc;
951 switch (Op) {
952 default: llvm_unreachable("Unexpected FP pseudo-opcode")::llvm::llvm_unreachable_internal("Unexpected FP pseudo-opcode"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 952)
;
953#define INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
954 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
955#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
956 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
957#include "llvm/IR/ConstrainedOps.def"
958 }
959
960 return getOperationAction(EqOpc, VT);
961 }
962
963 /// Return true if the specified operation is legal on this target or can be
964 /// made legal with custom lowering. This is used to help guide high-level
965 /// lowering decisions.
966 bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
967 return (VT == MVT::Other || isTypeLegal(VT)) &&
968 (getOperationAction(Op, VT) == Legal ||
969 getOperationAction(Op, VT) == Custom);
970 }
971
972 /// Return true if the specified operation is legal on this target or can be
973 /// made legal using promotion. This is used to help guide high-level lowering
974 /// decisions.
975 bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
976 return (VT == MVT::Other || isTypeLegal(VT)) &&
977 (getOperationAction(Op, VT) == Legal ||
978 getOperationAction(Op, VT) == Promote);
979 }
980
981 /// Return true if the specified operation is legal on this target or can be
982 /// made legal with custom lowering or using promotion. This is used to help
983 /// guide high-level lowering decisions.
984 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
985 return (VT == MVT::Other || isTypeLegal(VT)) &&
986 (getOperationAction(Op, VT) == Legal ||
987 getOperationAction(Op, VT) == Custom ||
988 getOperationAction(Op, VT) == Promote);
989 }
990
991 /// Return true if the operation uses custom lowering, regardless of whether
992 /// the type is legal or not.
993 bool isOperationCustom(unsigned Op, EVT VT) const {
994 return getOperationAction(Op, VT) == Custom;
995 }
996
997 /// Return true if lowering to a jump table is allowed.
998 virtual bool areJTsAllowed(const Function *Fn) const {
999 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1000 return false;
1001
1002 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1003 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1004 }
1005
1006 /// Check whether the range [Low,High] fits in a machine word.
1007 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1008 const DataLayout &DL) const {
1009 // FIXME: Using the pointer type doesn't seem ideal.
1010 uint64_t BW = DL.getIndexSizeInBits(0u);
1011 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1012 return Range <= BW;
1013 }
1014
1015 /// Return true if lowering to a jump table is suitable for a set of case
1016 /// clusters which may contain \p NumCases cases spanning a \p Range of values.
1017 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1018 uint64_t Range, ProfileSummaryInfo *PSI,
1019 BlockFrequencyInfo *BFI) const;
1020
1021 /// Return true if lowering to a bit test is suitable for a set of case
1022 /// clusters which contains \p NumDests unique destinations, \p Low and
1023 /// \p High as its lowest and highest case values, and expects \p NumCmps
1024 /// case value comparisons. Check if the number of destinations, comparison
1025 /// metric, and range are all suitable.
1026 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1027 const APInt &Low, const APInt &High,
1028 const DataLayout &DL) const {
1029 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1030 // range of cases both require only one branch to lower. Just looking at the
1031 // number of clusters and destinations should be enough to decide whether to
1032 // build bit tests.
1033
1034 // To lower a range with bit tests, the range must fit the bitwidth of a
1035 // machine word.
1036 if (!rangeFitsInWord(Low, High, DL))
1037 return false;
1038
1039 // Decide whether it's profitable to lower this range with bit tests. Each
1040 // destination requires a bit test and branch, and there is an overall range
1041 // check branch. For a small number of clusters, separate comparisons might
1042 // be cheaper, and for many destinations, splitting the range might be
1043 // better.
1044 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1045 (NumDests == 3 && NumCmps >= 6);
1046 }
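A worked instance of this heuristic; the switch shape and the 64-bit index width are made-up assumptions for illustration:

// Example: a switch whose case values lie in [0, 63], with 3 distinct
// destinations and 6 case-value comparisons, on a target with 64-bit pointers:
//   rangeFitsInWord:  Range = 63 - 0 + 1 = 64 <= 64  -> the range fits
//   profitability:    NumDests == 3 && NumCmps >= 6  -> lower as bit tests
// Dropping to 5 comparisons (or widening the range past 64) would fall back
// to the other switch-lowering strategies.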
1047
1048 /// Return true if the specified operation is illegal on this target or
1049 /// unlikely to be made legal with custom lowering. This is used to help guide
1050 /// high-level lowering decisions.
1051 bool isOperationExpand(unsigned Op, EVT VT) const {
1052 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1053 }
1054
1055 /// Return true if the specified operation is legal on this target.
1056 bool isOperationLegal(unsigned Op, EVT VT) const {
1057 return (VT == MVT::Other || isTypeLegal(VT)) &&
1058 getOperationAction(Op, VT) == Legal;
1059 }
1060
1061 /// Return how this load with extension should be treated: either it is legal,
1062 /// needs to be promoted to a larger size, needs to be expanded to some other
1063 /// code sequence, or the target has a custom expander for it.
1064 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1065 EVT MemVT) const {
1066 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1067 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1068 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1069 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1070        MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1071 unsigned Shift = 4 * ExtType;
1072 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1073 }
1074
1075 /// Return true if the specified load with extension is legal on this target.
1076 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1077 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1078 }
1079
1080 /// Return true if the specified load with extension is legal or custom
1081 /// on this target.
1082 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1083 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1084 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1085 }
1086
1087 /// Return how this store with truncation should be treated: either it is
1088 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1089 /// other code sequence, or the target has a custom expander for it.
1090 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1091 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1092 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1093 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1094 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1095        "Table isn't big enough!");
1096 return TruncStoreActions[ValI][MemI];
1097 }
1098
1099 /// Return true if the specified store with truncation is legal on this
1100 /// target.
1101 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1102 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1103 }
1104
1105 /// Return true if the specified store with truncation can be handled (legal
1106 /// or custom) on this target.
1107 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1108 return isTypeLegal(ValVT) &&
1109 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1110 getTruncStoreAction(ValVT, MemVT) == Custom);
1111 }
1112
1113 /// Return how the indexed load should be treated: either it is legal, needs
1114 /// to be promoted to a larger size, needs to be expanded to some other code
1115 /// sequence, or the target has a custom expander for it.
1116 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1117 return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1118 }
1119
1120 /// Return true if the specified indexed load is legal on this target.
1121 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1122 return VT.isSimple() &&
1123 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1124 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1125 }
1126
1127 /// Return how the indexed store should be treated: either it is legal, needs
1128 /// to be promoted to a larger size, needs to be expanded to some other code
1129 /// sequence, or the target has a custom expander for it.
1130 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1131 return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1132 }
1133
1134 /// Return true if the specified indexed store is legal on this target.
1135 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1136 return VT.isSimple() &&
1137 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1138 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1139 }
1140
1141 /// Return how the indexed masked load should be treated: either it is legal, needs
1142 /// to be promoted to a larger size, needs to be expanded to some other code
1143 /// sequence, or the target has a custom expander for it.
1144 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1145 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1146 }
1147
1148 /// Return true if the specified indexed masked load is legal on this target.
1149 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1150 return VT.isSimple() &&
1151 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1152 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1153 }
1154
1155 /// Return how the indexed masked store should be treated: either it is legal, needs
1156 /// to be promoted to a larger size, needs to be expanded to some other code
1157 /// sequence, or the target has a custom expander for it.
1158 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1159 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1160 }
1161
1163 /// Return true if the specified indexed masked store is legal on this target.
1163 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1164 return VT.isSimple() &&
1165 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1166 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1167 }
1168
1169 /// Return how the condition code should be treated: either it is legal, needs
1170 /// to be expanded to some other code sequence, or the target has a custom
1171 /// expander for it.
1172 LegalizeAction
1173 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1174 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1175        ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1176        "Table isn't big enough!");
1177 // See setCondCodeAction for how this is encoded.
1178 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1179 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1180 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1181 assert(Action != Promote && "Can't promote condition code!");
1182 return Action;
1183 }
1184
1185 /// Return true if the specified condition code is legal on this target.
1186 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1187 return getCondCodeAction(CC, VT) == Legal;
1188 }
1189
1190 /// Return true if the specified condition code is legal or custom on this
1191 /// target.
1192 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1193 return getCondCodeAction(CC, VT) == Legal ||
1194 getCondCodeAction(CC, VT) == Custom;
1195 }
1196
1197 /// If the action for this operation is to promote, this method returns the
1198 /// ValueType to promote to.
1199 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1200 assert(getOperationAction(Op, VT) == Promote &&
1201        "This operation isn't promoted!");
1202
1203 // See if this has an explicit type specified.
1204 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1205 MVT::SimpleValueType>::const_iterator PTTI =
1206 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1207 if (PTTI != PromoteToType.end()) return PTTI->second;
1208
1209 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1210        "Cannot autopromote this type, add it with AddPromotedToType.");
1211
1212 MVT NVT = VT;
1213 do {
1214 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1215 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1216        "Didn't find type to promote to!");
1217 } while (!isTypeLegal(NVT) ||
1218 getOperationAction(Op, NVT) == Promote);
1219 return NVT;
1220 }
1221
1222 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1223 /// operations except for the pointer size. If AllowUnknown is true, this
1224 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1225 /// otherwise it will assert.
1226 EVT getValueType(const DataLayout &DL, Type *Ty,
1227 bool AllowUnknown = false) const {
1228 // Lower scalar pointers to native pointer types.
1229 if (auto *PTy = dyn_cast<PointerType>(Ty))
28
Assuming 'PTy' is null
29
Taking false branch
1230 return getPointerTy(DL, PTy->getAddressSpace());
1231
1232 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
30
Assuming 'VTy' is non-null
31
Taking true branch
1233 Type *EltTy = VTy->getElementType();
1234 // Lower vectors of pointers to native pointer types.
1235 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
32
Assuming 'EltTy' is a 'PointerType'
32.1
'PTy' is non-null
33
Taking true branch
1236 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1237 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
34
Called C++ object pointer is null
1238 }
1239 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
1240 VTy->getElementCount());
1241 }
1242
1243 return EVT::getEVT(Ty, AllowUnknown);
1244 }
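For reference, a few hedged examples of the mapping this routine performs; the pointer width assumes a hypothetical 64-bit target, and the exact results depend on the DataLayout:

// getValueType(DL, Ty) sketches, assuming 64-bit pointers:
//   i32        -> MVT::i32
//   <4 x i32>  -> MVT::v4i32
//   i8*        -> MVT::i64       (scalar pointers become the native pointer type)
//   <2 x i8*>  -> MVT::v2i64     (the vector-of-pointers path the analyzer steps
//                                 through above before reporting line 1237)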
1245
1246 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1247 bool AllowUnknown = false) const {
1248 // Lower scalar pointers to native pointer types.
1249 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1250 return getPointerMemTy(DL, PTy->getAddressSpace());
1251 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1252 Type *Elm = VTy->getElementType();
1253 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1254 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1255 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1256 }
1257 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1258 VTy->getElementCount());
1259 }
1260
1261 return getValueType(DL, Ty, AllowUnknown);
1262 }
1263
1264
1265 /// Return the MVT corresponding to this LLVM type. See getValueType.
1266 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1267 bool AllowUnknown = false) const {
1268 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1269 }
1270
1271 /// Return the desired alignment for ByVal or InAlloca aggregate function
1272 /// arguments in the caller parameter area. This is the actual alignment, not
1273 /// its logarithm.
1274 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1275
1276 /// Return the type of registers that this ValueType will eventually require.
1277 MVT getRegisterType(MVT VT) const {
1278 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1279 return RegisterTypeForVT[VT.SimpleTy];
1280 }
1281
1282 /// Return the type of registers that this ValueType will eventually require.
1283 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1284 if (VT.isSimple()) {
1285 assert((unsigned)VT.getSimpleVT().SimpleTy <
1286        array_lengthof(RegisterTypeForVT));
1287 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1288 }
1289 if (VT.isVector()) {
1290 EVT VT1;
1291 MVT RegisterVT;
1292 unsigned NumIntermediates;
1293 (void)getVectorTypeBreakdown(Context, VT, VT1,
1294 NumIntermediates, RegisterVT);
1295 return RegisterVT;
1296 }
1297 if (VT.isInteger()) {
1298 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1299 }
1300 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1300)
;
1301 }
1302
1303 /// Return the number of registers that this ValueType will eventually
1304 /// require.
1305 ///
1306 /// This is one for any types promoted to live in larger registers, but may be
1307 /// more than one for types (like i64) that are split into pieces. For types
1308 /// like i140, which are first promoted then expanded, it is the number of
1309 /// registers needed to hold all the bits of the original type. For an i140
1310 /// on a 32 bit machine this means 5 registers.
1311 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1312 if (VT.isSimple()) {
1313 assert((unsigned)VT.getSimpleVT().SimpleTy <
1314        array_lengthof(NumRegistersForVT));
1315 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1316 }
1317 if (VT.isVector()) {
1318 EVT VT1;
1319 MVT VT2;
1320 unsigned NumIntermediates;
1321 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1322 }
1323 if (VT.isInteger()) {
1324 unsigned BitWidth = VT.getSizeInBits();
1325 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1326 return (BitWidth + RegWidth - 1) / RegWidth;
1327 }
1328 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-10~++20200112100611+7fa5290d5bd/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1328)
;
1329 }
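The i140 example quoted in the documentation comment above, spelled out (simple arithmetic, not target-specific):

// For an extended integer such as i140 on a 32-bit machine:
//   BitWidth = 140, RegWidth = 32
//   NumRegisters = (140 + 32 - 1) / 32 = 171 / 32 = 5
// matching the "5 registers" figure in the comment.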
1330
1331 /// Certain combinations of ABIs, Targets and features require that types
1332 /// are legal for some operations and not for other operations.
1333 /// For MIPS all vector types must be passed through the integer register set.
1334 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1335 CallingConv::ID CC, EVT VT) const {
1336 return getRegisterType(Context, VT);
1337 }
1338
1339 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1340 /// this occurs when a vector type is used, as vectors are passed through the
1341 /// integer register set.
1342 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1343 CallingConv::ID CC,
1344 EVT VT) const {
1345 return getNumRegisters(Context, VT);
1346 }
1347
1348 /// Certain targets have context-sensitive alignment requirements, where one
1349 /// type has the alignment requirement of another type.
1350 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1351 DataLayout DL) const {
1352 return Align(DL.getABITypeAlignment(ArgTy));
1353 }
1354
1355 /// If true, then instruction selection should seek to shrink the FP constant
1356 /// of the specified type to a smaller type in order to save space and / or
1357 /// reduce runtime.
1358 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1359
1360 /// Return true if it is profitable to reduce a load to a smaller type.
1361 /// Example: (i16 (trunc (i32 (load x))) -> i16 load x
1362 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1363 EVT NewVT) const {
1364 // By default, assume that it is cheaper to extract a subvector from a wide
1365 // vector load rather than creating multiple narrow vector loads.
1366 if (NewVT.isVector() && !Load->hasOneUse())
1367 return false;
1368
1369 return true;
1370 }
1371
1372 /// When splitting a value of the specified type into parts, does the Lo
1373 /// or Hi part come first? This usually follows the endianness, except
1374 /// for ppcf128, where the Hi part always comes first.
1375 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1376 return DL.isBigEndian() || VT == MVT::ppcf128;
1377 }
1378
1379 /// If true, the target has custom DAG combine transformations that it can
1380 /// perform for the specified node.
1381 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1382 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1383 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1384 }
1385
1386 unsigned getGatherAllAliasesMaxDepth() const {
1387 return GatherAllAliasesMaxDepth;
1388 }
1389
1390 /// Returns the size of the platform's va_list object.
1391 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1392 return getPointerTy(DL).getSizeInBits();
1393 }
1394
1395 /// Get maximum # of store operations permitted for llvm.memset
1396 ///
1397 /// This function returns the maximum number of store operations permitted
1398 /// to replace a call to llvm.memset. The value is set by the target at the
1399 /// performance threshold for such a replacement. If OptSize is true,
1400 /// return the limit for functions that have OptSize attribute.
1401 unsigned getMaxStoresPerMemset(bool OptSize) const {
1402 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1403 }
1404
1405 /// Get maximum # of store operations permitted for llvm.memcpy
1406 ///
1407 /// This function returns the maximum number of store operations permitted
1408 /// to replace a call to llvm.memcpy. The value is set by the target at the
1409 /// performance threshold for such a replacement. If OptSize is true,
1410 /// return the limit for functions that have OptSize attribute.
1411 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1412 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1413 }
1414
1415 /// \brief Get maximum # of store operations to be glued together
1416 ///
1417 /// This function returns the maximum number of store operations permitted
1418 /// to glue together during lowering of llvm.memcpy. The value is set by
1419 /// the target at the performance threshold for such a replacement.
1420 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1421 return MaxGluedStoresPerMemcpy;
1422 }
1423
1424 /// Get maximum # of load operations permitted for memcmp
1425 ///
1426 /// This function returns the maximum number of load operations permitted
1427 /// to replace a call to memcmp. The value is set by the target at the
1428 /// performance threshold for such a replacement. If OptSize is true,
1429 /// return the limit for functions that have OptSize attribute.
1430 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1431 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1432 }
1433
1434 /// Get maximum # of store operations permitted for llvm.memmove
1435 ///
1436 /// This function returns the maximum number of store operations permitted
1437 /// to replace a call to llvm.memmove. The value is set by the target at the
1438 /// performance threshold for such a replacement. If OptSize is true,
1439 /// return the limit for functions that have OptSize attribute.
1440 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1441 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1442 }
1443
1444 /// Determine if the target supports unaligned memory accesses.
1445 ///
1446 /// This function returns true if the target allows unaligned memory accesses
1447 /// of the specified type in the given address space. If true, it also returns
1448 /// whether the unaligned memory access is "fast" in the last argument by
1449 /// reference. This is used, for example, in situations where an array
1450 /// copy/move/set is converted to a sequence of store operations. Its use
1451 /// helps to ensure that such replacements don't generate code that causes an
1452 /// alignment error (trap) on the target machine.
1453 virtual bool allowsMisalignedMemoryAccesses(
1454 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1455 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1456 bool * /*Fast*/ = nullptr) const {
1457 return false;
1458 }
1459
1460 /// LLT handling variant.
1461 virtual bool allowsMisalignedMemoryAccesses(
1462 LLT, unsigned AddrSpace = 0, unsigned Align = 1,
1463 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1464 bool * /*Fast*/ = nullptr) const {
1465 return false;
1466 }
1467
1468 /// This function returns true if the memory access is aligned or if the
1469 /// target allows this specific unaligned memory access. If the access is
1470 /// allowed, the optional final parameter returns if the access is also fast
1471 /// (as defined by the target).
1472 bool allowsMemoryAccessForAlignment(
1473 LLVMContext &Context, const DataLayout &DL, EVT VT,
1474 unsigned AddrSpace = 0, unsigned Alignment = 1,
1475 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1476 bool *Fast = nullptr) const;
1477
1478 /// Return true if the memory access of this type is aligned or if the target
1479 /// allows this specific unaligned access for the given MachineMemOperand.
1480 /// If the access is allowed, the optional final parameter returns if the
1481 /// access is also fast (as defined by the target).
1482 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1483 const DataLayout &DL, EVT VT,
1484 const MachineMemOperand &MMO,
1485 bool *Fast = nullptr) const;
1486
1487 /// Return true if the target supports a memory access of this type for the
1488 /// given address space and alignment. If the access is allowed, the optional
1489 /// final parameter returns if the access is also fast (as defined by the
1490 /// target).
1491 virtual bool
1492 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1493 unsigned AddrSpace = 0, unsigned Alignment = 1,
1494 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1495 bool *Fast = nullptr) const;
1496
1497 /// Return true if the target supports a memory access of this type for the
1498 /// given MachineMemOperand. If the access is allowed, the optional
1499 /// final parameter returns if the access is also fast (as defined by the
1500 /// target).
1501 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1502 const MachineMemOperand &MMO,
1503 bool *Fast = nullptr) const;
1504
1505 /// Returns the target specific optimal type for load and store operations as
1506 /// a result of memset, memcpy, and memmove lowering.
1507 ///
1508 /// If DstAlign is zero that means it's safe to assume the destination
1509 /// alignment can satisfy any constraint. Similarly if SrcAlign is zero it means
1510 /// there isn't a need to check it against the alignment requirement, probably because the
1511 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
1512 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
1513 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
1514 /// does not need to be loaded. It returns EVT::Other if the type should be
1515 /// determined using generic target-independent logic.
1516 virtual EVT
1517 getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/,
1518 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1519 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1520 const AttributeList & /*FuncAttributes*/) const {
1521 return MVT::Other;
1522 }
1523
1524
1525 /// LLT returning variant.
1526 virtual LLT
1527 getOptimalMemOpLLT(uint64_t /*Size*/, unsigned /*DstAlign*/,
1528 unsigned /*SrcAlign*/, bool /*IsMemset*/,
1529 bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/,
1530 const AttributeList & /*FuncAttributes*/) const {
1531 return LLT();
1532 }
1533
1534 /// Returns true if it's safe to use load / store of the specified type to
1535 /// expand memcpy / memset inline.
1536 ///
1537 /// This is mostly true for all types except for some special cases. For
1538 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1539 /// fstpl which also does type conversion. Note the specified type doesn't
1540 /// have to be legal as the hook is used before type legalization.
1541 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1542
1543 /// Return lower limit for number of blocks in a jump table.
1544 virtual unsigned getMinimumJumpTableEntries() const;
1545
1546 /// Return lower limit of the density in a jump table.
1547 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1548
1549 /// Return upper limit for number of entries in a jump table.
1550 /// Zero if no limit.
1551 unsigned getMaximumJumpTableSize() const;
1552
1553 virtual bool isJumpTableRelative() const {
1554 return TM.isPositionIndependent();
1555 }
1556
1557 /// If a physical register, this specifies the register that
1558 /// llvm.stacksave/llvm.stackrestore should save and restore.
1559 unsigned getStackPointerRegisterToSaveRestore() const {
1560 return StackPointerRegisterToSaveRestore;
1561 }
1562
1563 /// If a physical register, this returns the register that receives the
1564 /// exception address on entry to an EH pad.
1565 virtual unsigned
1566 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1567 // 0 is guaranteed to be the NoRegister value on all targets
1568 return 0;
1569 }
1570
1571 /// If a physical register, this returns the register that receives the
1572 /// exception typeid on entry to a landing pad.
1573 virtual unsigned
1574 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1575 // 0 is guaranteed to be the NoRegister value on all targets
1576 return 0;
1577 }
1578
1579 virtual bool needsFixedCatchObjects() const {
1580 report_fatal_error("Funclet EH is not implemented for this target");
1581 }
1582
1583 /// Return the minimum stack alignment of an argument.
1584 Align getMinStackArgumentAlignment() const {
1585 return MinStackArgumentAlignment;
1586 }
1587
1588 /// Return the minimum function alignment.
1589 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1590
1591 /// Return the preferred function alignment.
1592 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1593
1594 /// Return the preferred loop alignment.
1595 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1596 return PrefLoopAlignment;
1597 }
1598
1599 /// Should loops be aligned even when the function is marked OptSize (but not
1600 /// MinSize).
1601 virtual bool alignLoopsWithOptSize() const {
1602 return false;
1603 }
1604
1605 /// If the target has a standard location for the stack protector guard,
1606 /// returns the address of that location. Otherwise, returns nullptr.
1607 /// DEPRECATED: please override useLoadStackGuardNode and customize
1608 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1609 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1610
1611 /// Inserts necessary declarations for SSP (stack protection) purpose.
1612 /// Should be used only when getIRStackGuard returns nullptr.
1613 virtual void insertSSPDeclarations(Module &M) const;
1614
1615 /// Return the variable that's previously inserted by insertSSPDeclarations,
1616 /// if any, otherwise return nullptr. Should be used only when
1617 /// getIRStackGuard returns nullptr.
1618 virtual Value *getSDagStackGuard(const Module &M) const;
1619
1620 /// If this function returns true, stack protection checks should XOR the
1621 /// frame pointer (or whichever pointer is used to address locals) into the
1622 /// stack guard value before checking it. getIRStackGuard must return nullptr
1623 /// if this returns true.
1624 virtual bool useStackGuardXorFP() const { return false; }
1625
1626 /// If the target has a standard stack protection check function that
1627 /// performs validation and error handling, returns the function. Otherwise,
1628 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1629 /// Should be used only when getIRStackGuard returns nullptr.
1630 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1631
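As a hedged illustration of how these SSP hooks are meant to be overridden, the sketch below shows a hypothetical target whose guard lives in a plain module-level global. The class name MyTargetLowering and the symbol __guard_local are invented for this example (and the usual llvm/IR headers are assumed to be included); only the hook signature comes from the declarations above.

  // Illustrative only: return the address of an assumed guard global so that
  // @llvm.stackguard() loads from it directly.
  Value *MyTargetLowering::getIRStackGuard(IRBuilder<> &IRB) const {
    Module *M = IRB.GetInsertBlock()->getParent()->getParent();
    Type *PtrTy = Type::getInt8PtrTy(M->getContext());
    return M->getOrInsertGlobal("__guard_local", PtrTy);
  }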
1632protected:
1633 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1634 bool UseTLS) const;
1635
1636public:
1637 /// Returns the target-specific address of the unsafe stack pointer.
1638 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1639
1640 /// Returns the name of the symbol used to emit stack probes or the empty
1641 /// string if not applicable.
1642 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1643 return "";
1644 }
1645
1646 /// Returns true if a cast between SrcAS and DestAS is a noop.
1647 virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1648 return false;
1649 }
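For concreteness, here is a hedged sketch of an override for a hypothetical target on which two address spaces share one pointer representation; the address-space numbering and class name are invented, not taken from any real backend.

  // Casts between the two assumed flat-like address spaces need no code.
  bool MyTargetLowering::isNoopAddrSpaceCast(unsigned SrcAS,
                                             unsigned DestAS) const {
    auto IsFlatLike = [](unsigned AS) { return AS == 0 || AS == 1; };
    return IsFlatLike(SrcAS) && IsFlatLike(DestAS);
  }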
1650
1651 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1652 /// are happy to sink it into basic blocks. A cast may be free, but not
1653 /// necessarily a no-op, e.g. a free truncate from a 64-bit to a 32-bit pointer.
1654 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const {
1655 return isNoopAddrSpaceCast(SrcAS, DestAS);
1656 }
1657
1658 /// Return true if the pointer arguments to CI should be aligned by aligning
1659 /// the object whose address is being passed. If so then MinSize is set to the
1660 /// minimum size the object must be to be aligned and PrefAlign is set to the
1661 /// preferred alignment.
1662 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1663 unsigned & /*PrefAlign*/) const {
1664 return false;
1665 }
1666
1667 //===--------------------------------------------------------------------===//
1668 /// \name Helpers for TargetTransformInfo implementations
1669 /// @{
1670
1671 /// Get the ISD node that corresponds to the Instruction class opcode.
1672 int InstructionOpcodeToISD(unsigned Opcode) const;
1673
1674 /// Estimate the cost of type-legalization and the legalized type.
1675 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1676 Type *Ty) const;
1677
1678 /// @}
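A minimal sketch of how a TargetTransformInfo implementation typically consumes these two helpers; the free function and its flat return value are assumptions for illustration, not real TTI code.

  // Scale a unit cost by the number of pieces type legalization will split Ty
  // into; LT.second is the legalized MVT.
  static int approxOpCost(const TargetLoweringBase &TLI, const DataLayout &DL,
                          unsigned Opcode, Type *Ty) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
    int ISD = TLI.InstructionOpcodeToISD(Opcode);
    (void)ISD; // A real implementation would index a cost table by (ISD, LT.second).
    return LT.first; // Estimated number of legalization steps.
  }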
1679
1680 //===--------------------------------------------------------------------===//
1681 /// \name Helpers for atomic expansion.
1682 /// @{
1683
1684 /// Returns the maximum atomic operation size (in bits) supported by
1685 /// the backend. Atomic operations greater than this size (as well
1686 /// as ones that are not naturally aligned), will be expanded by
1687 /// AtomicExpandPass into an __atomic_* library call.
1688 unsigned getMaxAtomicSizeInBitsSupported() const {
1689 return MaxAtomicSizeInBitsSupported;
1690 }
1691
1692 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1693 /// the backend supports. Any smaller operations are widened in
1694 /// AtomicExpandPass.
1695 ///
1696 /// Note that *unlike* operations above the maximum size, atomic ops
1697 /// are still natively supported below the minimum; they just
1698 /// require a more complex expansion.
1699 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1700
1701 /// Whether the target supports unaligned atomic operations.
1702 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1703
1704 /// Whether AtomicExpandPass should automatically insert fences and reduce
1705 /// ordering for this atomic. This should be true for most architectures with
1706 /// weak memory ordering. Defaults to false.
1707 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1708 return false;
1709 }
1710
1711 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1712 /// corresponding pointee type. This may entail some non-trivial operations to
1713 /// truncate or reconstruct types that will be illegal in the backend. See
1714 /// ARMISelLowering for an example implementation.
1715 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1716 AtomicOrdering Ord) const {
1717 llvm_unreachable("Load linked unimplemented on this target");
1718 }
1719
1720 /// Perform a store-conditional operation to Addr. Return the status of the
1721 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1722 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1723 Value *Addr, AtomicOrdering Ord) const {
1724 llvm_unreachable("Store conditional unimplemented on this target");
1725 }
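To make these two hooks concrete, here is a hedged sketch loosely modelled on the ARM lowering referenced above, restricted to a 32-bit value; the class name is invented, the acquire/release (ldaex/stlex) variants and i8/i16/i64 widening that a real backend performs are omitted, and the usual llvm/IR headers are assumed.

  Value *MyTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                          AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getModule();
    Function *Ldrex =
        Intrinsic::getDeclaration(M, Intrinsic::arm_ldrex, Addr->getType());
    return Builder.CreateCall(Ldrex, Addr); // yields the loaded i32
  }

  Value *MyTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                                Value *Addr,
                                                AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getModule();
    Function *Strex =
        Intrinsic::getDeclaration(M, Intrinsic::arm_strex, Addr->getType());
    return Builder.CreateCall(Strex, {Val, Addr}); // 0 on success, 1 on failure
  }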
1726
1727 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1728 /// represents the core LL/SC loop which will be lowered at a late stage by
1729 /// the backend.
1730 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1731 AtomicRMWInst *AI,
1732 Value *AlignedAddr, Value *Incr,
1733 Value *Mask, Value *ShiftAmt,
1734 AtomicOrdering Ord) const {
1735 llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
1736 }
1737
1738 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1739 /// represents the core LL/SC loop which will be lowered at a late stage by
1740 /// the backend.
1741 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1742 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1743 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1744 llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
1745 }
1746
1747 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1748 /// It is called by AtomicExpandPass before expanding an
1749 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1750 /// if shouldInsertFencesForAtomic returns true.
1751 ///
1752 /// Inst is the original atomic instruction, prior to other expansions that
1753 /// may be performed.
1754 ///
1755 /// This function should either return nullptr, or an IR-level
1756 /// Instruction*. Even complex fence sequences can be represented by a
1757 /// single Instruction* through an intrinsic to be lowered later.
1758 /// Backends should override this method to produce a target-specific intrinsic
1759 /// for their fences.
1760 /// FIXME: Please note that the default implementation here in terms of
1761 /// IR-level fences exists for historical/compatibility reasons and is
1762 /// *unsound*! Fences cannot, in general, be used to restore sequential
1763 /// consistency. Consider the following example:
1764 /// atomic<int> x = y = 0;
1765 /// int r1, r2, r3, r4;
1766 /// Thread 0:
1767 /// x.store(1);
1768 /// Thread 1:
1769 /// y.store(1);
1770 /// Thread 2:
1771 /// r1 = x.load();
1772 /// r2 = y.load();
1773 /// Thread 3:
1774 /// r3 = y.load();
1775 /// r4 = x.load();
1776 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1777 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1778 /// IR-level fences can prevent it.
1779 /// @{
1780 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1781 AtomicOrdering Ord) const {
1782 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1783 return Builder.CreateFence(Ord);
1784 else
1785 return nullptr;
1786 }
1787
1788 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1789 Instruction *Inst,
1790 AtomicOrdering Ord) const {
1791 if (isAcquireOrStronger(Ord))
1792 return Builder.CreateFence(Ord);
1793 else
1794 return nullptr;
1795 }
1796 /// @}
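For orientation, a hedged sketch of the call pattern a client such as AtomicExpandPass follows around an atomic store once shouldInsertFencesForAtomic() returns true; the helper name is invented and the real pass handles many more cases.

  // Bracket the store with target fences, then relax the access itself so the
  // fences alone carry the ordering.
  static void bracketStoreWithFences(const TargetLowering &TLI, StoreInst &SI) {
    IRBuilder<> Builder(&SI);
    AtomicOrdering Ord = SI.getOrdering();
    TLI.emitLeadingFence(Builder, &SI, Ord);   // may return nullptr (no fence)
    Builder.SetInsertPoint(SI.getNextNode());
    TLI.emitTrailingFence(Builder, &SI, Ord);  // e.g. another fence for seq_cst
    SI.setOrdering(AtomicOrdering::Monotonic);
  }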
1797
1798 // Emits code that executes when the comparison result in the ll/sc
1799 // expansion of a cmpxchg instruction is such that the store-conditional will
1800 // not execute. This makes it possible to balance out the load-linked with
1801 // a dedicated instruction, if desired.
1802 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1803 // be held unnecessarily, unless a clrex inserted by this hook is executed.
1804 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1805
1806 /// Returns true if the given (atomic) store should be expanded by the
1807 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1808 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1809 return false;
1810 }
1811
1812 /// Returns true if arguments should be sign-extended in lib calls.
1813 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1814 return IsSigned;
1815 }
1816
1817 /// Returns true if arguments should be extended in lib calls.
1818 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1819 return true;
1820 }
1821
1822 /// Returns how the given (atomic) load should be expanded by the
1823 /// IR-level AtomicExpand pass.
1824 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1825 return AtomicExpansionKind::None;
1826 }
1827
1828 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1829 /// AtomicExpand pass.
1830 virtual AtomicExpansionKind
1831 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1832 return AtomicExpansionKind::None;
1833 }
1834
1835 /// Returns how the IR-level AtomicExpand pass should expand the given
1836 /// AtomicRMW, if at all. Default is to never expand.
1837 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1838 return RMW->isFloatingPointOperation() ?
1839 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1840 }
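A hedged example of how a target with 32-bit LL/SC might steer this hook; the width threshold and class name are assumptions, and wider operations are presumed to have been routed to library calls already via setMaxAtomicSizeInBitsSupported.

  TargetLowering::AtomicExpansionKind
  MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
    if (RMW->isFloatingPointOperation())
      return AtomicExpansionKind::CmpXChg;       // FP RMWs become a CAS loop
    unsigned Size = RMW->getType()->getPrimitiveSizeInBits();
    // Assume only 32-bit load-linked/store-conditional is available natively.
    return Size <= 32 ? AtomicExpansionKind::LLSC : AtomicExpansionKind::None;
  }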
1841
1842 /// On some platforms, an AtomicRMW that never actually modifies the value
1843 /// (such as fetch_add of 0) can be turned into a fence followed by an
1844 /// atomic load. This may sound useless, but it makes it possible for the
1845 /// processor to keep the cacheline shared, dramatically improving
1846 /// performance. And such idempotent RMWs are useful for implementing some
1847 /// kinds of locks, see for example (justification + benchmarks):
1848 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1849 /// This method tries doing that transformation, returning the atomic load if
1850 /// it succeeds, and nullptr otherwise.
1851 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1852 /// another round of expansion.
1853 virtual LoadInst *
1854 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1855 return nullptr;
1856 }
1857
1858 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1859 /// SIGN_EXTEND, or ANY_EXTEND).
1860 virtual ISD::NodeType getExtendForAtomicOps() const {
1861 return ISD::ZERO_EXTEND;
1862 }
1863
1864 /// @}
1865
1866 /// Returns true if we should normalize
1867 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
1868 /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
1869 /// that it saves us from materializing N0 and N1 in an integer register.
1870 /// Targets that are able to perform and/or on flags should return false here.
1871 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
1872 EVT VT) const {
1873 // If a target has multiple condition registers, then it likely has logical
1874 // operations on those registers.
1875 if (hasMultipleConditionRegisters())
1876 return false;
1877 // Only do the transform if the value won't be split into multiple
1878 // registers.
1879 LegalizeTypeAction Action = getTypeAction(Context, VT);
1880 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
1881 Action != TypeSplitVector;
1882 }
1883
1884 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
1885
1886 /// Return true if a select of constants (select Cond, C1, C2) should be
1887 /// transformed into simple math ops with the condition value. For example:
1888 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
1889 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
1890 return false;
1891 }
1892
1893 /// Return true if it is profitable to transform an integer
1894 /// multiplication-by-constant into simpler operations like shifts and adds.
1895 /// This may be true if the target does not directly support the
1896 /// multiplication operation for the specified type or the sequence of simpler
1897 /// ops is faster than the multiply.
1898 virtual bool decomposeMulByConstant(LLVMContext &Context,
1899 EVT VT, SDValue C) const {
1900 return false;
1901 }
1902
1903 /// Return true if it is more correct/profitable to use strict FP_TO_INT
1904 /// conversion operations - canonicalizing the FP source value instead of
1905 /// converting all cases and then selecting based on value.
1906 /// This may be true if the target throws exceptions for out of bounds
1907 /// conversions or has fast FP CMOV.
1908 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
1909 bool IsSigned) const {
1910 return false;
1911 }
1912
1913 //===--------------------------------------------------------------------===//
1914 // TargetLowering Configuration Methods - These methods should be invoked by
1915 // the derived class constructor to configure this object for the target.
1916 //
1917protected:
1918 /// Specify how the target extends the result of integer and floating point
1919 /// boolean values from i1 to a wider type. See getBooleanContents.
1920 void setBooleanContents(BooleanContent Ty) {
1921 BooleanContents = Ty;
1922 BooleanFloatContents = Ty;
1923 }
1924
1925 /// Specify how the target extends the result of integer and floating point
1926 /// boolean values from i1 to a wider type. See getBooleanContents.
1927 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
1928 BooleanContents = IntTy;
1929 BooleanFloatContents = FloatTy;
1930 }
1931
1932 /// Specify how the target extends the result of a vector boolean value from a
1933 /// vector of i1 to a wider type. See getBooleanContents.
1934 void setBooleanVectorContents(BooleanContent Ty) {
1935 BooleanVectorContents = Ty;
1936 }
1937
1938 /// Specify the target scheduling preference.
1939 void setSchedulingPreference(Sched::Preference Pref) {
1940 SchedPreferenceInfo = Pref;
1941 }
1942
1943 /// Indicate the minimum number of blocks to generate jump tables.
1944 void setMinimumJumpTableEntries(unsigned Val);
1945
1946 /// Indicate the maximum number of entries in jump tables.
1947 /// Set to zero to generate unlimited jump tables.
1948 void setMaximumJumpTableSize(unsigned);
1949
1950 /// If set to a physical register, this specifies the register that
1951 /// llvm.stacksave/llvm.stackrestore should save and restore.
1952 void setStackPointerRegisterToSaveRestore(unsigned R) {
1953 StackPointerRegisterToSaveRestore = R;
1954 }
1955
1956 /// Tells the code generator that the target has multiple (allocatable)
1957 /// condition registers that can be used to store the results of comparisons
1958 /// for use by selects and conditional branches. With multiple condition
1959 /// registers, the code generator will not aggressively sink comparisons into
1960 /// the blocks of their users.
1961 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
1962 HasMultipleConditionRegisters = hasManyRegs;
1963 }
1964
1965 /// Tells the code generator that the target has BitExtract instructions.
1966 /// The code generator will aggressively sink "shift"s into the blocks of
1967 /// their users if the users will generate "and" instructions which can be
1968 /// combined with "shift" to BitExtract instructions.
1969 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
1970 HasExtractBitsInsn = hasExtractInsn;
1971 }
1972
1973 /// Tells the code generator not to expand logic operations on comparison
1974 /// predicates into separate sequences that increase the amount of flow
1975 /// control.
1976 void setJumpIsExpensive(bool isExpensive = true);
1977
1978 /// Tells the code generator which bitwidths to bypass.
1979 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
1980 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
1981 }
1982
1983 /// Add the specified register class as an available regclass for the
1984 /// specified value type. This indicates the selector can handle values of
1985 /// that class natively.
1986 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
1987 assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
1988 RegClassForVT[VT.SimpleTy] = RC;
1989 }
1990
1991 /// Return the largest legal super-reg register class of the register class
1992 /// for the specified type and its associated "cost".
1993 virtual std::pair<const TargetRegisterClass *, uint8_t>
1994 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
1995
1996 /// Once all of the register classes are added, this allows us to compute
1997 /// derived properties we expose.
1998 void computeRegisterProperties(const TargetRegisterInfo *TRI);
1999
2000 /// Indicate that the specified operation does not work with the specified
2001 /// type and indicate what to do about it. Note that VT may refer to either
2002 /// the type of a result or that of an operand of Op.
2003 void setOperationAction(unsigned Op, MVT VT,
2004 LegalizeAction Action) {
2005 assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2006 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2007 }
2008
2009 /// Indicate that the specified load with extension does not work with the
2010 /// specified type and indicate what to do about it.
2011 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2012 LegalizeAction Action) {
2013 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2014 MemVT.isValid() && "Table isn't big enough!");
2015 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2016 unsigned Shift = 4 * ExtType;
2017 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2018 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2019 }
2020
2021 /// Indicate that the specified truncating store does not work with the
2022 /// specified type and indicate what to do about it.
2023 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2024 LegalizeAction Action) {
2025 assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2026 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2027 }
2028
2029 /// Indicate that the specified indexed load does or does not work with the
2030 /// specified type and indicate what to do about it.
2031 ///
2032 /// NOTE: All indexed mode loads are initialized to Expand in
2033 /// TargetLowering.cpp
2034 void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2035 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2036 }
2037
2038 /// Indicate that the specified indexed store does or does not work with the
2039 /// specified type and indicate what to do about it.
2040 ///
2041 /// NOTE: All indexed mode stores are initialized to Expand in
2042 /// TargetLowering.cpp
2043 void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2044 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2045 }
2046
2047 /// Indicate that the specified indexed masked load does or does not work with
2048 /// the specified type and indicate what to do about it.
2049 ///
2050 /// NOTE: All indexed mode masked loads are initialized to Expand in
2051 /// TargetLowering.cpp
2052 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2053 LegalizeAction Action) {
2054 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2055 }
2056
2057 /// Indicate that the specified indexed masked store does or does not work
2058 /// with the specified type and indicate what to do about it.
2059 ///
2060 /// NOTE: All indexed mode masked stores are initialized to Expand in
2061 /// TargetLowering.cpp
2062 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2063 LegalizeAction Action) {
2064 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2065 }
2066
2067 /// Indicate that the specified condition code is or isn't supported on the
2068 /// target and indicate what to do about it.
2069 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2070 LegalizeAction Action) {
2071 assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2072 "Table isn't big enough!");
2073 assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2074 /// The lower 3 bits of the SimpleTy select the Nth 4-bit group within the
2075 /// 32-bit value, and the upper 29 bits index into the second dimension of the
2076 /// array to select which 32-bit value to use.
2077 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2078 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2079 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2080 }
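To make the packing arithmetic concrete, a small worked example (the SimpleTy value 13 is arbitrary):

  // For VT.SimpleTy == 13:
  //   Shift = 4 * (13 & 0x7) = 4 * 5 = 20   // nibble position within the word
  //   Word  = 13 >> 3        = 1            // which 32-bit word of the row
  // so the action occupies bits [20, 24) of CondCodeActions[CC][1], and is
  // read back as (CondCodeActions[CC][1] >> 20) & 0xF.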
2081
2082 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2083 /// to trying a larger integer/fp until it can find one that works. If that
2084 /// default is insufficient, this method can be used by the target to override
2085 /// the default.
2086 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2087 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2088 }
2089
2090 /// Convenience method to set an operation to Promote and specify the type
2091 /// in a single call.
2092 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2093 setOperationAction(Opc, OrigVT, Promote);
2094 AddPromotedToType(Opc, OrigVT, DestVT);
2095 }
2096
2097 /// Targets should invoke this method for each target independent node that
2098 /// they want to provide a custom DAG combiner for by implementing the
2099 /// PerformDAGCombine virtual method.
2100 void setTargetDAGCombine(ISD::NodeType NT) {
2101 assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2102 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2103 }
2104
2105 /// Set the target's minimum function alignment.
2106 void setMinFunctionAlignment(Align Alignment) {
2107 MinFunctionAlignment = Alignment;
2108 }
2109
2110 /// Set the target's preferred function alignment. This should be set if
2111 /// there is a performance benefit to higher-than-minimum alignment.
2112 void setPrefFunctionAlignment(Align Alignment) {
2113 PrefFunctionAlignment = Alignment;
2114 }
2115
2116 /// Set the target's preferred loop alignment. Default alignment is one, which
2117 /// means the target does not care about loop alignment. The target may also
2118 /// override getPrefLoopAlignment to provide per-loop values.
2119 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2120
2121 /// Set the minimum stack alignment of an argument.
2122 void setMinStackArgumentAlignment(Align Alignment) {
2123 MinStackArgumentAlignment = Alignment;
2124 }
2125
2126 /// Set the maximum atomic operation size supported by the
2127 /// backend. Atomic operations greater than this size (as well as
2128 /// ones that are not naturally aligned), will be expanded by
2129 /// AtomicExpandPass into an __atomic_* library call.
2130 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2131 MaxAtomicSizeInBitsSupported = SizeInBits;
2132 }
2133
2134 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2135 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2136 MinCmpXchgSizeInBits = SizeInBits;
2137 }
2138
2139 /// Sets whether unaligned atomic operations are supported.
2140 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2141 SupportsUnalignedAtomics = UnalignedSupported;
2142 }
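Taken together, these protected setters are normally called from the derived TargetLowering constructor. Below is a condensed, hypothetical constructor fragment; the class, subtarget, and register-class names are invented, and the specific types and actions were chosen purely to show the call pattern.

  MyTargetLowering::MyTargetLowering(const TargetMachine &TM,
                                     const MySubtarget &STI)
      : TargetLowering(TM) {
    addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass); // i32 lives in GPRs
    setOperationAction(ISD::SDIV, MVT::i32, Expand);      // no native divide
    setOperationAction(ISD::CTPOP, MVT::i32, Custom);     // lowered by hand
    setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i1, Promote);
    setTruncStoreAction(MVT::i64, MVT::i16, Expand);
    setOperationPromotedToType(ISD::AND, MVT::i8, MVT::i32);
    setTargetDAGCombine(ISD::SIGN_EXTEND); // routed to PerformDAGCombine
    setMinFunctionAlignment(Align(4));
    setMaxAtomicSizeInBitsSupported(64);   // wider atomics become libcalls
    setMinCmpXchgSizeInBits(32);           // narrower cmpxchg gets widened
    computeRegisterProperties(STI.getRegisterInfo());
  }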
2143
2144public:
2145 //===--------------------------------------------------------------------===//
2146 // Addressing mode description hooks (used by LSR etc).
2147 //
2148
2149 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2150 /// instructions reading the address. This allows as much computation as
2151 /// possible to be done in the address mode for that operand. This hook also
2152 /// lets targets indicate whether this should be done for intrinsics that
2153 /// load or store.
2154 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2155 SmallVectorImpl<Value*> &/*Ops*/,
2156 Type *&/*AccessTy*/) const {
2157 return false;
2158 }
2159
2160 /// This represents an addressing mode of:
2161 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2162 /// If BaseGV is null, there is no BaseGV.
2163 /// If BaseOffs is zero, there is no base offset.
2164 /// If HasBaseReg is false, there is no base register.
2165 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2166 /// no scale.
2167 struct AddrMode {
2168 GlobalValue *BaseGV = nullptr;
2169 int64_t BaseOffs = 0;
2170 bool HasBaseReg = false;
2171 int64_t Scale = 0;
2172 AddrMode() = default;
2173 };
2174
2175 /// Return true if the addressing mode represented by AM is legal for this
2176 /// target, for a load/store of the specified type.
2177 ///
2178 /// The type may be VoidTy, in which case only return true if the addressing
2179 /// mode is legal for a load/store of any legal type. TODO: Handle
2180 /// pre/postinc as well.
2181 ///
2182 /// If the address space cannot be determined, it will be -1.
2183 ///
2184 /// TODO: Remove default argument
2185 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2186 Type *Ty, unsigned AddrSpace,
2187 Instruction *I = nullptr) const;
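A hedged sketch of how a client such as LSR or CodeGenPrepare fills in AddrMode and queries the hook for the form base + 4*index + 16; TLI, DL, and Ctx are assumed to be in scope, and the constants are arbitrary.

  TargetLowering::AddrMode AM;
  AM.BaseGV = nullptr;   // no global base
  AM.BaseOffs = 16;      // constant displacement
  AM.HasBaseReg = true;  // a base register is present
  AM.Scale = 4;          // index register scaled by 4
  bool Legal = TLI.isLegalAddressingMode(DL, AM, Type::getInt32Ty(Ctx),
                                         /*AddrSpace=*/0);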
2188
2189 /// Return the cost of the scaling factor used in the addressing mode
2190 /// represented by AM for this target, for a load/store of the specified type.
2191 ///
2192 /// If the AM is supported, the return value must be >= 0.
2193 /// If the AM is not supported, it returns a negative value.
2194 /// TODO: Handle pre/postinc as well.
2195 /// TODO: Remove default argument
2196 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2197 Type *Ty, unsigned AS = 0) const {
2198 // Default: assume that any scaling factor used in a legal AM is free.
2199 if (isLegalAddressingMode(DL, AM, Ty, AS))
2200 return 0;
2201 return -1;
2202 }
2203
2204 /// Return true if the specified immediate is a legal icmp immediate, that is,
2205 /// the target has icmp instructions which can compare a register against the
2206 /// immediate without having to materialize the immediate into a register.
2207 virtual bool isLegalICmpImmediate(int64_t) const {
2208 return true;
2209 }
2210
2211 /// Return true if the specified immediate is a legal add immediate, that is, the
2212 /// target has add instructions which can add a register with the immediate
2213 /// without having to materialize the immediate into a register.
2214 virtual bool isLegalAddImmediate(int64_t) const {
2215 return true;
2216 }
2217
2218 /// Return true if the specified immediate is legal for the value input of a
2219 /// store instruction.
2220 virtual bool isLegalStoreImmediate(int64_t Value) const {
2221 // Default implementation assumes that at least 0 works since it is likely
2222 // that a zero register exists or a zero immediate is allowed.
2223 return Value == 0;
2224 }
2225
2226 /// Return true if it's significantly cheaper to shift a vector by a uniform
2227 /// scalar than by an amount which will vary across each lane. On x86, for
2228 /// example, there is a "psllw" instruction for the former case, but no simple
2229 /// instruction for a general "a << b" operation on vectors.
2230 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2231 return false;
2232 }
2233
2234 /// Returns true if the opcode is a commutative binary operation.
2235 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2236 // FIXME: This should get its info from the td file.
2237 switch (Opcode) {
2238 case ISD::ADD:
2239 case ISD::SMIN:
2240 case ISD::SMAX:
2241 case ISD::UMIN:
2242 case ISD::UMAX:
2243 case ISD::MUL:
2244 case ISD::MULHU:
2245 case ISD::MULHS:
2246 case ISD::SMUL_LOHI:
2247 case ISD::UMUL_LOHI:
2248 case ISD::FADD:
2249 case ISD::FMUL:
2250 case ISD::AND:
2251 case ISD::OR:
2252 case ISD::XOR:
2253 case ISD::SADDO:
2254 case ISD::UADDO:
2255 case ISD::ADDC:
2256 case ISD::ADDE:
2257 case ISD::SADDSAT:
2258 case ISD::UADDSAT:
2259 case ISD::FMINNUM:
2260 case ISD::FMAXNUM:
2261 case ISD::FMINNUM_IEEE:
2262 case ISD::FMAXNUM_IEEE:
2263 case ISD::FMINIMUM:
2264 case ISD::FMAXIMUM:
2265 return true;
2266 default: return false;
2267 }
2268 }
2269
2270 /// Return true if the node is a math/logic binary operator.
2271 virtual bool isBinOp(unsigned Opcode) const {
2272 // A commutative binop must be a binop.
2273 if (isCommutativeBinOp(Opcode))
2274 return true;
2275 // These are non-commutative binops.
2276 switch (Opcode) {
2277 case ISD::SUB:
2278 case ISD::SHL:
2279 case ISD::SRL:
2280 case ISD::SRA:
2281 case ISD::SDIV:
2282 case ISD::UDIV:
2283 case ISD::SREM:
2284 case ISD::UREM:
2285 case ISD::FSUB:
2286 case ISD::FDIV:
2287 case ISD::FREM:
2288 return true;
2289 default:
2290 return false;
2291 }
2292 }
2293
2294 /// Return true if it's free to truncate a value of type FromTy to type
2295 /// ToTy. For example, on x86 it's free to truncate an i32 value in register EAX to i16
2296 /// by referencing its sub-register AX.
2297 /// Targets must return false when FromTy <= ToTy.
2298 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2299 return false;
2300 }
2301
2302 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2303 /// whether a call is in tail position. Typically this means that both results
2304 /// would be assigned to the same register or stack slot, but it could mean
2305 /// the target performs adequate checks of its own before proceeding with the
2306 /// tail call. Targets must return false when FromTy <= ToTy.
2307 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2308 return false;
2309 }
2310
2311 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2312 return false;
2313 }
2314
2315 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2316
2317 /// Return true if the extension represented by \p I is free.
2318 /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2319 /// this method can use the context provided by \p I to decide
2320 /// whether or not \p I is free.
2321 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2322 /// In other words, if is[Z|FP]ExtFree returns true, then this method
2323 /// returns true as well. The converse is not true.
2324 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2325 /// \pre \p I must be a sign, zero, or fp extension.
2326 bool isExtFree(const Instruction *I) const {
2327 switch (I->getOpcode()) {
2328 case Instruction::FPExt:
2329 if (isFPExtFree(EVT::getEVT(I->getType()),
2330 EVT::getEVT(I->getOperand(0)->getType())))
2331 return true;
2332 break;
2333 case Instruction::ZExt:
2334 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2335 return true;
2336 break;
2337 case Instruction::SExt:
2338 break;
2339 default:
2340 llvm_unreachable("Instruction is not an extension");
2341 }
2342 return isExtFreeImpl(I);
2343 }
2344
2345 /// Return true if \p Load and \p Ext can form an ExtLoad.
2346 /// For example, in AArch64
2347 /// %L = load i8, i8* %ptr
2348 /// %E = zext i8 %L to i32
2349 /// can be lowered into one load instruction
2350 /// ldrb w0, [x0]
2351 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2352 const DataLayout &DL) const {
2353 EVT VT = getValueType(DL, Ext->getType());
2354 EVT LoadVT = getValueType(DL, Load->getType());
2355
2356 // If the load has other users and the truncate is not free, the ext
2357 // probably isn't free.
2358 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2359 !isTruncateFree(Ext->getType(), Load->getType()))
2360 return false;
2361
2362 // Check whether the target supports casts folded into loads.
2363 unsigned LType;
2364 if (isa<ZExtInst>(Ext))
2365 LType = ISD::ZEXTLOAD;
2366 else {
2367 assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
2368 LType = ISD::SEXTLOAD;
2369 }
2370
2371 return isLoadExtLegal(LType, VT, LoadVT);
2372 }
2373
2374 /// Return true if any actual instruction that defines a value of type FromTy
2375 /// implicitly zero-extends the value to ToTy in the result register.
2376 ///
2377 /// The function should return true when it is likely that the truncate can
2378 /// be freely folded with an instruction defining a value of FromTy. If
2379 /// the defining instruction is unknown (because you're looking at a
2380 /// function argument, PHI, etc.) then the target may require an
2381 /// explicit truncate, which is not necessarily free, but this function
2382 /// does not deal with those cases.
2383 /// Targets must return false when FromTy >= ToTy.
2384 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
2385 return false;
2386 }
2387
2388 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
2389 return false;
2390 }
2391
2392 /// Return true if sign-extension from FromTy to ToTy is cheaper than
2393 /// zero-extension.
2394 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
2395 return false;