Bug Summary

File: llvm/include/llvm/CodeGen/TargetLowering.h
Warning: line 1385, column 31
Called C++ object pointer is null
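
In short, the path below claims that AArch64TTIImpl::getCmpSelInstrCost forwards its CondTy parameter to TLI->getValueType(DL, CondTy) (listing line 679) without a null check, and that a caller reached through BasicTTIImplBase::getCmpSelInstrCost passes CondTy == nullptr, so getValueType dereferences a null Ty at TargetLowering.h:1385. The following is a minimal, self-contained sketch of that shape only; the type, function names, and bodies are simplified stand-ins, not the real LLVM classes or signatures.

#include <cstdio>

// Illustrative stand-in for llvm::Type; only the member used here.
struct Ty {
  bool Vector;
  bool isVectorTy() const { return Vector; }   // dereferences 'this'
};

// Stand-in for TargetLoweringBase::getValueType: it uses Ty unconditionally,
// which is where the report places the null dereference (TargetLowering.h:1385).
static int getValueTypeLike(const Ty *T) {
  return T->isVectorTy() ? 1 : 0;              // crashes if T == nullptr
}

// Stand-in for AArch64TTIImpl::getCmpSelInstrCost: on the vector-select path
// it forwards CondTy straight to the helper, mirroring listing line 679.
static int getCmpSelCostLike(const Ty *ValTy, const Ty *CondTy) {
  if (ValTy->isVectorTy())
    return getValueTypeLike(CondTy);           // CondTy may be null here
  return 1;
}

int main() {
  Ty Vec{true};
  // The analyzer's events 8-9 and 30 claim a caller reaches this point with a
  // null CondTy; with a vector ValTy this call reproduces the crash shape.
  std::printf("%d\n", getCmpSelCostLike(&Vec, /*CondTy=*/nullptr));
}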

Annotated Source Code


clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name AArch64TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -fno-split-dwarf-inlining -debugger-tuning=gdb -ffunction-sections -fdata-sections -resource-dir /usr/lib/llvm-12/lib/clang/12.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/build-llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64 -I /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/build-llvm/include -I /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/x86_64-linux-gnu/c++/6.3.0 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/6.3.0/../../../../include/c++/6.3.0/backward -internal-isystem /usr/local/include -internal-isystem /usr/lib/llvm-12/lib/clang/12.0.0/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/build-llvm/lib/Target/AArch64 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -o /tmp/scan-build-2020-09-28-092409-31635-1 -x c++ /build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

1//===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "AArch64ExpandImm.h"
10#include "AArch64TargetTransformInfo.h"
11#include "MCTargetDesc/AArch64AddressingModes.h"
12#include "llvm/Analysis/LoopInfo.h"
13#include "llvm/Analysis/TargetTransformInfo.h"
14#include "llvm/CodeGen/BasicTTIImpl.h"
15#include "llvm/CodeGen/CostTable.h"
16#include "llvm/CodeGen/TargetLowering.h"
17#include "llvm/IR/IntrinsicInst.h"
18#include "llvm/IR/IntrinsicsAArch64.h"
19#include "llvm/Support/Debug.h"
20#include <algorithm>
21using namespace llvm;
22
23#define DEBUG_TYPE "aarch64tti"
24
25static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix",
26 cl::init(true), cl::Hidden);
27
28bool AArch64TTIImpl::areInlineCompatible(const Function *Caller,
29 const Function *Callee) const {
30 const TargetMachine &TM = getTLI()->getTargetMachine();
31
32 const FeatureBitset &CallerBits =
33 TM.getSubtargetImpl(*Caller)->getFeatureBits();
34 const FeatureBitset &CalleeBits =
35 TM.getSubtargetImpl(*Callee)->getFeatureBits();
36
37 // Inline a callee if its target-features are a subset of the caller's
38 // target-features.
39 return (CallerBits & CalleeBits) == CalleeBits;
40}
41
42/// Calculate the cost of materializing a 64-bit value. This helper
43/// method might only calculate a fraction of a larger immediate. Therefore it
44/// is valid to return a cost of ZERO.
45int AArch64TTIImpl::getIntImmCost(int64_t Val) {
46 // Check if the immediate can be encoded within an instruction.
47 if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64))
48 return 0;
49
50 if (Val < 0)
51 Val = ~Val;
52
53 // Calculate how many moves we will need to materialize this constant.
54 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
55 AArch64_IMM::expandMOVImm(Val, 64, Insn);
56 return Insn.size();
57}
58
59/// Calculate the cost of materializing the given constant.
60int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
61 TTI::TargetCostKind CostKind) {
62 assert(Ty->isIntegerTy());
63
64 unsigned BitSize = Ty->getPrimitiveSizeInBits();
65 if (BitSize == 0)
66 return ~0U;
67
68 // Sign-extend all constants to a multiple of 64-bit.
69 APInt ImmVal = Imm;
70 if (BitSize & 0x3f)
71 ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
72
73 // Split the constant into 64-bit chunks and calculate the cost for each
74 // chunk.
75 int Cost = 0;
76 for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
77 APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
78 int64_t Val = Tmp.getSExtValue();
79 Cost += getIntImmCost(Val);
80 }
81 // We need at least one instruction to materialize the constant.
82 return std::max(1, Cost);
83}
84
85int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
86 const APInt &Imm, Type *Ty,
87 TTI::TargetCostKind CostKind,
88 Instruction *Inst) {
89 assert(Ty->isIntegerTy());
90
91 unsigned BitSize = Ty->getPrimitiveSizeInBits();
92 // There is no cost model for constants with a bit size of 0. Return TCC_Free
93 // here, so that constant hoisting will ignore this constant.
94 if (BitSize == 0)
95 return TTI::TCC_Free;
96
97 unsigned ImmIdx = ~0U;
98 switch (Opcode) {
99 default:
100 return TTI::TCC_Free;
101 case Instruction::GetElementPtr:
102 // Always hoist the base address of a GetElementPtr.
103 if (Idx == 0)
104 return 2 * TTI::TCC_Basic;
105 return TTI::TCC_Free;
106 case Instruction::Store:
107 ImmIdx = 0;
108 break;
109 case Instruction::Add:
110 case Instruction::Sub:
111 case Instruction::Mul:
112 case Instruction::UDiv:
113 case Instruction::SDiv:
114 case Instruction::URem:
115 case Instruction::SRem:
116 case Instruction::And:
117 case Instruction::Or:
118 case Instruction::Xor:
119 case Instruction::ICmp:
120 ImmIdx = 1;
121 break;
122 // Always return TCC_Free for the shift value of a shift instruction.
123 case Instruction::Shl:
124 case Instruction::LShr:
125 case Instruction::AShr:
126 if (Idx == 1)
127 return TTI::TCC_Free;
128 break;
129 case Instruction::Trunc:
130 case Instruction::ZExt:
131 case Instruction::SExt:
132 case Instruction::IntToPtr:
133 case Instruction::PtrToInt:
134 case Instruction::BitCast:
135 case Instruction::PHI:
136 case Instruction::Call:
137 case Instruction::Select:
138 case Instruction::Ret:
139 case Instruction::Load:
140 break;
141 }
142
143 if (Idx == ImmIdx) {
144 int NumConstants = (BitSize + 63) / 64;
145 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
146 return (Cost <= NumConstants * TTI::TCC_Basic)
147 ? static_cast<int>(TTI::TCC_Free)
148 : Cost;
149 }
150 return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
151}
152
153int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
154 const APInt &Imm, Type *Ty,
155 TTI::TargetCostKind CostKind) {
156 assert(Ty->isIntegerTy());
157
158 unsigned BitSize = Ty->getPrimitiveSizeInBits();
159 // There is no cost model for constants with a bit size of 0. Return TCC_Free
160 // here, so that constant hoisting will ignore this constant.
161 if (BitSize == 0)
162 return TTI::TCC_Free;
163
164 // Most (all?) AArch64 intrinsics do not support folding immediates into the
165 // selected instruction, so we compute the materialization cost for the
166 // immediate directly.
167 if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv)
168 return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
169
170 switch (IID) {
171 default:
172 return TTI::TCC_Free;
173 case Intrinsic::sadd_with_overflow:
174 case Intrinsic::uadd_with_overflow:
175 case Intrinsic::ssub_with_overflow:
176 case Intrinsic::usub_with_overflow:
177 case Intrinsic::smul_with_overflow:
178 case Intrinsic::umul_with_overflow:
179 if (Idx == 1) {
180 int NumConstants = (BitSize + 63) / 64;
181 int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
182 return (Cost <= NumConstants * TTI::TCC_Basic)
183 ? static_cast<int>(TTI::TCC_Free)
184 : Cost;
185 }
186 break;
187 case Intrinsic::experimental_stackmap:
188 if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
189 return TTI::TCC_Free;
190 break;
191 case Intrinsic::experimental_patchpoint_void:
192 case Intrinsic::experimental_patchpoint_i64:
193 if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
194 return TTI::TCC_Free;
195 break;
196 case Intrinsic::experimental_gc_statepoint:
197 if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
198 return TTI::TCC_Free;
199 break;
200 }
201 return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind);
202}
203
204TargetTransformInfo::PopcntSupportKind
205AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) {
206 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
207 if (TyWidth == 32 || TyWidth == 64)
208 return TTI::PSK_FastHardware;
209 // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount.
210 return TTI::PSK_Software;
211}
212
213bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode,
214 ArrayRef<const Value *> Args) {
215
216 // A helper that returns a vector type from the given type. The number of
217 // elements in type Ty determines the vector width.
218 auto toVectorTy = [&](Type *ArgTy) {
219 return FixedVectorType::get(ArgTy->getScalarType(),
220 cast<FixedVectorType>(DstTy)->getNumElements());
221 };
222
223 // Exit early if DstTy is not a vector type whose elements are at least
224 // 16-bits wide.
225 if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16)
226 return false;
227
228 // Determine if the operation has a widening variant. We consider both the
229 // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the
230 // instructions.
231 //
232 // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we
233 // verify that their extending operands are eliminated during code
234 // generation.
235 switch (Opcode) {
236 case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2).
237 case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2).
238 break;
239 default:
240 return false;
241 }
242
243 // To be a widening instruction (either the "wide" or "long" versions), the
244 // second operand must be a sign- or zero-extend having a single user. We
245 // only consider extends having a single user because they may otherwise not
246 // be eliminated.
247 if (Args.size() != 2 ||
248 (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) ||
249 !Args[1]->hasOneUse())
250 return false;
251 auto *Extend = cast<CastInst>(Args[1]);
252
253 // Legalize the destination type and ensure it can be used in a widening
254 // operation.
255 auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy);
256 unsigned DstElTySize = DstTyL.second.getScalarSizeInBits();
257 if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits())
258 return false;
259
260 // Legalize the source type and ensure it can be used in a widening
261 // operation.
262 auto *SrcTy = toVectorTy(Extend->getSrcTy());
263 auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy);
264 unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits();
265 if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits())
266 return false;
267
268 // Get the total number of vector elements in the legalized types.
269 unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements();
270 unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements();
271
272 // Return true if the legalized types have the same number of vector elements
273 // and the destination element type size is twice that of the source type.
274 return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize;
275}
276
277int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
278 TTI::CastContextHint CCH,
279 TTI::TargetCostKind CostKind,
280 const Instruction *I) {
281 int ISD = TLI->InstructionOpcodeToISD(Opcode);
282 assert(ISD && "Invalid opcode");
283
284 // If the cast is observable, and it is used by a widening instruction (e.g.,
285 // uaddl, saddw, etc.), it may be free.
286 if (I && I->hasOneUse()) {
287 auto *SingleUser = cast<Instruction>(*I->user_begin());
288 SmallVector<const Value *, 4> Operands(SingleUser->operand_values());
289 if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) {
290 // If the cast is the second operand, it is free. We will generate either
291 // a "wide" or "long" version of the widening instruction.
292 if (I == SingleUser->getOperand(1))
293 return 0;
294 // If the cast is not the second operand, it will be free if it looks the
295 // same as the second operand. In this case, we will generate a "long"
296 // version of the widening instruction.
297 if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1)))
298 if (I->getOpcode() == unsigned(Cast->getOpcode()) &&
299 cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy())
300 return 0;
301 }
302 }
303
304 // TODO: Allow non-throughput costs that aren't binary.
305 auto AdjustCost = [&CostKind](int Cost) {
306 if (CostKind != TTI::TCK_RecipThroughput)
307 return Cost == 0 ? 0 : 1;
308 return Cost;
309 };
310
311 EVT SrcTy = TLI->getValueType(DL, Src);
312 EVT DstTy = TLI->getValueType(DL, Dst);
313
314 if (!SrcTy.isSimple() || !DstTy.isSimple())
315 return AdjustCost(
316 BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
317
318 static const TypeConversionCostTblEntry
319 ConversionTbl[] = {
320 { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 },
321 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 },
322 { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 },
323 { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 },
324
325 // The number of shll instructions for the extension.
326 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
327 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
328 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
329 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 },
330 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
331 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
332 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
333 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 },
334 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
335 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
336 { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
337 { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
338 { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
339 { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 },
340 { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
341 { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
342
343 // LowerVectorINT_TO_FP:
344 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
345 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
346 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
347 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 },
348 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 },
349 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 },
350
351 // Complex: to v2f32
352 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
353 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
354 { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
355 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 },
356 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 },
357 { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 },
358
359 // Complex: to v4f32
360 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 },
361 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
362 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 },
363 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 },
364
365 // Complex: to v8f32
366 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
367 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
368 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 },
369 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 },
370
371 // Complex: to v16f32
372 { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
373 { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 },
374
375 // Complex: to v2f64
376 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
377 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
378 { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
379 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 },
380 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 },
381 { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 },
382
383
384 // LowerVectorFP_TO_INT
385 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 },
386 { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 },
387 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 },
388 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 },
389 { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 },
390 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 },
391
392 // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext).
393 { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 },
394 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 },
395 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 },
396 { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 },
397 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 },
398 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 },
399
400 // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2
401 { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 },
402 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 },
403 { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 },
404 { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 },
405
406 // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2.
407 { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 },
408 { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 },
409 { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 },
410 { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 },
411 { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 },
412 { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 },
413 };
414
415 if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD,
416 DstTy.getSimpleVT(),
417 SrcTy.getSimpleVT()))
418 return AdjustCost(Entry->Cost);
419
420 return AdjustCost(
421 BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I));
422}
423
424int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
425 VectorType *VecTy,
426 unsigned Index) {
427
428 // Make sure we were given a valid extend opcode.
429 assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
430 "Invalid opcode");
431
432 // We are extending an element we extract from a vector, so the source type
433 // of the extend is the element type of the vector.
434 auto *Src = VecTy->getElementType();
435
436 // Sign- and zero-extends are for integer types only.
437 assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type");
438
439 // Get the cost for the extract. We compute the cost (if any) for the extend
440 // below.
441 auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index);
442
443 // Legalize the types.
444 auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy);
445 auto DstVT = TLI->getValueType(DL, Dst);
446 auto SrcVT = TLI->getValueType(DL, Src);
447 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
448
449 // If the resulting type is still a vector and the destination type is legal,
450 // we may get the extension for free. If not, get the default cost for the
451 // extend.
452 if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT))
453 return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
454 CostKind);
455
456 // The destination type should be larger than the element type. If not, get
457 // the default cost for the extend.
458 if (DstVT.getSizeInBits() < SrcVT.getSizeInBits())
459 return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
460 CostKind);
461
462 switch (Opcode) {
463 default:
464 llvm_unreachable("Opcode should be either SExt or ZExt")::llvm::llvm_unreachable_internal("Opcode should be either SExt or ZExt"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp"
, 464)
;
465
466 // For sign-extends, we only need a smov, which performs the extension
467 // automatically.
468 case Instruction::SExt:
469 return Cost;
470
471 // For zero-extends, the extend is performed automatically by a umov unless
472 // the destination type is i64 and the element type is i8 or i16.
473 case Instruction::ZExt:
474 if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u)
475 return Cost;
476 }
477
478 // If we are unable to perform the extend for free, get the default cost.
479 return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None,
480 CostKind);
481}
482
483unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode,
484 TTI::TargetCostKind CostKind) {
485 if (CostKind != TTI::TCK_RecipThroughput)
486 return Opcode == Instruction::PHI ? 0 : 1;
487 assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind");
488 // Branches are assumed to be predicted.
489 return 0;
490}
491
492int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
493 unsigned Index) {
494 assert(Val->isVectorTy() && "This must be a vector type");
495
496 if (Index != -1U) {
497 // Legalize the type.
498 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
499
500 // This type is legalized to a scalar type.
501 if (!LT.second.isVector())
502 return 0;
503
504 // The type may be split. Normalize the index to the new type.
505 unsigned Width = LT.second.getVectorNumElements();
506 Index = Index % Width;
507
508 // The element at index zero is already inside the vector.
509 if (Index == 0)
510 return 0;
511 }
512
513 // All other insert/extracts cost this much.
514 return ST->getVectorInsertExtractBaseCost();
515}
516
517int AArch64TTIImpl::getArithmeticInstrCost(
518 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind,
519 TTI::OperandValueKind Opd1Info,
520 TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo,
521 TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
522 const Instruction *CxtI) {
523 // TODO: Handle more cost kinds.
524 if (CostKind != TTI::TCK_RecipThroughput)
525 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
526 Opd2Info, Opd1PropInfo,
527 Opd2PropInfo, Args, CxtI);
528
529 // Legalize the type.
530 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
531
532 // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.),
533 // add in the widening overhead specified by the sub-target. Since the
534 // extends feeding widening instructions are performed automatically, they
535 // aren't present in the generated code and have a zero cost. By adding a
536 // widening overhead here, we attach the total cost of the combined operation
537 // to the widening instruction.
538 int Cost = 0;
539 if (isWideningInstruction(Ty, Opcode, Args))
540 Cost += ST->getWideningBaseCost();
541
542 int ISD = TLI->InstructionOpcodeToISD(Opcode);
543
544 switch (ISD) {
545 default:
546 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
547 Opd2Info,
548 Opd1PropInfo, Opd2PropInfo);
549 case ISD::SDIV:
550 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue &&
551 Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
552 // On AArch64, scalar signed division by a power-of-two constant is
553 // normally expanded to the sequence ADD + CMP + SELECT + SRA.
554 // The OperandValue properties may not be the same as those of the
555 // previous operation; conservatively assume OP_None.
556 Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
557 Opd1Info, Opd2Info,
558 TargetTransformInfo::OP_None,
559 TargetTransformInfo::OP_None);
560 Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind,
561 Opd1Info, Opd2Info,
562 TargetTransformInfo::OP_None,
563 TargetTransformInfo::OP_None);
564 Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind,
565 Opd1Info, Opd2Info,
566 TargetTransformInfo::OP_None,
567 TargetTransformInfo::OP_None);
568 Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
569 Opd1Info, Opd2Info,
570 TargetTransformInfo::OP_None,
571 TargetTransformInfo::OP_None);
572 return Cost;
573 }
574 LLVM_FALLTHROUGH;
575 case ISD::UDIV:
576 if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) {
577 auto VT = TLI->getValueType(DL, Ty);
578 if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) {
579 // Vector signed division by a constant is expanded to the
580 // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division
581 // to MULHS + SUB + SRL + ADD + SRL.
582 int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind,
583 Opd1Info, Opd2Info,
584 TargetTransformInfo::OP_None,
585 TargetTransformInfo::OP_None);
586 int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind,
587 Opd1Info, Opd2Info,
588 TargetTransformInfo::OP_None,
589 TargetTransformInfo::OP_None);
590 int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind,
591 Opd1Info, Opd2Info,
592 TargetTransformInfo::OP_None,
593 TargetTransformInfo::OP_None);
594 return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1;
595 }
596 }
597
598 Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
599 Opd2Info,
600 Opd1PropInfo, Opd2PropInfo);
601 if (Ty->isVectorTy()) {
602 // On AArch64, vector divisions are not supported natively and are
603 // expanded into scalar divisions of each pair of elements.
604 Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind,
605 Opd1Info, Opd2Info, Opd1PropInfo,
606 Opd2PropInfo);
607 Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind,
608 Opd1Info, Opd2Info, Opd1PropInfo,
609 Opd2PropInfo);
610 // TODO: if one of the arguments is scalar, then it's not necessary to
611 // double the cost of handling the vector elements.
612 Cost += Cost;
613 }
614 return Cost;
615
616 case ISD::ADD:
617 case ISD::MUL:
618 case ISD::XOR:
619 case ISD::OR:
620 case ISD::AND:
621 // These nodes are marked as 'custom' for combining purposes only.
622 // We know that they are legal. See LowerAdd in ISelLowering.
623 return (Cost + 1) * LT.first;
624
625 case ISD::FADD:
626 // These nodes are marked as 'custom' just to lower them to SVE.
627 // We know said lowering will incur no additional cost.
628 if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty())
629 return (Cost + 2) * LT.first;
630
631 return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info,
632 Opd2Info,
633 Opd1PropInfo, Opd2PropInfo);
634 }
635}
636
637int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
638 const SCEV *Ptr) {
639 // Address computations in vectorized code with non-consecutive addresses will
640 // likely result in more instructions compared to scalar code where the
641 // computation can more often be merged into the index mode. The resulting
642 // extra micro-ops can significantly decrease throughput.
643 unsigned NumVectorInstToHideOverhead = 10;
644 int MaxMergeDistance = 64;
645
646 if (Ty->isVectorTy() && SE &&
647 !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
648 return NumVectorInstToHideOverhead;
649
650 // In many cases the address computation is not merged into the instruction
651 // addressing mode.
652 return 1;
653}
654
655int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
656 Type *CondTy,
657 TTI::TargetCostKind CostKind,
658 const Instruction *I) {
659 // TODO: Handle other cost kinds.
660 if (CostKind != TTI::TCK_RecipThroughput)
  [1] Assuming 'CostKind' is equal to TCK_RecipThroughput
  [2] Taking false branch
  [22.1] 'CostKind' is equal to TCK_RecipThroughput
  [23] Taking false branch
661 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
662
663 int ISD = TLI->InstructionOpcodeToISD(Opcode);
664 // We don't lower some vector selects that are wider than the register
665 // width very well.
666 if (ValTy->isVectorTy() && ISD == ISD::SELECT) {
  [3] Calling 'Type::isVectorTy'
  [7] Returning from 'Type::isVectorTy'
  [24] Calling 'Type::isVectorTy'
  [27] Returning from 'Type::isVectorTy'
  [28] Assuming 'ISD' is equal to SELECT
  [29] Taking true branch
667 // We would need this many instructions to hide the scalarization happening.
668 const int AmortizationCost = 20;
669 static const TypeConversionCostTblEntry
670 VectorSelectTbl[] = {
671 { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 },
672 { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 },
673 { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 },
674 { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost },
675 { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost },
676 { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost }
677 };
678
679 EVT SelCondTy = TLI->getValueType(DL, CondTy);
  [30] Passing null pointer value via 2nd parameter 'Ty'
  [31] Calling 'TargetLoweringBase::getValueType'
680 EVT SelValTy = TLI->getValueType(DL, ValTy);
681 if (SelCondTy.isSimple() && SelValTy.isSimple()) {
682 if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD,
683 SelCondTy.getSimpleVT(),
684 SelValTy.getSimpleVT()))
685 return Entry->Cost;
686 }
687 }
688 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
  [8] Passing value via 3rd parameter 'CondTy'
  [9] Calling 'BasicTTIImplBase::getCmpSelInstrCost'
689}
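
For illustration only, the reported path could be cut off by bailing out before CondTy is touched on the vector-select path. The sketch below uses the same simplified stand-ins as the sketch in the bug summary; the null-check guard is a hypothetical, not necessarily the fix applied upstream, and whether falling back to the base cost is the right behavior is an assumption.

#include <cstdio>

struct Ty {
  bool Vector;
  bool isVectorTy() const { return Vector; }
};

// Generic fallback, standing in for BaseT::getCmpSelInstrCost.
static int baseCmpSelCostLike(const Ty *, const Ty *) { return 1; }

// Guarded variant of the stand-in cost hook: a hypothetical null check on
// CondTy diverts to the fallback before anything dereferences it.
static int getCmpSelCostLike(const Ty *ValTy, const Ty *CondTy) {
  if (ValTy->isVectorTy()) {
    if (!CondTy)                                   // hypothetical guard
      return baseCmpSelCostLike(ValTy, CondTy);
    return CondTy->isVectorTy() ? 2 : 1;           // safe: CondTy is non-null
  }
  return 1;
}

int main() {
  Ty Vec{true};
  // The same call that crashed the unguarded sketch now takes the fallback.
  std::printf("%d\n", getCmpSelCostLike(&Vec, /*CondTy=*/nullptr));
}
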
690
691AArch64TTIImpl::TTI::MemCmpExpansionOptions
692AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
693 TTI::MemCmpExpansionOptions Options;
694 if (ST->requiresStrictAlign()) {
695 // TODO: Add cost modeling for strict align. Misaligned loads expand to
696 // a bunch of instructions when strict align is enabled.
697 return Options;
698 }
699 Options.AllowOverlappingLoads = true;
700 Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize);
701 Options.NumLoadsPerBlock = Options.MaxNumLoads;
702 // TODO: Though vector loads usually perform well on AArch64, in some targets
703 // they may wake up the FP unit, which raises the power consumption. Perhaps
704 // they could be used with no holds barred (-O3).
705 Options.LoadSizes = {8, 4, 2, 1};
706 return Options;
707}
708
709int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty,
710 MaybeAlign Alignment, unsigned AddressSpace,
711 TTI::TargetCostKind CostKind,
712 const Instruction *I) {
713 // TODO: Handle other cost kinds.
714 if (CostKind != TTI::TCK_RecipThroughput)
715 return 1;
716
717 // Type legalization can't handle structs
718 if (TLI->getValueType(DL, Ty, true) == MVT::Other)
719 return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace,
720 CostKind);
721
722 auto LT = TLI->getTypeLegalizationCost(DL, Ty);
723
724 if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store &&
725 LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) {
726 // Unaligned stores are extremely inefficient. We don't split all
727 // unaligned 128-bit stores because of the negative impact that doing so
728 // has shown in practice on inlined block copy code.
729 // We make such stores expensive so that we will only vectorize if there
730 // are 6 other instructions getting vectorized.
731 const int AmortizationCost = 6;
732
733 return LT.first * 2 * AmortizationCost;
734 }
735
736 if (Ty->isVectorTy() &&
737 cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) {
738 unsigned ProfitableNumElements;
739 if (Opcode == Instruction::Store)
740 // We use a custom trunc store lowering so v.4b should be profitable.
741 ProfitableNumElements = 4;
742 else
743 // We scalarize the loads because there is no v.4b register and we
744 // have to promote the elements to v.2.
745 ProfitableNumElements = 8;
746
747 if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) {
748 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
749 unsigned NumVectorizableInstsToAmortize = NumVecElts * 2;
750 // We generate 2 instructions per vector element.
751 return NumVectorizableInstsToAmortize * NumVecElts * 2;
752 }
753 }
754
755 return LT.first;
756}
757
758int AArch64TTIImpl::getInterleavedMemoryOpCost(
759 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
760 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
761 bool UseMaskForCond, bool UseMaskForGaps) {
762 assert(Factor >= 2 && "Invalid interleave factor");
763 auto *VecVTy = cast<FixedVectorType>(VecTy);
764
765 if (!UseMaskForCond && !UseMaskForGaps &&
766 Factor <= TLI->getMaxSupportedInterleaveFactor()) {
767 unsigned NumElts = VecVTy->getNumElements();
768 auto *SubVecTy =
769 FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor);
770
771 // ldN/stN only support legal vector types of size 64 or 128 in bits.
772 // Accesses having vector types that are a multiple of 128 bits can be
773 // matched to more than one ldN/stN instruction.
774 if (NumElts % Factor == 0 &&
775 TLI->isLegalInterleavedAccessType(SubVecTy, DL))
776 return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
777 }
778
779 return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
780 Alignment, AddressSpace, CostKind,
781 UseMaskForCond, UseMaskForGaps);
782}
783
784int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) {
785 int Cost = 0;
786 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
787 for (auto *I : Tys) {
788 if (!I->isVectorTy())
789 continue;
790 if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() ==
791 128)
792 Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) +
793 getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind);
794 }
795 return Cost;
796}
797
798unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) {
799 return ST->getMaxInterleaveFactor();
800}
801
802// For Falkor, we want to avoid having too many strided loads in a loop since
803// that can exhaust the HW prefetcher resources. We adjust the unroller
804// MaxCount preference below to attempt to ensure unrolling doesn't create too
805// many strided loads.
806static void
807getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE,
808 TargetTransformInfo::UnrollingPreferences &UP) {
809 enum { MaxStridedLoads = 7 };
810 auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) {
811 int StridedLoads = 0;
812 // FIXME? We could make this more precise by looking at the CFG and
813 // e.g. not counting loads in each side of an if-then-else diamond.
814 for (const auto BB : L->blocks()) {
815 for (auto &I : *BB) {
816 LoadInst *LMemI = dyn_cast<LoadInst>(&I);
817 if (!LMemI)
818 continue;
819
820 Value *PtrValue = LMemI->getPointerOperand();
821 if (L->isLoopInvariant(PtrValue))
822 continue;
823
824 const SCEV *LSCEV = SE.getSCEV(PtrValue);
825 const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV);
826 if (!LSCEVAddRec || !LSCEVAddRec->isAffine())
827 continue;
828
829 // FIXME? We could take pairing of unrolled load copies into account
830 // by looking at the AddRec, but we would probably have to limit this
831 // to loops with no stores or other memory optimization barriers.
832 ++StridedLoads;
833 // We've seen enough strided loads that seeing more won't make a
834 // difference.
835 if (StridedLoads > MaxStridedLoads / 2)
836 return StridedLoads;
837 }
838 }
839 return StridedLoads;
840 };
841
842 int StridedLoads = countStridedLoads(L, SE);
843 LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads
844 << " strided loads\n");
845 // Pick the largest power of 2 unroll count that won't result in too many
846 // strided loads.
847 if (StridedLoads) {
848 UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads);
849 LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to "
850 << UP.MaxCount << '\n');
851 }
852}
853
854void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
855 TTI::UnrollingPreferences &UP) {
856 // Enable partial unrolling and runtime unrolling.
857 BaseT::getUnrollingPreferences(L, SE, UP);
858
859 // An inner loop is more likely to be hot, and its runtime check can be
860 // hoisted out by the LICM pass, so the overhead is lower; try a larger
861 // threshold to unroll more loops.
862 if (L->getLoopDepth() > 1)
863 UP.PartialThreshold *= 2;
864
865 // Disable partial & runtime unrolling on -Os.
866 UP.PartialOptSizeThreshold = 0;
867
868 if (ST->getProcFamily() == AArch64Subtarget::Falkor &&
869 EnableFalkorHWPFUnrollFix)
870 getFalkorUnrollingPreferences(L, SE, UP);
871}
872
873void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
874 TTI::PeelingPreferences &PP) {
875 BaseT::getPeelingPreferences(L, SE, PP);
876}
877
878Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
879 Type *ExpectedType) {
880 switch (Inst->getIntrinsicID()) {
881 default:
882 return nullptr;
883 case Intrinsic::aarch64_neon_st2:
884 case Intrinsic::aarch64_neon_st3:
885 case Intrinsic::aarch64_neon_st4: {
886 // Create a struct type
887 StructType *ST = dyn_cast<StructType>(ExpectedType);
888 if (!ST)
889 return nullptr;
890 unsigned NumElts = Inst->getNumArgOperands() - 1;
891 if (ST->getNumElements() != NumElts)
892 return nullptr;
893 for (unsigned i = 0, e = NumElts; i != e; ++i) {
894 if (Inst->getArgOperand(i)->getType() != ST->getElementType(i))
895 return nullptr;
896 }
897 Value *Res = UndefValue::get(ExpectedType);
898 IRBuilder<> Builder(Inst);
899 for (unsigned i = 0, e = NumElts; i != e; ++i) {
900 Value *L = Inst->getArgOperand(i);
901 Res = Builder.CreateInsertValue(Res, L, i);
902 }
903 return Res;
904 }
905 case Intrinsic::aarch64_neon_ld2:
906 case Intrinsic::aarch64_neon_ld3:
907 case Intrinsic::aarch64_neon_ld4:
908 if (Inst->getType() == ExpectedType)
909 return Inst;
910 return nullptr;
911 }
912}
913
914bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst,
915 MemIntrinsicInfo &Info) {
916 switch (Inst->getIntrinsicID()) {
917 default:
918 break;
919 case Intrinsic::aarch64_neon_ld2:
920 case Intrinsic::aarch64_neon_ld3:
921 case Intrinsic::aarch64_neon_ld4:
922 Info.ReadMem = true;
923 Info.WriteMem = false;
924 Info.PtrVal = Inst->getArgOperand(0);
925 break;
926 case Intrinsic::aarch64_neon_st2:
927 case Intrinsic::aarch64_neon_st3:
928 case Intrinsic::aarch64_neon_st4:
929 Info.ReadMem = false;
930 Info.WriteMem = true;
931 Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1);
932 break;
933 }
934
935 switch (Inst->getIntrinsicID()) {
936 default:
937 return false;
938 case Intrinsic::aarch64_neon_ld2:
939 case Intrinsic::aarch64_neon_st2:
940 Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS;
941 break;
942 case Intrinsic::aarch64_neon_ld3:
943 case Intrinsic::aarch64_neon_st3:
944 Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS;
945 break;
946 case Intrinsic::aarch64_neon_ld4:
947 case Intrinsic::aarch64_neon_st4:
948 Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS;
949 break;
950 }
951 return true;
952}
953
954/// See if \p I should be considered for address type promotion. We check if \p
955/// I is a sext with the right type and used in memory accesses. If it is used in a
956/// "complex" getelementptr, we allow it to be promoted without finding other
957/// sext instructions that sign extended the same initial value. A getelementptr
958/// is considered as "complex" if it has more than 2 operands.
959bool AArch64TTIImpl::shouldConsiderAddressTypePromotion(
960 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) {
961 bool Considerable = false;
962 AllowPromotionWithoutCommonHeader = false;
963 if (!isa<SExtInst>(&I))
964 return false;
965 Type *ConsideredSExtType =
966 Type::getInt64Ty(I.getParent()->getParent()->getContext());
967 if (I.getType() != ConsideredSExtType)
968 return false;
969 // See if the sext is the one with the right type and used in at least one
970 // GetElementPtrInst.
971 for (const User *U : I.users()) {
972 if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) {
973 Considerable = true;
974 // A getelementptr is considered as "complex" if it has more than 2
975 // operands. We will promote a SExt used in such a complex GEP as we
976 // expect some computation to be merged if it is done on 64 bits.
977 if (GEPInst->getNumOperands() > 2) {
978 AllowPromotionWithoutCommonHeader = true;
979 break;
980 }
981 }
982 }
983 return Considerable;
984}
985
986bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty,
987 TTI::ReductionFlags Flags) const {
988 auto *VTy = cast<VectorType>(Ty);
989 unsigned ScalarBits = Ty->getScalarSizeInBits();
990 switch (Opcode) {
991 case Instruction::FAdd:
992 case Instruction::FMul:
993 case Instruction::And:
994 case Instruction::Or:
995 case Instruction::Xor:
996 case Instruction::Mul:
997 return false;
998 case Instruction::Add:
999 return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128;
1000 case Instruction::ICmp:
1001 return (ScalarBits < 64) &&
1002 (ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128);
1003 case Instruction::FCmp:
1004 return Flags.NoNaN;
1005 default:
1006 llvm_unreachable("Unhandled reduction opcode");
1007 }
1008 return false;
1009}
1010
1011int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode,
1012 VectorType *ValTy,
1013 bool IsPairwiseForm,
1014 TTI::TargetCostKind CostKind) {
1015
1016 if (IsPairwiseForm)
1017 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
1018 CostKind);
1019
1020 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
1021 MVT MTy = LT.second;
1022 int ISD = TLI->InstructionOpcodeToISD(Opcode);
1023 assert(ISD && "Invalid opcode");
1024
1025 // Horizontal adds can use the 'addv' instruction. We model the cost of these
1026 // instructions as normal vector adds. This is the only arithmetic vector
1027 // reduction operation for which we have an instruction.
1028 static const CostTblEntry CostTblNoPairwise[]{
1029 {ISD::ADD, MVT::v8i8, 1},
1030 {ISD::ADD, MVT::v16i8, 1},
1031 {ISD::ADD, MVT::v4i16, 1},
1032 {ISD::ADD, MVT::v8i16, 1},
1033 {ISD::ADD, MVT::v4i32, 1},
1034 };
1035
1036 if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy))
1037 return LT.first * Entry->Cost;
1038
1039 return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm,
1040 CostKind);
1041}
1042
1043int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp,
1044 int Index, VectorType *SubTp) {
1045 if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose ||
1046 Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) {
1047 static const CostTblEntry ShuffleTbl[] = {
1048 // Broadcast shuffle kinds can be performed with 'dup'.
1049 { TTI::SK_Broadcast, MVT::v8i8, 1 },
1050 { TTI::SK_Broadcast, MVT::v16i8, 1 },
1051 { TTI::SK_Broadcast, MVT::v4i16, 1 },
1052 { TTI::SK_Broadcast, MVT::v8i16, 1 },
1053 { TTI::SK_Broadcast, MVT::v2i32, 1 },
1054 { TTI::SK_Broadcast, MVT::v4i32, 1 },
1055 { TTI::SK_Broadcast, MVT::v2i64, 1 },
1056 { TTI::SK_Broadcast, MVT::v2f32, 1 },
1057 { TTI::SK_Broadcast, MVT::v4f32, 1 },
1058 { TTI::SK_Broadcast, MVT::v2f64, 1 },
1059 // Transpose shuffle kinds can be performed with 'trn1/trn2' and
1060 // 'zip1/zip2' instructions.
1061 { TTI::SK_Transpose, MVT::v8i8, 1 },
1062 { TTI::SK_Transpose, MVT::v16i8, 1 },
1063 { TTI::SK_Transpose, MVT::v4i16, 1 },
1064 { TTI::SK_Transpose, MVT::v8i16, 1 },
1065 { TTI::SK_Transpose, MVT::v2i32, 1 },
1066 { TTI::SK_Transpose, MVT::v4i32, 1 },
1067 { TTI::SK_Transpose, MVT::v2i64, 1 },
1068 { TTI::SK_Transpose, MVT::v2f32, 1 },
1069 { TTI::SK_Transpose, MVT::v4f32, 1 },
1070 { TTI::SK_Transpose, MVT::v2f64, 1 },
1071 // Select shuffle kinds.
1072 // TODO: handle vXi8/vXi16.
1073 { TTI::SK_Select, MVT::v2i32, 1 }, // mov.
1074 { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar).
1075 { TTI::SK_Select, MVT::v2i64, 1 }, // mov.
1076 { TTI::SK_Select, MVT::v2f32, 1 }, // mov.
1077 { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar).
1078 { TTI::SK_Select, MVT::v2f64, 1 }, // mov.
1079 // PermuteSingleSrc shuffle kinds.
1080 // TODO: handle vXi8/vXi16.
1081 { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov.
1082 { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case.
1083 { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov.
1084 { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov.
1085 { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case.
1086 { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov.
1087 };
1088 std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
1089 if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second))
1090 return LT.first * Entry->Cost;
1091 }
1092
1093 return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
1094}

/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/IR/Type.h

1//===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the declaration of the Type class. For more "Type"
10// stuff, look in DerivedTypes.h.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_IR_TYPE_H
15#define LLVM_IR_TYPE_H
16
17#include "llvm/ADT/APFloat.h"
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/SmallPtrSet.h"
20#include "llvm/Support/CBindingWrapping.h"
21#include "llvm/Support/Casting.h"
22#include "llvm/Support/Compiler.h"
23#include "llvm/Support/ErrorHandling.h"
24#include "llvm/Support/TypeSize.h"
25#include <cassert>
26#include <cstdint>
27#include <iterator>
28
29namespace llvm {
30
31template<class GraphType> struct GraphTraits;
32class IntegerType;
33class LLVMContext;
34class PointerType;
35class raw_ostream;
36class StringRef;
37
38/// The instances of the Type class are immutable: once they are created,
39/// they are never changed. Also note that only one instance of a particular
40/// type is ever created. Thus seeing if two types are equal is a matter of
41/// doing a trivial pointer comparison. To enforce that no two equal instances
42/// are created, Type instances can only be created via static factory methods
43/// in class Type and in derived classes. Once allocated, Types are never
44/// free'd.
45///
46class Type {
47public:
48 //===--------------------------------------------------------------------===//
49 /// Definitions of all of the base types for the Type system. Based on this
50 /// value, you can cast to a class defined in DerivedTypes.h.
51 /// Note: If you add an element to this, you need to add an element to the
52 /// Type::getPrimitiveType function, or else things will break!
53 /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding.
54 ///
55 enum TypeID {
56 // PrimitiveTypes
57 HalfTyID = 0, ///< 16-bit floating point type
58 BFloatTyID, ///< 16-bit floating point type (7-bit significand)
59 FloatTyID, ///< 32-bit floating point type
60 DoubleTyID, ///< 64-bit floating point type
61 X86_FP80TyID, ///< 80-bit floating point type (X87)
62 FP128TyID, ///< 128-bit floating point type (112-bit significand)
63 PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC)
64 VoidTyID, ///< type with no size
65 LabelTyID, ///< Labels
66 MetadataTyID, ///< Metadata
67 X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific)
68 TokenTyID, ///< Tokens
69
70 // Derived types... see DerivedTypes.h file.
71 IntegerTyID, ///< Arbitrary bit width integers
72 FunctionTyID, ///< Functions
73 PointerTyID, ///< Pointers
74 StructTyID, ///< Structures
75 ArrayTyID, ///< Arrays
76 FixedVectorTyID, ///< Fixed width SIMD vector type
77 ScalableVectorTyID ///< Scalable SIMD vector type
78 };
79
80private:
81 /// This refers to the LLVMContext in which this type was uniqued.
82 LLVMContext &Context;
83
84 TypeID ID : 8; // The current base type of this type.
85 unsigned SubclassData : 24; // Space for subclasses to store data.
86 // Note that this should be synchronized with
87 // MAX_INT_BITS value in IntegerType class.
88
89protected:
90 friend class LLVMContextImpl;
91
92 explicit Type(LLVMContext &C, TypeID tid)
93 : Context(C), ID(tid), SubclassData(0) {}
94 ~Type() = default;
95
96 unsigned getSubclassData() const { return SubclassData; }
97
98 void setSubclassData(unsigned val) {
99 SubclassData = val;
100 // Ensure we don't have any accidental truncation.
101 assert(getSubclassData() == val && "Subclass data too large for field");
102 }
103
104 /// Keeps track of how many Type*'s there are in the ContainedTys list.
105 unsigned NumContainedTys = 0;
106
107 /// A pointer to the array of Types contained by this Type. For example, this
108 /// includes the arguments of a function type, the elements of a structure,
109 /// the pointee of a pointer, the element type of an array, etc. This pointer
110 /// may be 0 for types that don't contain other types (Integer, Double,
111 /// Float).
112 Type * const *ContainedTys = nullptr;
113
114public:
115 /// Print the current type.
116 /// Omit the type details if \p NoDetails == true.
117 /// E.g., let %st = type { i32, i16 }
118 /// When \p NoDetails is true, we only print %st.
119 /// Put differently, \p NoDetails prints the type as if
120 /// inlined with the operands when printing an instruction.
121 void print(raw_ostream &O, bool IsForDebug = false,
122 bool NoDetails = false) const;
123
124 void dump() const;
125
126 /// Return the LLVMContext in which this type was uniqued.
127 LLVMContext &getContext() const { return Context; }
128
129 //===--------------------------------------------------------------------===//
130 // Accessors for working with types.
131 //
132
133 /// Return the type id for the type. This will return one of the TypeID enum
134 /// elements defined above.
135 TypeID getTypeID() const { return ID; }
136
137 /// Return true if this is 'void'.
138 bool isVoidTy() const { return getTypeID() == VoidTyID; }
139
140 /// Return true if this is 'half', a 16-bit IEEE fp type.
141 bool isHalfTy() const { return getTypeID() == HalfTyID; }
142
143 /// Return true if this is 'bfloat', a 16-bit bfloat type.
144 bool isBFloatTy() const { return getTypeID() == BFloatTyID; }
145
146 /// Return true if this is 'float', a 32-bit IEEE fp type.
147 bool isFloatTy() const { return getTypeID() == FloatTyID; }
148
149 /// Return true if this is 'double', a 64-bit IEEE fp type.
150 bool isDoubleTy() const { return getTypeID() == DoubleTyID; }
151
152 /// Return true if this is x86 long double.
153 bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; }
154
155 /// Return true if this is 'fp128'.
156 bool isFP128Ty() const { return getTypeID() == FP128TyID; }
157
158 /// Return true if this is powerpc long double.
159 bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; }
160
161 /// Return true if this is one of the seven floating-point types.
162 bool isFloatingPointTy() const {
163 return getTypeID() == HalfTyID || getTypeID() == BFloatTyID ||
164 getTypeID() == FloatTyID || getTypeID() == DoubleTyID ||
165 getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID ||
166 getTypeID() == PPC_FP128TyID;
167 }
168
169 const fltSemantics &getFltSemantics() const {
170 switch (getTypeID()) {
171 case HalfTyID: return APFloat::IEEEhalf();
172 case BFloatTyID: return APFloat::BFloat();
173 case FloatTyID: return APFloat::IEEEsingle();
174 case DoubleTyID: return APFloat::IEEEdouble();
175 case X86_FP80TyID: return APFloat::x87DoubleExtended();
176 case FP128TyID: return APFloat::IEEEquad();
177 case PPC_FP128TyID: return APFloat::PPCDoubleDouble();
178 default: llvm_unreachable("Invalid floating type");
179 }
180 }
181
182 /// Return true if this is X86 MMX.
183 bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; }
184
185 /// Return true if this is a FP type or a vector of FP.
186 bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); }
187
188 /// Return true if this is 'label'.
189 bool isLabelTy() const { return getTypeID() == LabelTyID; }
190
191 /// Return true if this is 'metadata'.
192 bool isMetadataTy() const { return getTypeID() == MetadataTyID; }
193
194 /// Return true if this is 'token'.
195 bool isTokenTy() const { return getTypeID() == TokenTyID; }
196
197 /// True if this is an instance of IntegerType.
198 bool isIntegerTy() const { return getTypeID() == IntegerTyID; }
199
200 /// Return true if this is an IntegerType of the given width.
201 bool isIntegerTy(unsigned Bitwidth) const;
202
203 /// Return true if this is an integer type or a vector of integer types.
204 bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); }
205
206 /// Return true if this is an integer type or a vector of integer types of
207 /// the given width.
208 bool isIntOrIntVectorTy(unsigned BitWidth) const {
209 return getScalarType()->isIntegerTy(BitWidth);
210 }
211
212 /// Return true if this is an integer type or a pointer type.
213 bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); }
214
215 /// True if this is an instance of FunctionType.
216 bool isFunctionTy() const { return getTypeID() == FunctionTyID; }
217
218 /// True if this is an instance of StructType.
219 bool isStructTy() const { return getTypeID() == StructTyID; }
220
221 /// True if this is an instance of ArrayType.
222 bool isArrayTy() const { return getTypeID() == ArrayTyID; }
223
224 /// True if this is an instance of PointerType.
225 bool isPointerTy() const { return getTypeID() == PointerTyID; }
226
227 /// Return true if this is a pointer type or a vector of pointer types.
228 bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); }
229
230 /// True if this is an instance of VectorType.
231 inline bool isVectorTy() const {
232 return getTypeID() == ScalableVectorTyID || getTypeID() == FixedVectorTyID;
4. Assuming the condition is false
5. Assuming the condition is false
6. Returning zero, which participates in a condition later
25. Assuming the condition is true
26. Returning the value 1, which participates in a condition later
233 }
234
235 /// Return true if this type could be converted with a lossless BitCast to
236 /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the
237 /// same size only where no re-interpretation of the bits is done.
238 /// Determine if this type could be losslessly bitcast to Ty
239 bool canLosslesslyBitCastTo(Type *Ty) const;
240
241 /// Return true if this type is empty, that is, it has no elements or all of
242 /// its elements are empty.
243 bool isEmptyTy() const;
244
245 /// Return true if the type is "first class", meaning it is a valid type for a
246 /// Value.
247 bool isFirstClassType() const {
248 return getTypeID() != FunctionTyID && getTypeID() != VoidTyID;
249 }
250
251 /// Return true if the type is a valid type for a register in codegen. This
252 /// includes all first-class types except struct and array types.
253 bool isSingleValueType() const {
254 return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() ||
255 isPointerTy() || isVectorTy();
256 }
257
258 /// Return true if the type is an aggregate type. This means it is valid as
259 /// the first operand of an insertvalue or extractvalue instruction. This
260 /// includes struct and array types, but does not include vector types.
261 bool isAggregateType() const {
262 return getTypeID() == StructTyID || getTypeID() == ArrayTyID;
263 }
264
265 /// Return true if it makes sense to take the size of this type. To get the
266 /// actual size for a particular target, it is reasonable to use the
267 /// DataLayout subsystem to do this.
268 bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const {
269 // If it's a primitive, it is always sized.
270 if (getTypeID() == IntegerTyID || isFloatingPointTy() ||
271 getTypeID() == PointerTyID ||
272 getTypeID() == X86_MMXTyID)
273 return true;
274 // If it is not something that can have a size (e.g. a function or label),
275 // it doesn't have a size.
276 if (getTypeID() != StructTyID && getTypeID() != ArrayTyID && !isVectorTy())
277 return false;
278 // Otherwise we have to try harder to decide.
279 return isSizedDerivedType(Visited);
280 }
281
282 /// Return the basic size of this type if it is a primitive type. These are
283 /// fixed by LLVM and are not target-dependent.
284 /// This will return zero if the type does not have a size or is not a
285 /// primitive type.
286 ///
287 /// If this is a scalable vector type, the scalable property will be set and
288 /// the runtime size will be a positive integer multiple of the base size.
289 ///
290 /// Note that this may not reflect the size of memory allocated for an
291 /// instance of the type or the number of bytes that are written when an
292 /// instance of the type is stored to memory. The DataLayout class provides
293 /// additional query functions to provide this information.
294 ///
295 TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
296
297 /// If this is a vector type, return the getPrimitiveSizeInBits value for the
298 /// element type. Otherwise return the getPrimitiveSizeInBits value for this
299 /// type.
300 unsigned getScalarSizeInBits() const LLVM_READONLY;
301
302 /// Return the width of the mantissa of this type. This is only valid on
303 /// floating-point types. If the FP type does not have a stable mantissa (e.g.
304 /// ppc long double), this method returns -1.
305 int getFPMantissaWidth() const;
306
307 /// If this is a vector type, return the element type, otherwise return
308 /// 'this'.
309 inline Type *getScalarType() const {
310 if (isVectorTy())
311 return getContainedType(0);
312 return const_cast<Type *>(this);
313 }
314
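A minimal illustration (a hypothetical caller, not part of this header) of how the vector/scalar queries above compose: getScalarType() peels off at most one vector layer, so the scalar predicates behave the same for a scalar type and for a vector of that scalar.

// Hypothetical helper; 'Ty' may be i32, <4 x i32>, double, <2 x double>, ...
static bool isIntOrFPElement(llvm::Type *Ty) {
  llvm::Type *Elt = Ty->getScalarType(); // element type for vectors, Ty otherwise
  return Elt->isIntegerTy() || Elt->isFloatingPointTy();
}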
315 //===--------------------------------------------------------------------===//
316 // Type Iteration support.
317 //
318 using subtype_iterator = Type * const *;
319
320 subtype_iterator subtype_begin() const { return ContainedTys; }
321 subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];}
322 ArrayRef<Type*> subtypes() const {
323 return makeArrayRef(subtype_begin(), subtype_end());
324 }
325
326 using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>;
327
328 subtype_reverse_iterator subtype_rbegin() const {
329 return subtype_reverse_iterator(subtype_end());
330 }
331 subtype_reverse_iterator subtype_rend() const {
332 return subtype_reverse_iterator(subtype_begin());
333 }
334
335 /// This method is used to implement the type iterator (defined at the end of
336 /// the file). For derived types, this returns the types 'contained' in the
337 /// derived type.
338 Type *getContainedType(unsigned i) const {
339 assert(i < NumContainedTys && "Index out of range!");
340 return ContainedTys[i];
341 }
342
343 /// Return the number of types in the derived type.
344 unsigned getNumContainedTypes() const { return NumContainedTys; }
345
346 //===--------------------------------------------------------------------===//
347 // Helper methods corresponding to subclass methods. This forces a cast to
348 // the specified subclass and calls its accessor. "getArrayNumElements" (for
349 // example) is shorthand for cast<ArrayType>(Ty)->getNumElements(). This is
350 // only intended to cover the core methods that are frequently used, helper
351 // methods should not be added here.
352
353 inline unsigned getIntegerBitWidth() const;
354
355 inline Type *getFunctionParamType(unsigned i) const;
356 inline unsigned getFunctionNumParams() const;
357 inline bool isFunctionVarArg() const;
358
359 inline StringRef getStructName() const;
360 inline unsigned getStructNumElements() const;
361 inline Type *getStructElementType(unsigned N) const;
362
363 inline uint64_t getArrayNumElements() const;
364
365 Type *getArrayElementType() const {
366 assert(getTypeID() == ArrayTyID);
367 return ContainedTys[0];
368 }
369
370 Type *getPointerElementType() const {
371 assert(getTypeID() == PointerTyID);
372 return ContainedTys[0];
373 }
374
375 /// Given an integer or vector type, change the lane bitwidth to NewBitwidth,
376 /// whilst keeping the old number of lanes.
377 inline Type *getWithNewBitWidth(unsigned NewBitWidth) const;
378
379 /// Given scalar/vector integer type, returns a type with elements twice as
380 /// wide as in the original type. For vectors, preserves element count.
381 inline Type *getExtendedType() const;
382
383 /// Get the address space of this pointer or pointer vector type.
384 inline unsigned getPointerAddressSpace() const;
385
386 //===--------------------------------------------------------------------===//
387 // Static members exported by the Type class itself. Useful for getting
388 // instances of Type.
389 //
390
391 /// Return a type based on an identifier.
392 static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber);
393
394 //===--------------------------------------------------------------------===//
395 // These are the builtin types that are always available.
396 //
397 static Type *getVoidTy(LLVMContext &C);
398 static Type *getLabelTy(LLVMContext &C);
399 static Type *getHalfTy(LLVMContext &C);
400 static Type *getBFloatTy(LLVMContext &C);
401 static Type *getFloatTy(LLVMContext &C);
402 static Type *getDoubleTy(LLVMContext &C);
403 static Type *getMetadataTy(LLVMContext &C);
404 static Type *getX86_FP80Ty(LLVMContext &C);
405 static Type *getFP128Ty(LLVMContext &C);
406 static Type *getPPC_FP128Ty(LLVMContext &C);
407 static Type *getX86_MMXTy(LLVMContext &C);
408 static Type *getTokenTy(LLVMContext &C);
409 static IntegerType *getIntNTy(LLVMContext &C, unsigned N);
410 static IntegerType *getInt1Ty(LLVMContext &C);
411 static IntegerType *getInt8Ty(LLVMContext &C);
412 static IntegerType *getInt16Ty(LLVMContext &C);
413 static IntegerType *getInt32Ty(LLVMContext &C);
414 static IntegerType *getInt64Ty(LLVMContext &C);
415 static IntegerType *getInt128Ty(LLVMContext &C);
416 template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) {
417 int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
418 if (std::is_integral<ScalarTy>::value) {
419 return (Type*) Type::getIntNTy(C, noOfBits);
420 } else if (std::is_floating_point<ScalarTy>::value) {
421 switch (noOfBits) {
422 case 32:
423 return Type::getFloatTy(C);
424 case 64:
425 return Type::getDoubleTy(C);
426 }
427 }
428 llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/IR/Type.h"
, 428)
;
429 }
430
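For illustration, a hedged usage sketch of the getScalarTy template above (assuming a host where int is 32 bits, so sizeof(int) * CHAR_BIT selects an i32):

// Hypothetical usage; Ctx is any LLVMContext the caller already owns.
llvm::LLVMContext Ctx;
llvm::Type *IntTy    = llvm::Type::getScalarTy<int>(Ctx);    // an IntegerType, typically i32
llvm::Type *DoubleTy = llvm::Type::getScalarTy<double>(Ctx); // same as Type::getDoubleTy(Ctx)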
431 //===--------------------------------------------------------------------===//
432 // Convenience methods for getting pointer types with one of the above builtin
433 // types as pointee.
434 //
435 static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0);
436 static PointerType *getBFloatPtrTy(LLVMContext &C, unsigned AS = 0);
437 static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0);
438 static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0);
439 static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0);
440 static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0);
441 static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0);
442 static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0);
443 static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0);
444 static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0);
445 static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0);
446 static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0);
447 static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0);
448 static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0);
449
450 /// Return a pointer to the current type. This is equivalent to
451 /// PointerType::get(Foo, AddrSpace).
452 PointerType *getPointerTo(unsigned AddrSpace = 0) const;
453
454private:
455 /// Derived types like structures and arrays are sized iff all of the members
456 /// of the type are sized as well. Since asking for their size is relatively
457 /// uncommon, move this operation out-of-line.
458 bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const;
459};
460
461// Printing of types.
462inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) {
463 T.print(OS);
464 return OS;
465}
466
467// allow isa<PointerType>(x) to work without DerivedTypes.h included.
468template <> struct isa_impl<PointerType, Type> {
469 static inline bool doit(const Type &Ty) {
470 return Ty.getTypeID() == Type::PointerTyID;
471 }
472};
473
474// Create wrappers for C Binding types (see CBindingWrapping.h).
475DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
476
477/* Specialized opaque type conversions.
478 */
479inline Type **unwrap(LLVMTypeRef* Tys) {
480 return reinterpret_cast<Type**>(Tys);
481}
482
483inline LLVMTypeRef *wrap(Type **Tys) {
484 return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys));
485}
486
487} // end namespace llvm
488
489#endif // LLVM_IR_TYPE_H

/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h

1//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file provides a helper that implements much of the TTI interface in
11/// terms of the target-independent code generator and TargetLowering
12/// interfaces.
13//
14//===----------------------------------------------------------------------===//
15
16#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17#define LLVM_CODEGEN_BASICTTIIMPL_H
18
19#include "llvm/ADT/APInt.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/BitVector.h"
22#include "llvm/ADT/SmallPtrSet.h"
23#include "llvm/ADT/SmallVector.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/TargetTransformInfo.h"
26#include "llvm/Analysis/TargetTransformInfoImpl.h"
27#include "llvm/CodeGen/ISDOpcodes.h"
28#include "llvm/CodeGen/TargetLowering.h"
29#include "llvm/CodeGen/TargetSubtargetInfo.h"
30#include "llvm/CodeGen/ValueTypes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/DerivedTypes.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/Intrinsics.h"
40#include "llvm/IR/Operator.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include "llvm/Support/CommandLine.h"
45#include "llvm/Support/ErrorHandling.h"
46#include "llvm/Support/MachineValueType.h"
47#include "llvm/Support/MathExtras.h"
48#include <algorithm>
49#include <cassert>
50#include <cstdint>
51#include <limits>
52#include <utility>
53
54namespace llvm {
55
56class Function;
57class GlobalValue;
58class LLVMContext;
59class ScalarEvolution;
60class SCEV;
61class TargetMachine;
62
63extern cl::opt<unsigned> PartialUnrollingThreshold;
64
65/// Base class which can be used to help build a TTI implementation.
66///
67/// This class provides as much implementation of the TTI interface as is
68/// possible using the target independent parts of the code generator.
69///
70/// In order to subclass it, your class must implement a getST() method to
71/// return the subtarget, and a getTLI() method to return the target lowering.
72/// We need these methods implemented in the derived class so that this class
73/// doesn't have to duplicate storage for them.
74template <typename T>
75class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
76private:
77 using BaseT = TargetTransformInfoImplCRTPBase<T>;
78 using TTI = TargetTransformInfo;
79
80 /// Helper function to access this as a T.
81 T *thisT() { return static_cast<T *>(this); }
82
83 /// Estimate a cost of Broadcast as an extract and sequence of insert
84 /// operations.
85 unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
86 unsigned Cost = 0;
87 // Broadcast cost is equal to the cost of extracting the zero'th element
88 // plus the cost of inserting it into every element of the result vector.
89 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);
90
91 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
92 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
93 }
94 return Cost;
95 }
96
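As a rough worked example of the model above (assuming, hypothetically, that getVectorInstrCost returns 1 per element): broadcasting into a <4 x float> is counted as 1 extract of element 0 plus 4 inserts, i.e. an estimated cost of 1 + 4 = 5.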
97 /// Estimate a cost of shuffle as a sequence of extract and insert
98 /// operations.
99 unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) {
100 unsigned Cost = 0;
101 // Shuffle cost is equal to the cost of extracting each element from its
102 // source argument plus the cost of inserting it into the result vector.
103
104 // E.g. a <4 x float> shuffle with mask <0,5,2,7> extracts index 0 of the
105 // first vector, index 1 of the second vector, index 2 of the first
106 // vector, and index 3 of the second vector, and inserts them at indices
107 // <0,1,2,3> of the result vector.
108 for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
109 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
110 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
111 }
112 return Cost;
113 }
114
115 /// Estimate a cost of subvector extraction as a sequence of extract and
116 /// insert operations.
117 unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index,
118 FixedVectorType *SubVTy) {
119 assert(VTy && SubVTy &&((VTy && SubVTy && "Can only extract subvectors from vectors"
) ? static_cast<void> (0) : __assert_fail ("VTy && SubVTy && \"Can only extract subvectors from vectors\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 120, __PRETTY_FUNCTION__))
120 "Can only extract subvectors from vectors")((VTy && SubVTy && "Can only extract subvectors from vectors"
) ? static_cast<void> (0) : __assert_fail ("VTy && SubVTy && \"Can only extract subvectors from vectors\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 120, __PRETTY_FUNCTION__))
;
121 int NumSubElts = SubVTy->getNumElements();
122 assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
123 "SK_ExtractSubvector index out of range");
124
125 unsigned Cost = 0;
126 // Subvector extraction cost is equal to the cost of extracting each element
127 // from the source type plus the cost of inserting it into the result vector
128 // type.
129 for (int i = 0; i != NumSubElts; ++i) {
130 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
131 i + Index);
132 Cost +=
133 thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
134 }
135 return Cost;
136 }
137
138 /// Estimate a cost of subvector insertion as a sequence of extract and
139 /// insert operations.
140 unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index,
141 FixedVectorType *SubVTy) {
142 assert(VTy && SubVTy &&((VTy && SubVTy && "Can only insert subvectors into vectors"
) ? static_cast<void> (0) : __assert_fail ("VTy && SubVTy && \"Can only insert subvectors into vectors\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 143, __PRETTY_FUNCTION__))
143 "Can only insert subvectors into vectors")((VTy && SubVTy && "Can only insert subvectors into vectors"
) ? static_cast<void> (0) : __assert_fail ("VTy && SubVTy && \"Can only insert subvectors into vectors\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 143, __PRETTY_FUNCTION__))
;
144 int NumSubElts = SubVTy->getNumElements();
145 assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
146 "SK_InsertSubvector index out of range");
147
148 unsigned Cost = 0;
149 // Subvector insertion cost is equal to the cost of extracting each element
150 // from the subvector type plus the cost of inserting it into the result
151 // vector type.
152 for (int i = 0; i != NumSubElts; ++i) {
153 Cost +=
154 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
155 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
156 i + Index);
157 }
158 return Cost;
159 }
160
161 /// Local query method delegates up to T which *must* implement this!
162 const TargetSubtargetInfo *getST() const {
163 return static_cast<const T *>(this)->getST();
164 }
165
166 /// Local query method delegates up to T which *must* implement this!
167 const TargetLoweringBase *getTLI() const {
168 return static_cast<const T *>(this)->getTLI();
169 }
170
171 static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
172 switch (M) {
173 case TTI::MIM_Unindexed:
174 return ISD::UNINDEXED;
175 case TTI::MIM_PreInc:
176 return ISD::PRE_INC;
177 case TTI::MIM_PreDec:
178 return ISD::PRE_DEC;
179 case TTI::MIM_PostInc:
180 return ISD::POST_INC;
181 case TTI::MIM_PostDec:
182 return ISD::POST_DEC;
183 }
184 llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 184)
;
185 }
186
187protected:
188 explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
189 : BaseT(DL) {}
190 virtual ~BasicTTIImplBase() = default;
191
192 using TargetTransformInfoImplBase::DL;
193
194public:
195 /// \name Scalar TTI Implementations
196 /// @{
197 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
198 unsigned AddressSpace, unsigned Alignment,
199 bool *Fast) const {
200 EVT E = EVT::getIntegerVT(Context, BitWidth);
201 return getTLI()->allowsMisalignedMemoryAccesses(
202 E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
203 }
204
205 bool hasBranchDivergence() { return false; }
206
207 bool useGPUDivergenceAnalysis() { return false; }
208
209 bool isSourceOfDivergence(const Value *V) { return false; }
210
211 bool isAlwaysUniform(const Value *V) { return false; }
212
213 unsigned getFlatAddressSpace() {
214 // Return an invalid address space.
215 return -1;
216 }
217
218 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
219 Intrinsic::ID IID) const {
220 return false;
221 }
222
223 bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
224 return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
225 }
226
227 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
228 Value *NewV) const {
229 return nullptr;
230 }
231
232 bool isLegalAddImmediate(int64_t imm) {
233 return getTLI()->isLegalAddImmediate(imm);
234 }
235
236 bool isLegalICmpImmediate(int64_t imm) {
237 return getTLI()->isLegalICmpImmediate(imm);
238 }
239
240 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
241 bool HasBaseReg, int64_t Scale,
242 unsigned AddrSpace, Instruction *I = nullptr) {
243 TargetLoweringBase::AddrMode AM;
244 AM.BaseGV = BaseGV;
245 AM.BaseOffs = BaseOffset;
246 AM.HasBaseReg = HasBaseReg;
247 AM.Scale = Scale;
248 return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
249 }
250
251 bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
252 const DataLayout &DL) const {
253 EVT VT = getTLI()->getValueType(DL, Ty);
254 return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
255 }
256
257 bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
258 const DataLayout &DL) const {
259 EVT VT = getTLI()->getValueType(DL, Ty);
260 return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
261 }
262
263 bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
264 return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
265 }
266
267 bool isProfitableLSRChainElement(Instruction *I) {
268 return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
269 }
270
271 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
272 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
273 TargetLoweringBase::AddrMode AM;
274 AM.BaseGV = BaseGV;
275 AM.BaseOffs = BaseOffset;
276 AM.HasBaseReg = HasBaseReg;
277 AM.Scale = Scale;
278 return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
279 }
280
281 bool isTruncateFree(Type *Ty1, Type *Ty2) {
282 return getTLI()->isTruncateFree(Ty1, Ty2);
283 }
284
285 bool isProfitableToHoist(Instruction *I) {
286 return getTLI()->isProfitableToHoist(I);
287 }
288
289 bool useAA() const { return getST()->useAA(); }
290
291 bool isTypeLegal(Type *Ty) {
292 EVT VT = getTLI()->getValueType(DL, Ty);
293 return getTLI()->isTypeLegal(VT);
294 }
295
296 int getGEPCost(Type *PointeeType, const Value *Ptr,
297 ArrayRef<const Value *> Operands) {
298 return BaseT::getGEPCost(PointeeType, Ptr, Operands);
299 }
300
301 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
302 unsigned &JumpTableSize,
303 ProfileSummaryInfo *PSI,
304 BlockFrequencyInfo *BFI) {
305 /// Try to find the estimated number of clusters. Note that the number of
306 /// clusters identified in this function could differ from the actual
307 /// numbers found in lowering. This function ignores switches that are
308 /// lowered with a mix of jump table / bit test / BTree. It was initially
309 /// intended for estimating the cost of a switch in the inline cost
310 /// heuristic, but it is a generic cost model usable in other places
311 /// (e.g., in loop unrolling).
312 unsigned N = SI.getNumCases();
313 const TargetLoweringBase *TLI = getTLI();
314 const DataLayout &DL = this->getDataLayout();
315
316 JumpTableSize = 0;
317 bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
318
319 // Early exit if neither a jump table nor a bit test is allowed.
320 if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
321 return N;
322
323 APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
324 APInt MinCaseVal = MaxCaseVal;
325 for (auto CI : SI.cases()) {
326 const APInt &CaseVal = CI.getCaseValue()->getValue();
327 if (CaseVal.sgt(MaxCaseVal))
328 MaxCaseVal = CaseVal;
329 if (CaseVal.slt(MinCaseVal))
330 MinCaseVal = CaseVal;
331 }
332
333 // Check if suitable for a bit test
334 if (N <= DL.getIndexSizeInBits(0u)) {
335 SmallPtrSet<const BasicBlock *, 4> Dests;
336 for (auto I : SI.cases())
337 Dests.insert(I.getCaseSuccessor());
338
339 if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
340 DL))
341 return 1;
342 }
343
344 // Check if suitable for a jump table.
345 if (IsJTAllowed) {
346 if (N < 2 || N < TLI->getMinimumJumpTableEntries())
347 return N;
348 uint64_t Range =
349 (MaxCaseVal - MinCaseVal)
350 .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
351 // Check whether a range of clusters is dense enough for a jump table
352 if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
353 JumpTableSize = Range;
354 return 1;
355 }
356 }
357 return N;
358 }
359
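A hedged, hypothetical walk-through of the jump-table branch above: a switch with N = 6 cases whose values run from MinCaseVal = 10 to MaxCaseVal = 40 gives Range = (40 - 10) + 1 = 31; if the target accepts that density in isSuitableForJumpTable, the function sets JumpTableSize = 31 and returns 1 cluster, otherwise it falls through and returns N = 6.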
360 bool shouldBuildLookupTables() {
361 const TargetLoweringBase *TLI = getTLI();
362 return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
363 TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
364 }
365
366 bool haveFastSqrt(Type *Ty) {
367 const TargetLoweringBase *TLI = getTLI();
368 EVT VT = TLI->getValueType(DL, Ty);
369 return TLI->isTypeLegal(VT) &&
370 TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
371 }
372
373 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
374 return true;
375 }
376
377 unsigned getFPOpCost(Type *Ty) {
378 // Check whether FADD is available, as a proxy for floating-point in
379 // general.
380 const TargetLoweringBase *TLI = getTLI();
381 EVT VT = TLI->getValueType(DL, Ty);
382 if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
383 return TargetTransformInfo::TCC_Basic;
384 return TargetTransformInfo::TCC_Expensive;
385 }
386
387 unsigned getInliningThresholdMultiplier() { return 1; }
388
389 int getInlinerVectorBonusPercent() { return 150; }
390
391 void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
392 TTI::UnrollingPreferences &UP) {
393 // This unrolling functionality is target independent, but to provide some
394 // motivation for its intended use, for x86:
395
396 // According to the Intel 64 and IA-32 Architectures Optimization Reference
397 // Manual, Intel Core models and later have a loop stream detector (and
398 // associated uop queue) that can benefit from partial unrolling.
399 // The relevant requirements are:
400 // - The loop must have no more than 4 (8 for Nehalem and later) branches
401 // taken, and none of them may be calls.
402 // - The loop can have no more than 18 (28 for Nehalem and later) uops.
403
404 // According to the Software Optimization Guide for AMD Family 15h
405 // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
406 // and loop buffer which can benefit from partial unrolling.
407 // The relevant requirements are:
408 // - The loop must have fewer than 16 branches
409 // - The loop must have less than 40 uops in all executed loop branches
410
411 // The number of taken branches in a loop is hard to estimate here, and
412 // benchmarking has revealed that it is better not to be conservative when
413 // estimating the branch count. As a result, we'll ignore the branch limits
414 // until someone finds a case where it matters in practice.
415
416 unsigned MaxOps;
417 const TargetSubtargetInfo *ST = getST();
418 if (PartialUnrollingThreshold.getNumOccurrences() > 0)
419 MaxOps = PartialUnrollingThreshold;
420 else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
421 MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
422 else
423 return;
424
425 // Scan the loop: don't unroll loops with calls.
426 for (BasicBlock *BB : L->blocks()) {
427 for (Instruction &I : *BB) {
428 if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
429 if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
430 if (!thisT()->isLoweredToCall(F))
431 continue;
432 }
433
434 return;
435 }
436 }
437 }
438
439 // Enable runtime and partial unrolling up to the specified size.
440 // Enable using trip count upper bound to unroll loops.
441 UP.Partial = UP.Runtime = UP.UpperBound = true;
442 UP.PartialThreshold = MaxOps;
443
444 // Avoid unrolling when optimizing for size.
445 UP.OptSizeThreshold = 0;
446 UP.PartialOptSizeThreshold = 0;
447
448 // Set number of instructions optimized when "back edge"
449 // becomes "fall through" to default value of 2.
450 UP.BEInsns = 2;
451 }
452
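A hedged illustration of the outcome above: on a subtarget reporting LoopMicroOpBufferSize = 28 (a hypothetical value) and a loop body with no lowered calls, the preferences come out as Partial = Runtime = UpperBound = true, PartialThreshold = 28, OptSizeThreshold = PartialOptSizeThreshold = 0, and BEInsns = 2.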
453 void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
454 TTI::PeelingPreferences &PP) {
455 PP.PeelCount = 0;
456 PP.AllowPeeling = true;
457 PP.AllowLoopNestsPeeling = false;
458 PP.PeelProfiledIterations = true;
459 }
460
461 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
462 AssumptionCache &AC,
463 TargetLibraryInfo *LibInfo,
464 HardwareLoopInfo &HWLoopInfo) {
465 return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
466 }
467
468 bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
469 AssumptionCache &AC, TargetLibraryInfo *TLI,
470 DominatorTree *DT,
471 const LoopAccessInfo *LAI) {
472 return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
473 }
474
475 bool emitGetActiveLaneMask() {
476 return BaseT::emitGetActiveLaneMask();
477 }
478
479 Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
480 IntrinsicInst &II) {
481 return BaseT::instCombineIntrinsic(IC, II);
482 }
483
484 Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
485 IntrinsicInst &II,
486 APInt DemandedMask,
487 KnownBits &Known,
488 bool &KnownBitsComputed) {
489 return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
490 KnownBitsComputed);
491 }
492
493 Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
494 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
495 APInt &UndefElts2, APInt &UndefElts3,
496 std::function<void(Instruction *, unsigned, APInt, APInt &)>
497 SimplifyAndSetOp) {
498 return BaseT::simplifyDemandedVectorEltsIntrinsic(
499 IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
500 SimplifyAndSetOp);
501 }
502
503 int getInstructionLatency(const Instruction *I) {
504 if (isa<LoadInst>(I))
505 return getST()->getSchedModel().DefaultLoadLatency;
506
507 return BaseT::getInstructionLatency(I);
508 }
509
510 virtual Optional<unsigned>
511 getCacheSize(TargetTransformInfo::CacheLevel Level) const {
512 return Optional<unsigned>(
513 getST()->getCacheSize(static_cast<unsigned>(Level)));
514 }
515
516 virtual Optional<unsigned>
517 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
518 Optional<unsigned> TargetResult =
519 getST()->getCacheAssociativity(static_cast<unsigned>(Level));
520
521 if (TargetResult)
522 return TargetResult;
523
524 return BaseT::getCacheAssociativity(Level);
525 }
526
527 virtual unsigned getCacheLineSize() const {
528 return getST()->getCacheLineSize();
529 }
530
531 virtual unsigned getPrefetchDistance() const {
532 return getST()->getPrefetchDistance();
533 }
534
535 virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
536 unsigned NumStridedMemAccesses,
537 unsigned NumPrefetches,
538 bool HasCall) const {
539 return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
540 NumPrefetches, HasCall);
541 }
542
543 virtual unsigned getMaxPrefetchIterationsAhead() const {
544 return getST()->getMaxPrefetchIterationsAhead();
545 }
546
547 virtual bool enableWritePrefetching() const {
548 return getST()->enableWritePrefetching();
549 }
550
551 /// @}
552
553 /// \name Vector TTI Implementations
554 /// @{
555
556 unsigned getRegisterBitWidth(bool Vector) const { return 32; }
557
558 /// Estimate the overhead of scalarizing an instruction. Insert and Extract
559 /// are set if the demanded result elements need to be inserted and/or
560 /// extracted from vectors.
561 unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
562 bool Insert, bool Extract) {
563 /// FIXME: a bitfield is not a reasonable abstraction for talking about
564 /// which elements are needed from a scalable vector
565 auto *Ty = cast<FixedVectorType>(InTy);
566
567 assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
568 "Vector size mismatch");
569
570 unsigned Cost = 0;
571
572 for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
573 if (!DemandedElts[i])
574 continue;
575 if (Insert)
576 Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
577 if (Extract)
578 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
579 }
580
581 return Cost;
582 }
583
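A hedged numeric sketch of the DemandedElts loop above: for a <8 x i32> with DemandedElts = 0b00001111, Insert = true and Extract = false, only the four demanded lanes contribute, so with a hypothetical per-lane insert cost of 1 the returned overhead is 4.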
584 /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
585 unsigned getScalarizationOverhead(VectorType *InTy, bool Insert,
586 bool Extract) {
587 auto *Ty = cast<FixedVectorType>(InTy);
588
589 APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
590 return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
591 }
592
593 /// Estimate the overhead of scalarizing an instructions unique
594 /// non-constant operands. The types of the arguments are ordinarily
595 /// scalar, in which case the costs are multiplied with VF.
596 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
597 unsigned VF) {
598 unsigned Cost = 0;
599 SmallPtrSet<const Value*, 4> UniqueOperands;
600 for (const Value *A : Args) {
601 if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
602 auto *VecTy = dyn_cast<VectorType>(A->getType());
603 if (VecTy) {
604 // If A is a vector operand, VF should be 1 or correspond to A.
605 assert((VF == 1 ||
606 VF == cast<FixedVectorType>(VecTy)->getNumElements()) &&
607 "Vector argument does not match VF");
608 }
609 else
610 VecTy = FixedVectorType::get(A->getType(), VF);
611
612 Cost += getScalarizationOverhead(VecTy, false, true);
613 }
614 }
615
616 return Cost;
617 }
618
619 unsigned getScalarizationOverhead(VectorType *InTy,
620 ArrayRef<const Value *> Args) {
621 auto *Ty = cast<FixedVectorType>(InTy);
622
623 unsigned Cost = 0;
624
625 Cost += getScalarizationOverhead(Ty, true, false);
626 if (!Args.empty())
627 Cost += getOperandsScalarizationOverhead(Args, Ty->getNumElements());
628 else
629 // When no information on arguments is provided, we add the cost
630 // associated with one argument as a heuristic.
631 Cost += getScalarizationOverhead(Ty, false, true);
632
633 return Cost;
634 }
635
636 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
637
638 unsigned getArithmeticInstrCost(
639 unsigned Opcode, Type *Ty,
640 TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
641 TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
642 TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
643 TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
644 TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
645 ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
646 const Instruction *CxtI = nullptr) {
647 // Check if any of the operands are vector operands.
648 const TargetLoweringBase *TLI = getTLI();
649 int ISD = TLI->InstructionOpcodeToISD(Opcode);
650 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 650, __PRETTY_FUNCTION__))
;
651
652 // TODO: Handle more cost kinds.
653 if (CostKind != TTI::TCK_RecipThroughput)
654 return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
655 Opd1Info, Opd2Info,
656 Opd1PropInfo, Opd2PropInfo,
657 Args, CxtI);
658
659 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
660
661 bool IsFloat = Ty->isFPOrFPVectorTy();
662 // Assume that floating point arithmetic operations cost twice as much as
663 // integer operations.
664 unsigned OpCost = (IsFloat ? 2 : 1);
665
666 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
667 // The operation is legal. Assume it costs 1.
668 // TODO: Once we have extract/insert subvector cost we need to use them.
669 return LT.first * OpCost;
670 }
671
672 if (!TLI->isOperationExpand(ISD, LT.second)) {
673 // If the operation is custom lowered, then assume that the code is twice
674 // as expensive.
675 return LT.first * 2 * OpCost;
676 }
677
678 // Else, assume that we need to scalarize this op.
679 // TODO: If one of the types get legalized by splitting, handle this
680 // similarly to what getCastInstrCost() does.
681 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
682 unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
683 unsigned Cost = thisT()->getArithmeticInstrCost(
684 Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
685 Opd1PropInfo, Opd2PropInfo, Args, CxtI);
686 // Return the cost of multiple scalar invocations plus the cost of
687 // inserting and extracting the values.
688 return getScalarizationOverhead(VTy, Args) + Num * Cost;
689 }
690
691 // We don't know anything about this scalar instruction.
692 return OpCost;
693 }
694
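A hedged example of the three-way split in getArithmeticInstrCost above: if Ty legalizes into two registers (LT.first = 2), an opcode that is legal or promoted costs 2 * 1 = 2 for integers and 2 * 2 = 4 for floating point; a custom-lowered opcode doubles that again (LT.first * 2 * OpCost); only an opcode the target would expand falls through to the vector scalarization estimate.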
695 unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
696 VectorType *SubTp) {
697
698 switch (Kind) {
699 case TTI::SK_Broadcast:
700 return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
701 case TTI::SK_Select:
702 case TTI::SK_Reverse:
703 case TTI::SK_Transpose:
704 case TTI::SK_PermuteSingleSrc:
705 case TTI::SK_PermuteTwoSrc:
706 return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
707 case TTI::SK_ExtractSubvector:
708 return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
709 cast<FixedVectorType>(SubTp));
710 case TTI::SK_InsertSubvector:
711 return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index,
712 cast<FixedVectorType>(SubTp));
713 }
714 llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind",
"/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 714)
;
715 }
716
717 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
718 TTI::CastContextHint CCH,
719 TTI::TargetCostKind CostKind,
720 const Instruction *I = nullptr) {
721 if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
722 return 0;
723
724 const TargetLoweringBase *TLI = getTLI();
725 int ISD = TLI->InstructionOpcodeToISD(Opcode);
726 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 726, __PRETTY_FUNCTION__))
;
727 std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
728 std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
729
730 TypeSize SrcSize = SrcLT.second.getSizeInBits();
731 TypeSize DstSize = DstLT.second.getSizeInBits();
732 bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
733 bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();
734
735 switch (Opcode) {
736 default:
737 break;
738 case Instruction::Trunc:
739 // Check for NOOP conversions.
740 if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
741 return 0;
742 LLVM_FALLTHROUGH;
743 case Instruction::BitCast:
744 // Bitcasts between types that are legalized to the same type are free, and
745 // we assume an int to/from ptr cast of the same size is also free.
746 if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
747 SrcSize == DstSize)
748 return 0;
749 break;
750 case Instruction::FPExt:
751 if (I && getTLI()->isExtFree(I))
752 return 0;
753 break;
754 case Instruction::ZExt:
755 if (TLI->isZExtFree(SrcLT.second, DstLT.second))
756 return 0;
757 LLVM_FALLTHROUGH;
758 case Instruction::SExt:
759 if (I && getTLI()->isExtFree(I))
760 return 0;
761
762 // If this is a zext/sext of a load, return 0 if the corresponding
763 // extending load exists on target.
764 if (CCH == TTI::CastContextHint::Normal) {
765 EVT ExtVT = EVT::getEVT(Dst);
766 EVT LoadVT = EVT::getEVT(Src);
767 unsigned LType =
768 ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
769 if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
770 return 0;
771 }
772 break;
773 case Instruction::AddrSpaceCast:
774 if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
775 Dst->getPointerAddressSpace()))
776 return 0;
777 break;
778 }
779
780 auto *SrcVTy = dyn_cast<VectorType>(Src);
781 auto *DstVTy = dyn_cast<VectorType>(Dst);
782
783 // If the cast is marked as legal (or promote) then assume low cost.
784 if (SrcLT.first == DstLT.first &&
785 TLI->isOperationLegalOrPromote(ISD, DstLT.second))
786 return SrcLT.first;
787
788 // Handle scalar conversions.
789 if (!SrcVTy && !DstVTy) {
790 // Just check the op cost. If the operation is legal then assume it costs
791 // 1.
792 if (!TLI->isOperationExpand(ISD, DstLT.second))
793 return 1;
794
795 // Assume that illegal scalar instructions are expensive.
796 return 4;
797 }
798
799 // Check vector-to-vector casts.
800 if (DstVTy && SrcVTy) {
801 // If the cast is between same-sized registers, then the check is simple.
802 if (SrcLT.first == DstLT.first && SrcSize == DstSize) {
803
804 // Assume that Zext is done using AND.
805 if (Opcode == Instruction::ZExt)
806 return SrcLT.first;
807
808 // Assume that sext is done using SHL and SRA.
809 if (Opcode == Instruction::SExt)
810 return SrcLT.first * 2;
811
812 // Just check the op cost. If the operation is legal then
813 // assume it costs 1 and multiply by the
814 // type-legalization overhead.
815 if (!TLI->isOperationExpand(ISD, DstLT.second))
816 return SrcLT.first * 1;
817 }
818
819 // If we are legalizing by splitting, query the concrete TTI for the cost
820 // of casting the original vector twice. We also need to factor in the
821 // cost of the split itself. Count that as 1, to be consistent with
822 // TLI->getTypeLegalizationCost().
823 bool SplitSrc =
824 TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
825 TargetLowering::TypeSplitVector;
826 bool SplitDst =
827 TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
828 TargetLowering::TypeSplitVector;
829 if ((SplitSrc || SplitDst) &&
830 cast<FixedVectorType>(SrcVTy)->getNumElements() > 1 &&
831 cast<FixedVectorType>(DstVTy)->getNumElements() > 1) {
832 Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
833 Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
834 T *TTI = static_cast<T *>(this);
835 // If both types need to be split then the split is free.
836 unsigned SplitCost =
837 (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
838 return SplitCost +
839 (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
840 CostKind, I));
841 }
842
843 // In other cases where the source or destination are illegal, assume
844 // the operation will get scalarized.
845 unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
846 unsigned Cost = thisT()->getCastInstrCost(
847 Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);
848
849 // Return the cost of multiple scalar invocations plus the cost of
850 // inserting and extracting the values.
851 return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
852 }
853
854 // We already handled vector-to-vector and scalar-to-scalar conversions.
855 // This is where we handle bitcasts between vectors and scalars. We
856 // need to assume that the conversion is scalarized in one way or
857 // another.
858 if (Opcode == Instruction::BitCast) {
859 // Illegal bitcasts are done by storing and loading from a stack slot.
860 return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
861 (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
862 }
863
864 llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 864)
;
865 }
866
867 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
868 VectorType *VecTy, unsigned Index) {
869 return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
870 Index) +
871 thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
872 TTI::CastContextHint::None, TTI::TCK_RecipThroughput);
873 }
874
875 unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
876 return BaseT::getCFInstrCost(Opcode, CostKind);
877 }
878
879 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
880 TTI::TargetCostKind CostKind,
881 const Instruction *I = nullptr) {
882 const TargetLoweringBase *TLI = getTLI();
883 int ISD = TLI->InstructionOpcodeToISD(Opcode);
884 assert(ISD && "Invalid opcode")((ISD && "Invalid opcode") ? static_cast<void> (
0) : __assert_fail ("ISD && \"Invalid opcode\"", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 884, __PRETTY_FUNCTION__))
;
10
Assuming 'ISD' is not equal to 0
11
'?' condition is true
885
886 // TODO: Handle other cost kinds.
887 if (CostKind != TTI::TCK_RecipThroughput)
11.1. 'CostKind' is equal to TCK_RecipThroughput
12. Taking false branch
888 return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I);
889
890 // Selects on vectors are actually vector selects.
891 if (ISD == ISD::SELECT) {
13. Assuming 'ISD' is not equal to SELECT
14. Taking false branch
892 assert(CondTy && "CondTy must exist");
893 if (CondTy->isVectorTy())
894 ISD = ISD::VSELECT;
895 }
896 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
897
898 if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
15. Taking false branch
899 !TLI->isOperationExpand(ISD, LT.second)) {
900 // The operation is legal. Assume it costs 1. Multiply
901 // by the type-legalization overhead.
902 return LT.first * 1;
903 }
904
905 // Otherwise, assume that the cast is scalarized.
906 // TODO: If one of the types get legalized by splitting, handle this
907 // similarly to what getCastInstrCost() does.
908 if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
16. Assuming 'ValTy' is a 'VectorType'
16.1. 'ValVTy' is non-null
17. Taking true branch
909 unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
18. 'ValVTy' is a 'FixedVectorType'
910 if (CondTy)
19. Assuming 'CondTy' is null
20. Taking false branch
911 CondTy = CondTy->getScalarType();
912 unsigned Cost = thisT()->getCmpSelInstrCost(
913 Opcode, ValVTy->getScalarType(), CondTy, CostKind, I);
21. Passing null pointer value via 3rd parameter 'CondTy'
22. Calling 'AArch64TTIImpl::getCmpSelInstrCost'
914
915 // Return the cost of multiple scalar invocations plus the cost of
916 // inserting and extracting the values.
917 return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
918 }
919
920 // Unknown scalar opcode.
921 return 1;
922 }
923
924 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
925 std::pair<unsigned, MVT> LT =
926 getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
927
928 return LT.first;
929 }
930
931 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
932 unsigned AddressSpace,
933 TTI::TargetCostKind CostKind,
934 const Instruction *I = nullptr) {
935 assert(!Src->isVoidTy() && "Invalid type")((!Src->isVoidTy() && "Invalid type") ? static_cast
<void> (0) : __assert_fail ("!Src->isVoidTy() && \"Invalid type\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 935, __PRETTY_FUNCTION__))
;
936 // Assume types, such as structs, are expensive.
937 if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
938 return 4;
939 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
940
941 // Assuming that all loads of legal types cost 1.
942 unsigned Cost = LT.first;
943 if (CostKind != TTI::TCK_RecipThroughput)
944 return Cost;
945
946 if (Src->isVectorTy() &&
947 Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
948 // This is a vector load that legalizes to a larger type than the vector
949 // itself. Unless the corresponding extending load or truncating store is
950 // legal, this will scalarize.
951 TargetLowering::LegalizeAction LA = TargetLowering::Expand;
952 EVT MemVT = getTLI()->getValueType(DL, Src);
953 if (Opcode == Instruction::Store)
954 LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
955 else
956 LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
957
958 if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
959 // This is a vector load/store for some illegal type that is scalarized.
960 // We must account for the cost of building or decomposing the vector.
961 Cost += getScalarizationOverhead(cast<VectorType>(Src),
962 Opcode != Instruction::Store,
963 Opcode == Instruction::Store);
964 }
965 }
966
967 return Cost;
968 }
969
970 unsigned getInterleavedMemoryOpCost(
971 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
972 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
973 bool UseMaskForCond = false, bool UseMaskForGaps = false) {
974 auto *VT = cast<FixedVectorType>(VecTy);
975
976 unsigned NumElts = VT->getNumElements();
977 assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor")((Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"
) ? static_cast<void> (0) : __assert_fail ("Factor > 1 && NumElts % Factor == 0 && \"Invalid interleave factor\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 977, __PRETTY_FUNCTION__))
;
978
979 unsigned NumSubElts = NumElts / Factor;
980 auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);
981
982 // Firstly, the cost of load/store operation.
983 unsigned Cost;
984 if (UseMaskForCond || UseMaskForGaps)
985 Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
986 AddressSpace, CostKind);
987 else
988 Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
989 CostKind);
990
991 // Legalize the vector type, and get the legalized and unlegalized type
992 // sizes.
993 MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
994 unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
995 unsigned VecTyLTSize = VecTyLT.getStoreSize();
996
997 // Return the ceiling of dividing A by B.
998 auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
999
1000 // Scale the cost of the memory operation by the fraction of legalized
1001 // instructions that will actually be used. We shouldn't account for the
1002 // cost of dead instructions since they will be removed.
1003 //
1004 // E.g., An interleaved load of factor 8:
1005 // %vec = load <16 x i64>, <16 x i64>* %ptr
1006 // %v0 = shufflevector %vec, undef, <0, 8>
1007 //
1008 // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
1009 // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
1010 // type). The other loads are unused.
1011 //
1012 // We only scale the cost of loads since interleaved store groups aren't
1013 // allowed to have gaps.
1014 if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
1015 // The number of loads of a legal type it will take to represent a load
1016 // of the unlegalized vector type.
1017 unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
1018
1019 // The number of elements of the unlegalized type that correspond to a
1020 // single legal instruction.
1021 unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
1022
1023 // Determine which legal instructions will be used.
1024 BitVector UsedInsts(NumLegalInsts, false);
1025 for (unsigned Index : Indices)
1026 for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
1027 UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
1028
1029 // Scale the cost of the load by the fraction of legal instructions that
1030 // will be used.
1031 Cost *= UsedInsts.count() / NumLegalInsts;
1032 }
1033
1034 // Next, add the cost of the interleave operation.
1035 if (Opcode == Instruction::Load) {
1036 // The interleave cost is similar to extract sub vectors' elements
1037 // from the wide vector, and insert them into sub vectors.
1038 //
1039 // E.g. An interleaved load of factor 2 (with one member of index 0):
1040 // %vec = load <8 x i32>, <8 x i32>* %ptr
1041 // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
1042 // The cost is estimated as extract elements at 0, 2, 4, 6 from the
1043 // <8 x i32> vector and insert them into a <4 x i32> vector.
1044
1045 assert(Indices.size() <= Factor &&((Indices.size() <= Factor && "Interleaved memory op has too many members"
) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1046, __PRETTY_FUNCTION__))
1046 "Interleaved memory op has too many members")((Indices.size() <= Factor && "Interleaved memory op has too many members"
) ? static_cast<void> (0) : __assert_fail ("Indices.size() <= Factor && \"Interleaved memory op has too many members\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1046, __PRETTY_FUNCTION__))
;
1047
1048 for (unsigned Index : Indices) {
1049 assert(Index < Factor && "Invalid index for interleaved memory op")((Index < Factor && "Invalid index for interleaved memory op"
) ? static_cast<void> (0) : __assert_fail ("Index < Factor && \"Invalid index for interleaved memory op\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1049, __PRETTY_FUNCTION__))
;
1050
1051 // Extract elements from loaded vector for each sub vector.
1052 for (unsigned i = 0; i < NumSubElts; i++)
1053 Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
1054 Index + i * Factor);
1055 }
1056
1057 unsigned InsSubCost = 0;
1058 for (unsigned i = 0; i < NumSubElts; i++)
1059 InsSubCost +=
1060 thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);
1061
1062 Cost += Indices.size() * InsSubCost;
1063 } else {
1064 // The interleave cost is extract all elements from sub vectors, and
1065 // insert them into the wide vector.
1066 //
1067 // E.g. An interleaved store of factor 2:
1068 // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
1069 // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
1070 // The cost is estimated as extract all elements from both <4 x i32>
1071 // vectors and insert into the <8 x i32> vector.
1072
1073 unsigned ExtSubCost = 0;
1074 for (unsigned i = 0; i < NumSubElts; i++)
1075 ExtSubCost +=
1076 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
1077 Cost += ExtSubCost * Factor;
1078
1079 for (unsigned i = 0; i < NumElts; i++)
1080 Cost += static_cast<T *>(this)
1081 ->getVectorInstrCost(Instruction::InsertElement, VT, i);
1082 }
1083
1084 if (!UseMaskForCond)
1085 return Cost;
1086
1087 Type *I8Type = Type::getInt8Ty(VT->getContext());
1088 auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
1089 SubVT = FixedVectorType::get(I8Type, NumSubElts);
1090
1091 // The Mask shuffling cost is extract all the elements of the Mask
1092 // and insert each of them Factor times into the wide vector:
1093 //
1094 // E.g. an interleaved group with factor 3:
1095 // %mask = icmp ult <8 x i32> %vec1, %vec2
1096 // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
1097 // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
1098 // The cost is estimated as extract all mask elements from the <8xi1> mask
1099 // vector and insert them factor times into the <24xi1> shuffled mask
1100 // vector.
1101 for (unsigned i = 0; i < NumSubElts; i++)
1102 Cost +=
1103 thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
1104
1105 for (unsigned i = 0; i < NumElts; i++)
1106 Cost +=
1107 thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);
1108
1109 // The Gaps mask is invariant and created outside the loop, therefore the
1110 // cost of creating it is not accounted for here. However if we have both
1111 // a MaskForGaps and some other mask that guards the execution of the
1112 // memory access, we need to account for the cost of And-ing the two masks
1113 // inside the loop.
1114 if (UseMaskForGaps)
1115 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
1116 CostKind);
1117
1118 return Cost;
1119 }
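
As a standalone check of the scaling comment at lines 1000-1013 (a factor-8 interleaved load of <16 x i64> with a single member at index 0, legalized to eight v2i64 loads), the plain C++ below reproduces the "only 2 of the 8 legal loads are used" result. std::vector<bool> stands in for llvm::BitVector; the byte sizes are those of the example, not measured costs.

#include <cstdio>
#include <vector>

int main() {
  auto ceilDiv = [](unsigned A, unsigned B) { return (A + B - 1) / B; };

  const unsigned NumElts = 16, Factor = 8, NumSubElts = NumElts / Factor; // 2
  const unsigned VecTySize = 16 * 8, VecTyLTSize = 2 * 8;                 // bytes
  const unsigned NumLegalInsts = ceilDiv(VecTySize, VecTyLTSize);         // 8
  const unsigned NumEltsPerLegalInst = ceilDiv(NumElts, NumLegalInsts);   // 2

  std::vector<bool> UsedInsts(NumLegalInsts, false); // mirrors llvm::BitVector
  const unsigned Indices[] = {0};                    // single member, index 0
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      UsedInsts[(Index + Elt * Factor) / NumEltsPerLegalInst] = true;

  unsigned Used = 0;
  for (bool B : UsedInsts)
    Used += B;
  std::printf("%u of %u legal loads used\n", Used, NumLegalInsts); // 2 of 8
  return 0;
}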
1120
1121 /// Get intrinsic cost based on arguments.
1122 unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1123 TTI::TargetCostKind CostKind) {
1124 Intrinsic::ID IID = ICA.getID();
1125
1126 // Special case some scalar intrinsics.
1127 if (CostKind != TTI::TCK_RecipThroughput) {
1128 switch (IID) {
1129 default:
1130 break;
1131 case Intrinsic::cttz:
1132 if (getTLI()->isCheapToSpeculateCttz())
1133 return TargetTransformInfo::TCC_Basic;
1134 break;
1135 case Intrinsic::ctlz:
1136 if (getTLI()->isCheapToSpeculateCtlz())
1137 return TargetTransformInfo::TCC_Basic;
1138 break;
1139 case Intrinsic::memcpy:
1140 return thisT()->getMemcpyCost(ICA.getInst());
1141 // TODO: other libc intrinsics.
1142 }
1143 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
1144 }
1145
1146 if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
1147 return 0;
1148
1149 // TODO: Combine these two logic paths.
1150 if (ICA.isTypeBasedOnly())
1151 return getTypeBasedIntrinsicInstrCost(ICA, CostKind);
1152
1153 Type *RetTy = ICA.getReturnType();
1154 unsigned VF = ICA.getVectorFactor();
1155 unsigned RetVF =
1156 (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements()
1157 : 1);
1158 assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type")(((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"
) ? static_cast<void> (0) : __assert_fail ("(RetVF == 1 || VF == 1) && \"VF > 1 and RetVF is a vector type\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1158, __PRETTY_FUNCTION__))
;
1159 const IntrinsicInst *I = ICA.getInst();
1160 const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
1161 FastMathFlags FMF = ICA.getFlags();
1162
1163 switch (IID) {
1164 default: {
1165 // Assume that we need to scalarize this intrinsic.
1166 SmallVector<Type *, 4> Types;
1167 for (const Value *Op : Args) {
1168 Type *OpTy = Op->getType();
1169 assert(VF == 1 || !OpTy->isVectorTy())((VF == 1 || !OpTy->isVectorTy()) ? static_cast<void>
(0) : __assert_fail ("VF == 1 || !OpTy->isVectorTy()", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1169, __PRETTY_FUNCTION__))
;
1170 Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF));
1171 }
1172
1173 if (VF > 1 && !RetTy->isVoidTy())
1174 RetTy = FixedVectorType::get(RetTy, VF);
1175
1176 // Compute the scalarization overhead based on Args for a vector
1177 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1178 // CostModel will pass a vector RetTy and VF is 1.
1179 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1180 if (RetVF > 1 || VF > 1) {
1181 ScalarizationCost = 0;
1182 if (!RetTy->isVoidTy())
1183 ScalarizationCost +=
1184 getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
1185 ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1186 }
1187
1188 IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF,
1189 ScalarizationCost, I);
1190 return thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1191 }
1192 case Intrinsic::masked_scatter: {
1193 assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast
<void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1193, __PRETTY_FUNCTION__))
;
1194 const Value *Mask = Args[3];
1195 bool VarMask = !isa<Constant>(Mask);
1196 Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
1197 return thisT()->getGatherScatterOpCost(Instruction::Store,
1198 Args[0]->getType(), Args[1],
1199 VarMask, Alignment, CostKind, I);
1200 }
1201 case Intrinsic::masked_gather: {
1202 assert(VF == 1 && "Can't vectorize types here.")((VF == 1 && "Can't vectorize types here.") ? static_cast
<void> (0) : __assert_fail ("VF == 1 && \"Can't vectorize types here.\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1202, __PRETTY_FUNCTION__))
;
1203 const Value *Mask = Args[2];
1204 bool VarMask = !isa<Constant>(Mask);
1205 Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
1206 return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
1207 VarMask, Alignment, CostKind, I);
1208 }
1209 case Intrinsic::experimental_vector_reduce_add:
1210 case Intrinsic::experimental_vector_reduce_mul:
1211 case Intrinsic::experimental_vector_reduce_and:
1212 case Intrinsic::experimental_vector_reduce_or:
1213 case Intrinsic::experimental_vector_reduce_xor:
1214 case Intrinsic::experimental_vector_reduce_v2_fadd:
1215 case Intrinsic::experimental_vector_reduce_v2_fmul:
1216 case Intrinsic::experimental_vector_reduce_smax:
1217 case Intrinsic::experimental_vector_reduce_smin:
1218 case Intrinsic::experimental_vector_reduce_fmax:
1219 case Intrinsic::experimental_vector_reduce_fmin:
1220 case Intrinsic::experimental_vector_reduce_umax:
1221 case Intrinsic::experimental_vector_reduce_umin: {
1222 IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
1223 return getIntrinsicInstrCost(Attrs, CostKind);
1224 }
1225 case Intrinsic::fshl:
1226 case Intrinsic::fshr: {
1227 const Value *X = Args[0];
1228 const Value *Y = Args[1];
1229 const Value *Z = Args[2];
1230 TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1231 TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1232 TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1233 TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1234 TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1235 OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1236 : TTI::OP_None;
1237 // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1238 // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1239 unsigned Cost = 0;
1240 Cost +=
1241 thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
1242 Cost +=
1243 thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
1244 Cost += thisT()->getArithmeticInstrCost(
1245 BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
1246 Cost += thisT()->getArithmeticInstrCost(
1247 BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
1248 // Non-constant shift amounts require a modulo.
1249 if (OpKindZ != TTI::OK_UniformConstantValue &&
1250 OpKindZ != TTI::OK_NonUniformConstantValue)
1251 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1252 CostKind, OpKindZ, OpKindBW,
1253 OpPropsZ, OpPropsBW);
1254 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1255 if (X != Y) {
1256 Type *CondTy = RetTy->getWithNewBitWidth(1);
1257 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1258 CostKind);
1259 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1260 CondTy, CostKind);
1261 }
1262 return Cost;
1263 }
1264 }
1265 }
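
The fshl/fshr case above prices the expansion written out in the comment at lines 1237-1238. The standalone C++ below renders the fshl form for a 32-bit type, including the shift-amount modulo and the shift-by-zero guard that the extra urem and icmp/select costs account for; it illustrates the expansion being costed, not the code SelectionDAG emits.

#include <cstdint>
#include <cstdio>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  const uint32_t Amt = Z % BW;            // the URem added for non-constant Z
  if (Amt == 0)                           // icmp+select handling for X != Y:
    return X;                             // shifting Y right by BW is undefined
  return (X << Amt) | (Y >> (BW - Amt));  // shl, lshr, or
}

int main() {
  std::printf("0x%08x\n", fshl32(0x12345678u, 0x9abcdef0u, 8)); // 0x3456789a
  return 0;
}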
1266
1267 /// Get intrinsic cost based on argument types.
1268 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1269 /// cost of scalarizing the arguments and the return value will be computed
1270 /// based on types.
1271 unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1272 TTI::TargetCostKind CostKind) {
1273 Intrinsic::ID IID = ICA.getID();
1274 Type *RetTy = ICA.getReturnType();
1275 const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
1276 FastMathFlags FMF = ICA.getFlags();
1277 unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
1278 bool SkipScalarizationCost = ICA.skipScalarizationCost();
1279
1280 auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]);
1281
1282 SmallVector<unsigned, 2> ISDs;
1283 unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1284 switch (IID) {
1285 default: {
1286 // Assume that we need to scalarize this intrinsic.
1287 unsigned ScalarizationCost = ScalarizationCostPassed;
1288 unsigned ScalarCalls = 1;
1289 Type *ScalarRetTy = RetTy;
1290 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1291 if (!SkipScalarizationCost)
1292 ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
1293 ScalarCalls = std::max(ScalarCalls,
1294 cast<FixedVectorType>(RetVTy)->getNumElements());
1295 ScalarRetTy = RetTy->getScalarType();
1296 }
1297 SmallVector<Type *, 4> ScalarTys;
1298 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1299 Type *Ty = Tys[i];
1300 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1301 if (!SkipScalarizationCost)
1302 ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1303 ScalarCalls = std::max(ScalarCalls,
1304 cast<FixedVectorType>(VTy)->getNumElements());
1305 Ty = Ty->getScalarType();
1306 }
1307 ScalarTys.push_back(Ty);
1308 }
1309 if (ScalarCalls == 1)
1310 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1311
1312 IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
1313 unsigned ScalarCost =
1314 thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
1315
1316 return ScalarCalls * ScalarCost + ScalarizationCost;
1317 }
1318 // Look for intrinsics that can be lowered directly or turned into a scalar
1319 // intrinsic call.
1320 case Intrinsic::sqrt:
1321 ISDs.push_back(ISD::FSQRT);
1322 break;
1323 case Intrinsic::sin:
1324 ISDs.push_back(ISD::FSIN);
1325 break;
1326 case Intrinsic::cos:
1327 ISDs.push_back(ISD::FCOS);
1328 break;
1329 case Intrinsic::exp:
1330 ISDs.push_back(ISD::FEXP);
1331 break;
1332 case Intrinsic::exp2:
1333 ISDs.push_back(ISD::FEXP2);
1334 break;
1335 case Intrinsic::log:
1336 ISDs.push_back(ISD::FLOG);
1337 break;
1338 case Intrinsic::log10:
1339 ISDs.push_back(ISD::FLOG10);
1340 break;
1341 case Intrinsic::log2:
1342 ISDs.push_back(ISD::FLOG2);
1343 break;
1344 case Intrinsic::fabs:
1345 ISDs.push_back(ISD::FABS);
1346 break;
1347 case Intrinsic::canonicalize:
1348 ISDs.push_back(ISD::FCANONICALIZE);
1349 break;
1350 case Intrinsic::minnum:
1351 ISDs.push_back(ISD::FMINNUM);
1352 break;
1353 case Intrinsic::maxnum:
1354 ISDs.push_back(ISD::FMAXNUM);
1355 break;
1356 case Intrinsic::copysign:
1357 ISDs.push_back(ISD::FCOPYSIGN);
1358 break;
1359 case Intrinsic::floor:
1360 ISDs.push_back(ISD::FFLOOR);
1361 break;
1362 case Intrinsic::ceil:
1363 ISDs.push_back(ISD::FCEIL);
1364 break;
1365 case Intrinsic::trunc:
1366 ISDs.push_back(ISD::FTRUNC);
1367 break;
1368 case Intrinsic::nearbyint:
1369 ISDs.push_back(ISD::FNEARBYINT);
1370 break;
1371 case Intrinsic::rint:
1372 ISDs.push_back(ISD::FRINT);
1373 break;
1374 case Intrinsic::round:
1375 ISDs.push_back(ISD::FROUND);
1376 break;
1377 case Intrinsic::roundeven:
1378 ISDs.push_back(ISD::FROUNDEVEN);
1379 break;
1380 case Intrinsic::pow:
1381 ISDs.push_back(ISD::FPOW);
1382 break;
1383 case Intrinsic::fma:
1384 ISDs.push_back(ISD::FMA);
1385 break;
1386 case Intrinsic::fmuladd:
1387 ISDs.push_back(ISD::FMA);
1388 break;
1389 case Intrinsic::experimental_constrained_fmuladd:
1390 ISDs.push_back(ISD::STRICT_FMA);
1391 break;
1392 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1393 case Intrinsic::lifetime_start:
1394 case Intrinsic::lifetime_end:
1395 case Intrinsic::sideeffect:
1396 return 0;
1397 case Intrinsic::masked_store: {
1398 Type *Ty = Tys[0];
1399 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1400 return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
1401 CostKind);
1402 }
1403 case Intrinsic::masked_load: {
1404 Type *Ty = RetTy;
1405 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1406 return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
1407 CostKind);
1408 }
1409 case Intrinsic::experimental_vector_reduce_add:
1410 return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
1411 /*IsPairwiseForm=*/false,
1412 CostKind);
1413 case Intrinsic::experimental_vector_reduce_mul:
1414 return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
1415 /*IsPairwiseForm=*/false,
1416 CostKind);
1417 case Intrinsic::experimental_vector_reduce_and:
1418 return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
1419 /*IsPairwiseForm=*/false,
1420 CostKind);
1421 case Intrinsic::experimental_vector_reduce_or:
1422 return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
1423 /*IsPairwiseForm=*/false,
1424 CostKind);
1425 case Intrinsic::experimental_vector_reduce_xor:
1426 return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
1427 /*IsPairwiseForm=*/false,
1428 CostKind);
1429 case Intrinsic::experimental_vector_reduce_v2_fadd:
1430 // FIXME: Add new flag for cost of strict reductions.
1431 return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
1432 /*IsPairwiseForm=*/false,
1433 CostKind);
1434 case Intrinsic::experimental_vector_reduce_v2_fmul:
1435 // FIXME: Add new flag for cost of strict reductions.
1436 return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
1437 /*IsPairwiseForm=*/false,
1438 CostKind);
1439 case Intrinsic::experimental_vector_reduce_smax:
1440 case Intrinsic::experimental_vector_reduce_smin:
1441 case Intrinsic::experimental_vector_reduce_fmax:
1442 case Intrinsic::experimental_vector_reduce_fmin:
1443 return thisT()->getMinMaxReductionCost(
1444 VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1445 /*IsPairwiseForm=*/false,
1446 /*IsUnsigned=*/false, CostKind);
1447 case Intrinsic::experimental_vector_reduce_umax:
1448 case Intrinsic::experimental_vector_reduce_umin:
1449 return thisT()->getMinMaxReductionCost(
1450 VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1451 /*IsPairwiseForm=*/false,
1452 /*IsUnsigned=*/true, CostKind);
1453 case Intrinsic::abs:
1454 case Intrinsic::smax:
1455 case Intrinsic::smin:
1456 case Intrinsic::umax:
1457 case Intrinsic::umin: {
1458 // abs(X) = select(icmp(X,0),X,sub(0,X))
1459 // minmax(X,Y) = select(icmp(X,Y),X,Y)
1460 Type *CondTy = RetTy->getWithNewBitWidth(1);
1461 unsigned Cost = 0;
1462 // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
1463 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1464 CostKind);
1465 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1466 CostKind);
1467 // TODO: Should we add an OperandValueProperties::OP_Zero property?
1468 if (IID == Intrinsic::abs)
1469 Cost += thisT()->getArithmeticInstrCost(
1470 BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
1471 return Cost;
1472 }
1473 case Intrinsic::sadd_sat:
1474 case Intrinsic::ssub_sat: {
1475 Type *CondTy = RetTy->getWithNewBitWidth(1);
1476
1477 Type *OpTy = StructType::create({RetTy, CondTy});
1478 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1479 ? Intrinsic::sadd_with_overflow
1480 : Intrinsic::ssub_with_overflow;
1481
1482 // SatMax -> Overflow && SumDiff < 0
1483 // SatMin -> Overflow && SumDiff >= 0
1484 unsigned Cost = 0;
1485 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1486 ScalarizationCostPassed);
1487 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1488 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1489 CostKind);
1490 Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1491 CondTy, CostKind);
1492 return Cost;
1493 }
1494 case Intrinsic::uadd_sat:
1495 case Intrinsic::usub_sat: {
1496 Type *CondTy = RetTy->getWithNewBitWidth(1);
1497
1498 Type *OpTy = StructType::create({RetTy, CondTy});
1499 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1500 ? Intrinsic::uadd_with_overflow
1501 : Intrinsic::usub_with_overflow;
1502
1503 unsigned Cost = 0;
1504 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1505 ScalarizationCostPassed);
1506 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1507 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1508 CostKind);
1509 return Cost;
1510 }
1511 case Intrinsic::smul_fix:
1512 case Intrinsic::umul_fix: {
1513 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1514 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
1515
1516 unsigned ExtOp =
1517 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1518 TTI::CastContextHint CCH = TTI::CastContextHint::None;
1519
1520 unsigned Cost = 0;
1521 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
1522 Cost +=
1523 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1524 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1525 CCH, CostKind);
1526 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1527 CostKind, TTI::OK_AnyValue,
1528 TTI::OK_UniformConstantValue);
1529 Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1530 TTI::OK_AnyValue,
1531 TTI::OK_UniformConstantValue);
1532 Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
1533 return Cost;
1534 }
1535 case Intrinsic::sadd_with_overflow:
1536 case Intrinsic::ssub_with_overflow: {
1537 Type *SumTy = RetTy->getContainedType(0);
1538 Type *OverflowTy = RetTy->getContainedType(1);
1539 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1540 ? BinaryOperator::Add
1541 : BinaryOperator::Sub;
1542
1543 // LHSSign -> LHS >= 0
1544 // RHSSign -> RHS >= 0
1545 // SumSign -> Sum >= 0
1546 //
1547 // Add:
1548 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1549 // Sub:
1550 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1551 unsigned Cost = 0;
1552 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1553 Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1554 OverflowTy, CostKind);
1555 Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy,
1556 OverflowTy, CostKind);
1557 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
1558 CostKind);
1559 return Cost;
1560 }
1561 case Intrinsic::uadd_with_overflow:
1562 case Intrinsic::usub_with_overflow: {
1563 Type *SumTy = RetTy->getContainedType(0);
1564 Type *OverflowTy = RetTy->getContainedType(1);
1565 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1566 ? BinaryOperator::Add
1567 : BinaryOperator::Sub;
1568
1569 unsigned Cost = 0;
1570 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1571 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1572 OverflowTy, CostKind);
1573 return Cost;
1574 }
1575 case Intrinsic::smul_with_overflow:
1576 case Intrinsic::umul_with_overflow: {
1577 Type *MulTy = RetTy->getContainedType(0);
1578 Type *OverflowTy = RetTy->getContainedType(1);
1579 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1580 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
1581
1582 unsigned ExtOp =
1583 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1584 TTI::CastContextHint CCH = TTI::CastContextHint::None;
1585
1586 unsigned Cost = 0;
1587 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
1588 Cost +=
1589 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1590 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1591 CCH, CostKind);
1592 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
1593 CostKind, TTI::OK_AnyValue,
1594 TTI::OK_UniformConstantValue);
1595
1596 if (IID == Intrinsic::smul_with_overflow)
1597 Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1598 CostKind, TTI::OK_AnyValue,
1599 TTI::OK_UniformConstantValue);
1600
1601 Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1602 OverflowTy, CostKind);
1603 return Cost;
1604 }
1605 case Intrinsic::ctpop:
1606 ISDs.push_back(ISD::CTPOP);
1607 // In case of legalization use TCC_Expensive. This is cheaper than a
1608 // library call but still not a cheap instruction.
1609 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1610 break;
1611 // FIXME: ctlz, cttz, ...
1612 case Intrinsic::bswap:
1613 ISDs.push_back(ISD::BSWAP);
1614 break;
1615 case Intrinsic::bitreverse:
1616 ISDs.push_back(ISD::BITREVERSE);
1617 break;
1618 }
1619
1620 const TargetLoweringBase *TLI = getTLI();
1621 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1622
1623 SmallVector<unsigned, 2> LegalCost;
1624 SmallVector<unsigned, 2> CustomCost;
1625 for (unsigned ISD : ISDs) {
1626 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1627 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1628 TLI->isFAbsFree(LT.second)) {
1629 return 0;
1630 }
1631
1632 // The operation is legal. Assume it costs 1.
1633 // If the type is split to multiple registers, assume that there is some
1634 // overhead to this.
1635 // TODO: Once we have extract/insert subvector cost we need to use them.
1636 if (LT.first > 1)
1637 LegalCost.push_back(LT.first * 2);
1638 else
1639 LegalCost.push_back(LT.first * 1);
1640 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1641 // If the operation is custom lowered then assume
1642 // that the code is twice as expensive.
1643 CustomCost.push_back(LT.first * 2);
1644 }
1645 }
1646
1647 auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1648 if (MinLegalCostI != LegalCost.end())
1649 return *MinLegalCostI;
1650
1651 auto MinCustomCostI =
1652 std::min_element(CustomCost.begin(), CustomCost.end());
1653 if (MinCustomCostI != CustomCost.end())
1654 return *MinCustomCostI;
1655
1656 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1657 // point mul followed by an add.
1658 if (IID == Intrinsic::fmuladd)
1659 return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
1660 CostKind) +
1661 thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
1662 CostKind);
1663 if (IID == Intrinsic::experimental_constrained_fmuladd) {
1664 IntrinsicCostAttributes FMulAttrs(
1665 Intrinsic::experimental_constrained_fmul, RetTy, Tys);
1666 IntrinsicCostAttributes FAddAttrs(
1667 Intrinsic::experimental_constrained_fadd, RetTy, Tys);
1668 return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
1669 thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
1670 }
1671
1672 // Else, assume that we need to scalarize this intrinsic. For math builtins
1673 // this will emit a costly libcall, adding call overhead and spills. Make it
1674 // very expensive.
1675 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1676 unsigned ScalarizationCost = SkipScalarizationCost ?
1677 ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
1678
1679 unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
1680 SmallVector<Type *, 4> ScalarTys;
1681 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1682 Type *Ty = Tys[i];
1683 if (Ty->isVectorTy())
1684 Ty = Ty->getScalarType();
1685 ScalarTys.push_back(Ty);
1686 }
1687 IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
1688 unsigned ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1689 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1690 if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
1691 if (!ICA.skipScalarizationCost())
1692 ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1693 ScalarCalls = std::max(ScalarCalls,
1694 cast<FixedVectorType>(VTy)->getNumElements());
1695 }
1696 }
1697 return ScalarCalls * ScalarCost + ScalarizationCost;
1698 }
1699
1700 // This is going to be turned into a library call, make it expensive.
1701 return SingleCallCost;
1702 }
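
For reference, the sadd_sat/ssub_sat pricing earlier in this function (an overflow-checked add, an icmp, and two selects, per the comment around line 1482) corresponds to a scalar expansion of the following shape. This is a standalone C++ sketch only; __builtin_sadd_overflow is a GCC/Clang builtin standing in for llvm.sadd.with.overflow, and the real lowering is target-dependent.

#include <climits>

int sadd_sat32(int A, int B) {
  int Sum;
  // Stand-in for the sadd_with_overflow intrinsic priced above.
  bool Overflow = __builtin_sadd_overflow(A, B, &Sum);
  if (!Overflow)
    return Sum;
  // SatMax when the wrapped sum is negative, SatMin otherwise, matching
  // "SatMax -> Overflow && SumDiff < 0" / "SatMin -> Overflow && SumDiff >= 0".
  return Sum < 0 ? INT_MAX : INT_MIN;
}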
1703
1704 /// Compute a cost of the given call instruction.
1705 ///
1706 /// Compute the cost of calling function F with return type RetTy and
1707 /// argument types Tys. F might be nullptr, in this case the cost of an
1708 /// arbitrary call with the specified signature will be returned.
1709 /// This is used, for instance, when we estimate call of a vector
1710 /// counterpart of the given function.
1711 /// \param F Called function, might be nullptr.
1712 /// \param RetTy Return value types.
1713 /// \param Tys Argument types.
1714 /// \returns The cost of Call instruction.
1715 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1716 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
1717 return 10;
1718 }
1719
1720 unsigned getNumberOfParts(Type *Tp) {
1721 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1722 return LT.first;
1723 }
1724
1725 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1726 const SCEV *) {
1727 return 0;
1728 }
1729
1730 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1731 /// We're assuming that reduction operations are performed in the following way:
1732 /// 1. Non-pairwise reduction
1733 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1734 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1735 /// \----------------v-------------/ \----------v------------/
1736 /// n/2 elements n/2 elements
1737 /// %red1 = op <n x t> %val, <n x t> val1
1738 /// After this operation we have a vector %red1 where only the first n/2
1739 /// elements are meaningful, the second n/2 elements are undefined and can be
1740 /// dropped. All other operations are actually working with the vector of
1741 /// length n/2, not n, though the real vector length is still n.
1742 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1743 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1744 /// \----------------v-------------/ \----------v------------/
1745 /// n/4 elements 3*n/4 elements
1746 /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1747 /// length n/2, the resulting vector has length n/4 etc.
1748 /// 2. Pairwise reduction:
1749 /// Everything is the same except for an additional shuffle operation which
1750 /// is used to produce operands for pairwise kind of reductions.
1751 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1752 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1753 /// \-------------v----------/ \----------v------------/
1754 /// n/2 elements n/2 elements
1755 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1756 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1757 /// \-------------v----------/ \----------v------------/
1758 /// n/2 elements n/2 elements
1759 /// %red1 = op <n x t> %val1, <n x t> val2
1760 /// Again, the operation is performed on <n x t> vector, but the resulting
1761 /// vector %red1 is <n/2 x t> vector.
1762 ///
1763 /// The cost model should take into account that the actual length of the
1764 /// vector is reduced on each iteration.
1765 unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1766 bool IsPairwise,
1767 TTI::TargetCostKind CostKind) {
1768 Type *ScalarTy = Ty->getElementType();
1769 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
1770 unsigned NumReduxLevels = Log2_32(NumVecElts);
1771 unsigned ArithCost = 0;
1772 unsigned ShuffleCost = 0;
1773 std::pair<unsigned, MVT> LT =
1774 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
1775 unsigned LongVectorCount = 0;
1776 unsigned MVTLen =
1777 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1778 while (NumVecElts > MVTLen) {
1779 NumVecElts /= 2;
1780 VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
1781 // Assume the pairwise shuffles add a cost.
1782 ShuffleCost +=
1783 (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1784 Ty, NumVecElts, SubTy);
1785 ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
1786 Ty = SubTy;
1787 ++LongVectorCount;
1788 }
1789
1790 NumReduxLevels -= LongVectorCount;
1791
1792 // The minimal length of the vector is limited by the real length of vector
1793 // operations performed on the current platform. That's why several final
1794 // reduction operations are performed on the vectors with the same
1795 // architecture-dependent length.
1796
1797 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1798 // reductions need two shuffles on every level except the last; on that
1799 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1800 unsigned NumShuffles = NumReduxLevels;
1801 if (IsPairwise && NumReduxLevels >= 1)
1802 NumShuffles += NumReduxLevels - 1;
1803 ShuffleCost += NumShuffles *
1804 thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
1805 ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
1806 return ShuffleCost + ArithCost +
1807 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1808 }
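
To make the counting above concrete: for a non-pairwise reduction of <8 x i32> on a target whose widest legal vector is <4 x i32>, the standalone C++ below mirrors the two phases (split until legal, then reduce in-register) using a unit cost per shuffle and per arithmetic op. The real implementation queries getShuffleCost and getArithmeticInstrCost instead of these illustrative 1s.

#include <cstdio>

int main() {
  unsigned NumVecElts = 8;
  const unsigned MVTLen = 4;              // widest legal vector length
  unsigned NumReduxLevels = 3;            // log2(8)
  unsigned ShuffleCost = 0, ArithCost = 0, LongVectorCount = 0;

  // Split phase: halve the vector until it fits a legal register.
  while (NumVecElts > MVTLen) {
    NumVecElts /= 2;
    ShuffleCost += 1;                     // one extract-subvector shuffle
    ArithCost += 1;                       // one vector op on the half-size type
    ++LongVectorCount;
  }
  NumReduxLevels -= LongVectorCount;      // 3 - 1 = 2 levels remain

  // In-register phase: one shuffle and one op per remaining level,
  // plus the final extractelement of lane 0.
  ShuffleCost += NumReduxLevels;
  ArithCost += NumReduxLevels;
  std::printf("total = %u\n", ShuffleCost + ArithCost + 1); // total = 7
  return 0;
}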
1809
1810 /// Try to calculate op costs for min/max reduction operations.
1811 /// \param CondTy Conditional type for the Select instruction.
1812 unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
1813 bool IsPairwise, bool IsUnsigned,
1814 TTI::TargetCostKind CostKind) {
1815 Type *ScalarTy = Ty->getElementType();
1816 Type *ScalarCondTy = CondTy->getElementType();
1817 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
1818 unsigned NumReduxLevels = Log2_32(NumVecElts);
1819 unsigned CmpOpcode;
1820 if (Ty->isFPOrFPVectorTy()) {
1821 CmpOpcode = Instruction::FCmp;
1822 } else {
1823 assert(Ty->isIntOrIntVectorTy() &&((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction"
) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1824, __PRETTY_FUNCTION__))
1824 "expecting floating point or integer type for min/max reduction")((Ty->isIntOrIntVectorTy() && "expecting floating point or integer type for min/max reduction"
) ? static_cast<void> (0) : __assert_fail ("Ty->isIntOrIntVectorTy() && \"expecting floating point or integer type for min/max reduction\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h"
, 1824, __PRETTY_FUNCTION__))
;
1825 CmpOpcode = Instruction::ICmp;
1826 }
1827 unsigned MinMaxCost = 0;
1828 unsigned ShuffleCost = 0;
1829 std::pair<unsigned, MVT> LT =
1830 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
1831 unsigned LongVectorCount = 0;
1832 unsigned MVTLen =
1833 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1834 while (NumVecElts > MVTLen) {
1835 NumVecElts /= 2;
1836 auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
1837 CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
1838
1839 // Assume the pairwise shuffles add a cost.
1840 ShuffleCost +=
1841 (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1842 Ty, NumVecElts, SubTy);
1843 MinMaxCost +=
1844 thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) +
1845 thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1846 CostKind);
1847 Ty = SubTy;
1848 ++LongVectorCount;
1849 }
1850
1851 NumReduxLevels -= LongVectorCount;
1852
1853 // The minimal length of the vector is limited by the real length of vector
1854 // operations performed on the current platform. That's why several final
1855 // reduction operations are performed on the vectors with the same
1856 // architecture-dependent length.
1857
1858 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1859 // reductions need two shuffles on every level except the last; on that
1860 // level one of the shuffles is <0, u, u, ...>, which is the identity.
1861 unsigned NumShuffles = NumReduxLevels;
1862 if (IsPairwise && NumReduxLevels >= 1)
1863 NumShuffles += NumReduxLevels - 1;
1864 ShuffleCost += NumShuffles *
1865 thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
1866 MinMaxCost +=
1867 NumReduxLevels *
1868 (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) +
1869 thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1870 CostKind));
1871 // The last min/max should be in vector registers and we counted it above.
1872 // So just need a single extractelement.
1873 return ShuffleCost + MinMaxCost +
1874 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1875 }
1876
1877 unsigned getVectorSplitCost() { return 1; }
1878
1879 /// @}
1880};
1881
1882/// Concrete BasicTTIImpl that can be used if no further customization
1883/// is needed.
1884class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1885 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1886
1887 friend class BasicTTIImplBase<BasicTTIImpl>;
1888
1889 const TargetSubtargetInfo *ST;
1890 const TargetLoweringBase *TLI;
1891
1892 const TargetSubtargetInfo *getST() const { return ST; }
1893 const TargetLoweringBase *getTLI() const { return TLI; }
1894
1895public:
1896 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1897};
1898
1899} // end namespace llvm
1900
1901#endif // LLVM_CODEGEN_BASICTTIIMPL_H

/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h

1//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file describes how to lower LLVM code to machine code. This has two
11/// main components:
12///
13/// 1. Which ValueTypes are natively supported by the target.
14/// 2. Which operations are supported for supported ValueTypes.
15/// 3. Cost thresholds for alternative implementations of certain operations.
16///
17/// In addition it has a few other components, like information about FP
18/// immediates.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_CODEGEN_TARGETLOWERING_H
23#define LLVM_CODEGEN_TARGETLOWERING_H
24
25#include "llvm/ADT/APInt.h"
26#include "llvm/ADT/ArrayRef.h"
27#include "llvm/ADT/DenseMap.h"
28#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallVector.h"
30#include "llvm/ADT/StringRef.h"
31#include "llvm/CodeGen/DAGCombine.h"
32#include "llvm/CodeGen/ISDOpcodes.h"
33#include "llvm/CodeGen/RuntimeLibcalls.h"
34#include "llvm/CodeGen/SelectionDAG.h"
35#include "llvm/CodeGen/SelectionDAGNodes.h"
36#include "llvm/CodeGen/TargetCallingConv.h"
37#include "llvm/CodeGen/ValueTypes.h"
38#include "llvm/IR/Attributes.h"
39#include "llvm/IR/CallingConv.h"
40#include "llvm/IR/DataLayout.h"
41#include "llvm/IR/DerivedTypes.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/IRBuilder.h"
44#include "llvm/IR/InlineAsm.h"
45#include "llvm/IR/Instruction.h"
46#include "llvm/IR/Instructions.h"
47#include "llvm/IR/Type.h"
48#include "llvm/Support/Alignment.h"
49#include "llvm/Support/AtomicOrdering.h"
50#include "llvm/Support/Casting.h"
51#include "llvm/Support/ErrorHandling.h"
52#include "llvm/Support/MachineValueType.h"
53#include <algorithm>
54#include <cassert>
55#include <climits>
56#include <cstdint>
57#include <iterator>
58#include <map>
59#include <string>
60#include <utility>
61#include <vector>
62
63namespace llvm {
64
65class BranchProbability;
66class CCState;
67class CCValAssign;
68class Constant;
69class FastISel;
70class FunctionLoweringInfo;
71class GlobalValue;
72class GISelKnownBits;
73class IntrinsicInst;
74struct KnownBits;
75class LegacyDivergenceAnalysis;
76class LLVMContext;
77class MachineBasicBlock;
78class MachineFunction;
79class MachineInstr;
80class MachineJumpTableInfo;
81class MachineLoop;
82class MachineRegisterInfo;
83class MCContext;
84class MCExpr;
85class Module;
86class ProfileSummaryInfo;
87class TargetLibraryInfo;
88class TargetMachine;
89class TargetRegisterClass;
90class TargetRegisterInfo;
91class TargetTransformInfo;
92class Value;
93
94namespace Sched {
95
96 enum Preference {
97 None, // No preference
98 Source, // Follow source order.
99 RegPressure, // Scheduling for lowest register pressure.
100 Hybrid, // Scheduling for both latency and register pressure.
101 ILP, // Scheduling for ILP in low register pressure mode.
102 VLIW // Scheduling for VLIW targets.
103 };
104
105} // end namespace Sched
106
107// MemOp models a memory operation, either memset or memcpy/memmove.
108struct MemOp {
109private:
110 // Shared
111 uint64_t Size;
112 bool DstAlignCanChange; // true if destination alignment can satisfy any
113 // constraint.
114 Align DstAlign; // Specified alignment of the memory operation.
115
116 bool AllowOverlap;
117 // memset only
118 bool IsMemset; // If set, this memory operation is a memset.
119 bool ZeroMemset; // If set clears out memory with zeros.
120 // memcpy only
121 bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register
122 // constant so it does not need to be loaded.
123 Align SrcAlign; // Inferred alignment of the source or default value if the
124 // memory operation does not need to load the value.
125public:
126 static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
127 Align SrcAlign, bool IsVolatile,
128 bool MemcpyStrSrc = false) {
129 MemOp Op;
130 Op.Size = Size;
131 Op.DstAlignCanChange = DstAlignCanChange;
132 Op.DstAlign = DstAlign;
133 Op.AllowOverlap = !IsVolatile;
134 Op.IsMemset = false;
135 Op.ZeroMemset = false;
136 Op.MemcpyStrSrc = MemcpyStrSrc;
137 Op.SrcAlign = SrcAlign;
138 return Op;
139 }
140
141 static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign,
142 bool IsZeroMemset, bool IsVolatile) {
143 MemOp Op;
144 Op.Size = Size;
145 Op.DstAlignCanChange = DstAlignCanChange;
146 Op.DstAlign = DstAlign;
147 Op.AllowOverlap = !IsVolatile;
148 Op.IsMemset = true;
149 Op.ZeroMemset = IsZeroMemset;
150 Op.MemcpyStrSrc = false;
151 return Op;
152 }
153
154 uint64_t size() const { return Size; }
155 Align getDstAlign() const {
156 assert(!DstAlignCanChange)((!DstAlignCanChange) ? static_cast<void> (0) : __assert_fail
("!DstAlignCanChange", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 156, __PRETTY_FUNCTION__))
;
157 return DstAlign;
158 }
159 bool isFixedDstAlign() const { return !DstAlignCanChange; }
160 bool allowOverlap() const { return AllowOverlap; }
161 bool isMemset() const { return IsMemset; }
162 bool isMemcpy() const { return !IsMemset; }
163 bool isMemcpyWithFixedDstAlign() const {
164 return isMemcpy() && !DstAlignCanChange;
165 }
166 bool isZeroMemset() const { return isMemset() && ZeroMemset; }
167 bool isMemcpyStrSrc() const {
168 assert(isMemcpy() && "Must be a memcpy")((isMemcpy() && "Must be a memcpy") ? static_cast<
void> (0) : __assert_fail ("isMemcpy() && \"Must be a memcpy\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 168, __PRETTY_FUNCTION__))
;
169 return MemcpyStrSrc;
170 }
171 Align getSrcAlign() const {
172 assert(isMemcpy() && "Must be a memcpy")((isMemcpy() && "Must be a memcpy") ? static_cast<
void> (0) : __assert_fail ("isMemcpy() && \"Must be a memcpy\""
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 172, __PRETTY_FUNCTION__))
;
173 return SrcAlign;
174 }
175 bool isSrcAligned(Align AlignCheck) const {
176 return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value());
177 }
178 bool isDstAligned(Align AlignCheck) const {
179 return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value());
180 }
181 bool isAligned(Align AlignCheck) const {
182 return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck);
183 }
184};
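
A short usage sketch for the MemOp factories above; it only compiles against the LLVM headers this file belongs to, and the sizes and alignments are illustrative values, not recommendations.

#include "llvm/CodeGen/TargetLowering.h"
using namespace llvm;

void describeMemOps() {
  // A 32-byte, non-volatile memcpy with a fixed 16-byte destination alignment
  // and an 8-byte-aligned source.
  MemOp Copy = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/false,
                           /*DstAlign=*/Align(16), /*SrcAlign=*/Align(8),
                           /*IsVolatile=*/false);
  (void)Copy.isMemcpyWithFixedDstAlign(); // true
  (void)Copy.isAligned(Align(8));         // true: both sides are >= 8-aligned

  // A zeroing memset of the same size whose destination alignment may change.
  MemOp Zero = MemOp::Set(/*Size=*/32, /*DstAlignCanChange=*/true,
                          /*DstAlign=*/Align(1), /*IsZeroMemset=*/true,
                          /*IsVolatile=*/false);
  (void)Zero.isZeroMemset();              // true
}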
185
186/// This base class for TargetLowering contains the SelectionDAG-independent
187/// parts that can be used from the rest of CodeGen.
188class TargetLoweringBase {
189public:
190 /// This enum indicates whether operations are valid for a target, and if not,
191 /// what action should be used to make them valid.
192 enum LegalizeAction : uint8_t {
193 Legal, // The target natively supports this operation.
194 Promote, // This operation should be executed in a larger type.
195 Expand, // Try to expand this to other ops, otherwise use a libcall.
196 LibCall, // Don't try to expand this to other ops, always use a libcall.
197 Custom // Use the LowerOperation hook to implement custom lowering.
198 };
199
200 /// This enum indicates whether a type is legal for a target, and if not,
201 /// what action should be used to make them valid.
202 enum LegalizeTypeAction : uint8_t {
203 TypeLegal, // The target natively supports this type.
204 TypePromoteInteger, // Replace this integer with a larger one.
205 TypeExpandInteger, // Split this integer into two of half the size.
206 TypeSoftenFloat, // Convert this float to a same size integer type.
207 TypeExpandFloat, // Split this float into two of half the size.
208 TypeScalarizeVector, // Replace this one-element vector with its element.
209 TypeSplitVector, // Split this vector into two of half the size.
210 TypeWidenVector, // This vector should be widened into a larger vector.
211 TypePromoteFloat, // Replace this float with a larger one.
212 TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic.
213 TypeScalarizeScalableVector, // This action is explicitly left unimplemented.
214 // While it is theoretically possible to
215 // legalize operations on scalable types with a
216 // loop that handles the vscale * #lanes of the
217 // vector, this is non-trivial at SelectionDAG
218 // level and these types are better to be
219 // widened or promoted.
220 };
221
222 /// LegalizeKind holds the legalization kind that needs to happen to EVT
223 /// in order to type-legalize it.
224 using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;
225
226 /// Enum that describes how the target represents true/false values.
227 enum BooleanContent {
228 UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage.
229 ZeroOrOneBooleanContent, // All bits zero except for bit 0.
230 ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
231 };
232
233 /// Enum that describes what type of support for selects the target has.
234 enum SelectSupportKind {
235 ScalarValSelect, // The target supports scalar selects (ex: cmov).
236 ScalarCondVectorVal, // The target supports selects with a scalar condition
237 // and vector values (ex: cmov).
238 VectorMaskSelect // The target supports vector selects with a vector
239 // mask (ex: x86 blends).
240 };
241
242 /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
243 /// to, if at all. Exists because different targets have different levels of
244 /// support for these atomic instructions, and also have different options
245 /// w.r.t. what they should expand to.
246 enum class AtomicExpansionKind {
247 None, // Don't expand the instruction.
248 LLSC, // Expand the instruction into loadlinked/storeconditional; used
249 // by ARM/AArch64.
250 LLOnly, // Expand the (load) instruction into just a load-linked, which has
251 // greater atomic guarantees than a normal load.
252 CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
253 MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
254 };
255
256 /// Enum that specifies when a multiplication should be expanded.
257 enum class MulExpansionKind {
258 Always, // Always expand the instruction.
259 OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
260 // or custom.
261 };
262
263 /// Enum that specifies when a float negation is beneficial.
264 enum class NegatibleCost {
265 Cheaper = 0, // Negated expression is cheaper.
266 Neutral = 1, // Negated expression has the same cost.
267 Expensive = 2 // Negated expression is more expensive.
268 };
269
270 class ArgListEntry {
271 public:
272 Value *Val = nullptr;
273 SDValue Node = SDValue();
274 Type *Ty = nullptr;
275 bool IsSExt : 1;
276 bool IsZExt : 1;
277 bool IsInReg : 1;
278 bool IsSRet : 1;
279 bool IsNest : 1;
280 bool IsByVal : 1;
281 bool IsByRef : 1;
282 bool IsInAlloca : 1;
283 bool IsPreallocated : 1;
284 bool IsReturned : 1;
285 bool IsSwiftSelf : 1;
286 bool IsSwiftError : 1;
287 bool IsCFGuardTarget : 1;
288 MaybeAlign Alignment = None;
289 Type *ByValType = nullptr;
290 Type *PreallocatedType = nullptr;
291
292 ArgListEntry()
293 : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
294 IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false),
295 IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
296 IsSwiftError(false), IsCFGuardTarget(false) {}
297
298 void setAttributes(const CallBase *Call, unsigned ArgIdx);
299 };
300 using ArgListTy = std::vector<ArgListEntry>;
301
302 virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
303 ArgListTy &Args) const {};
304
305 static ISD::NodeType getExtendForContent(BooleanContent Content) {
306 switch (Content) {
307 case UndefinedBooleanContent:
308 // Extend by adding rubbish bits.
309 return ISD::ANY_EXTEND;
310 case ZeroOrOneBooleanContent:
311 // Extend by adding zero bits.
312 return ISD::ZERO_EXTEND;
313 case ZeroOrNegativeOneBooleanContent:
314 // Extend by copying the sign bit.
315 return ISD::SIGN_EXTEND;
316 }
317 llvm_unreachable("Invalid content kind")::llvm::llvm_unreachable_internal("Invalid content kind", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 317)
;
318 }
319
320 explicit TargetLoweringBase(const TargetMachine &TM);
321 TargetLoweringBase(const TargetLoweringBase &) = delete;
322 TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
323 virtual ~TargetLoweringBase() = default;
324
325 /// Return true if the target support strict float operation
326 bool isStrictFPEnabled() const {
327 return IsStrictFPEnabled;
328 }
329
330protected:
331 /// Initialize all of the actions to default values.
332 void initActions();
333
334public:
335 const TargetMachine &getTargetMachine() const { return TM; }
336
337 virtual bool useSoftFloat() const { return false; }
338
339 /// Return the pointer type for the given address space, defaults to
340 /// the pointer type from the data layout.
341 /// FIXME: The default needs to be removed once all the code is updated.
342 virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
343 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
344 }
345
346 /// Return the in-memory pointer type for the given address space, defaults to
347 /// the pointer type from the data layout. FIXME: The default needs to be
348 /// removed once all the code is updated.
349 MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
350 return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
351 }
352
353 /// Return the type for frame index, which is determined by
354 /// the alloca address space specified through the data layout.
355 MVT getFrameIndexTy(const DataLayout &DL) const {
356 return getPointerTy(DL, DL.getAllocaAddrSpace());
357 }
358
359 /// Return the type for code pointers, which is determined by the program
360 /// address space specified through the data layout.
361 MVT getProgramPointerTy(const DataLayout &DL) const {
362 return getPointerTy(DL, DL.getProgramAddressSpace());
363 }
364
365 /// Return the type for operands of fence.
366 /// TODO: Let fence operands be of i32 type and remove this.
367 virtual MVT getFenceOperandTy(const DataLayout &DL) const {
368 return getPointerTy(DL);
369 }
370
371 /// EVT is not used in-tree, but is used by out-of-tree targets.
372 /// Documentation for this function would be nice...
373 virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;
374
375 EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
376 bool LegalTypes = true) const;
377
378 /// Return the preferred type to use for a shift opcode, given the shifted
379 /// amount type is \p ShiftValueTy.
380 LLVM_READONLY__attribute__((__pure__))
381 virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const {
382 return ShiftValueTy;
383 }
384
385 /// Returns the type to be used for the index operand of:
386 /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
387 /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
388 virtual MVT getVectorIdxTy(const DataLayout &DL) const {
389 return getPointerTy(DL);
390 }
391
392 /// This callback is used to inspect load/store instructions and add
393 /// target-specific MachineMemOperand flags to them. The default
394 /// implementation does nothing.
395 virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const {
396 return MachineMemOperand::MONone;
397 }
398
399 MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI,
400 const DataLayout &DL) const;
401 MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI,
402 const DataLayout &DL) const;
403 MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI,
404 const DataLayout &DL) const;
405
406 virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
407 return true;
408 }
409
410 /// Return true if it is profitable to convert a select of FP constants into
411 /// a constant pool load whose address depends on the select condition. The
412 /// parameter may be used to differentiate a select with FP compare from
413 /// integer compare.
414 virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
415 return true;
416 }
417
418 /// Return true if multiple condition registers are available.
419 bool hasMultipleConditionRegisters() const {
420 return HasMultipleConditionRegisters;
421 }
422
423 /// Return true if the target has BitExtract instructions.
424 bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }
425
426 /// Return the preferred vector type legalization action.
427 virtual TargetLoweringBase::LegalizeTypeAction
428 getPreferredVectorAction(MVT VT) const {
429 // The default action for one element vectors is to scalarize
430 if (VT.getVectorElementCount() == 1)
431 return TypeScalarizeVector;
432 // The default action for an odd-width vector is to widen.
433 if (!VT.isPow2VectorType())
434 return TypeWidenVector;
435 // The default action for other vectors is to promote
436 return TypePromoteInteger;
437 }
438
439 // Return true if the half type should be passed around as i16, but promoted
440 // to float around arithmetic. The default behavior is to pass around as
441 // float and convert around loads/stores/bitcasts and other places where
442 // the size matters.
443 virtual bool softPromoteHalfType() const { return false; }
444
445 // There are two general methods for expanding a BUILD_VECTOR node:
446 // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
447 // them together.
448 // 2. Build the vector on the stack and then load it.
449 // If this function returns true, then method (1) will be used, subject to
450 // the constraint that all of the necessary shuffles are legal (as determined
451 // by isShuffleMaskLegal). If this function returns false, then method (2) is
452 // always used. The vector type, and the number of defined values, are
453 // provided.
454 virtual bool
455 shouldExpandBuildVectorWithShuffles(EVT /* VT */,
456 unsigned DefinedValues) const {
457 return DefinedValues < 3;
458 }
459
460 /// Return true if integer divide is usually cheaper than a sequence of
461 /// several shifts, adds, and multiplies for this target.
462 /// The definition of "cheaper" may depend on whether we're optimizing
463 /// for speed or for size.
464 virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }
465
466 /// Return true if the target can handle a standalone remainder operation.
467 virtual bool hasStandaloneRem(EVT VT) const {
468 return true;
469 }
470
471 /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
472 virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
473 // Default behavior is to replace SQRT(X) with X*RSQRT(X).
474 return false;
475 }
476
477 /// Reciprocal estimate status values used by the functions below.
478 enum ReciprocalEstimate : int {
479 Unspecified = -1,
480 Disabled = 0,
481 Enabled = 1
482 };
483
484 /// Return a ReciprocalEstimate enum value for a square root of the given type
485 /// based on the function's attributes. If the operation is not overridden by
486 /// the function's attributes, "Unspecified" is returned and target defaults
487 /// are expected to be used for instruction selection.
488 int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;
489
490 /// Return a ReciprocalEstimate enum value for a division of the given type
491 /// based on the function's attributes. If the operation is not overridden by
492 /// the function's attributes, "Unspecified" is returned and target defaults
493 /// are expected to be used for instruction selection.
494 int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;
495
496 /// Return the refinement step count for a square root of the given type based
497 /// on the function's attributes. If the operation is not overridden by
498 /// the function's attributes, "Unspecified" is returned and target defaults
499 /// are expected to be used for instruction selection.
500 int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;
501
502 /// Return the refinement step count for a division of the given type based
503 /// on the function's attributes. If the operation is not overridden by
504 /// the function's attributes, "Unspecified" is returned and target defaults
505 /// are expected to be used for instruction selection.
506 int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;
507
508 /// Returns true if the target has indicated at least one type should be bypassed.
509 bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }
510
511 /// Returns map of slow types for division or remainder with corresponding
512 /// fast types
513 const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
514 return BypassSlowDivWidths;
515 }
516
517 /// Return true if Flow Control is an expensive operation that should be
518 /// avoided.
519 bool isJumpExpensive() const { return JumpIsExpensive; }
520
521 /// Return true if selects are only cheaper than branches if the branch is
522 /// unlikely to be predicted right.
523 bool isPredictableSelectExpensive() const {
524 return PredictableSelectIsExpensive;
525 }
526
527 virtual bool fallBackToDAGISel(const Instruction &Inst) const {
528 return false;
529 }
530
531 /// If a branch or a select condition is skewed in one direction by more than
532 /// this factor, it is very likely to be predicted correctly.
533 virtual BranchProbability getPredictableBranchThreshold() const;
534
535 /// Return true if the following transform is beneficial:
536 /// fold (conv (load x)) -> (load (conv*)x)
537 /// On architectures that don't natively support some vector loads
538 /// efficiently, casting the load to a smaller vector of larger types and
539 /// loading is more efficient; however, this can be undone by optimizations in
540 /// the DAG combiner.
541 virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
542 const SelectionDAG &DAG,
543 const MachineMemOperand &MMO) const {
544 // Don't do this if we could do an indexed load on the original type, but not on
545 // the new one.
546 if (!LoadVT.isSimple() || !BitcastVT.isSimple())
547 return true;
548
549 MVT LoadMVT = LoadVT.getSimpleVT();
550
551 // Don't bother doing this if it's just going to be promoted again later, as
552 // doing so might interfere with other combines.
553 if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
554 getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
555 return false;
556
557 bool Fast = false;
558 return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
559 MMO, &Fast) && Fast;
560 }
561
562 /// Return true if the following transform is beneficial:
563 /// (store (y (conv x)), y*)) -> (store x, (x*))
564 virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
565 const SelectionDAG &DAG,
566 const MachineMemOperand &MMO) const {
567 // Default to the same logic as loads.
568 return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
569 }
570
571 /// Return true if it is expected to be cheaper to do a store of a non-zero
572 /// vector constant with the given size and type for the address space than to
573 /// store the individual scalar element constants.
574 virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
575 unsigned NumElem,
576 unsigned AddrSpace) const {
577 return false;
578 }
579
580 /// Allow store merging for the specified type after legalization in addition
581 /// to before legalization. This may transform stores that do not exist
582 /// earlier (for example, stores created from intrinsics).
583 virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
584 return true;
585 }
586
587 /// Returns true if it's reasonable to merge stores to MemVT size.
588 virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
589 const SelectionDAG &DAG) const {
590 return true;
591 }
592
593 /// Return true if it is cheap to speculate a call to intrinsic cttz.
594 virtual bool isCheapToSpeculateCttz() const {
595 return false;
596 }
597
598 /// Return true if it is cheap to speculate a call to intrinsic ctlz.
599 virtual bool isCheapToSpeculateCtlz() const {
600 return false;
601 }
602
603 /// Return true if ctlz instruction is fast.
604 virtual bool isCtlzFast() const {
605 return false;
606 }
607
608 /// Return true if instruction generated for equality comparison is folded
609 /// with instruction generated for signed comparison.
610 virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; }
611
612 /// Return true if it is safe to transform an integer-domain bitwise operation
613 /// into the equivalent floating-point operation. This should be set to true
614 /// if the target has IEEE-754-compliant fabs/fneg operations for the input
615 /// type.
616 virtual bool hasBitPreservingFPLogic(EVT VT) const {
617 return false;
618 }
619
620 /// Return true if it is cheaper to split the store of a merged int val
621 /// from a pair of smaller values into multiple stores.
622 virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
623 return false;
624 }
625
626 /// Return true if the target supports combining a
627 /// chain like:
628 /// \code
629 /// %andResult = and %val1, #mask
630 /// %icmpResult = icmp %andResult, 0
631 /// \endcode
632 /// into a single machine instruction of a form like:
633 /// \code
634 /// cc = test %register, #mask
635 /// \endcode
636 virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
637 return false;
638 }
639
640 /// Use bitwise logic to make pairs of compares more efficient. For example:
641 /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
642 /// This should be true when it takes more than one instruction to lower
643 /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
644 /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
645 virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
646 return false;
647 }
648
649 /// Return the preferred operand type if the target has a quick way to compare
650 /// integer values of the given size. Assume that any legal integer type can
651 /// be compared efficiently. Targets may override this to allow illegal wide
652 /// types to return a vector type if there is support to compare that type.
653 virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
654 MVT VT = MVT::getIntegerVT(NumBits);
655 return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
656 }
657
658 /// Return true if the target should transform:
659 /// (X & Y) == Y ---> (~X & Y) == 0
660 /// (X & Y) != Y ---> (~X & Y) != 0
661 ///
662 /// This may be profitable if the target has a bitwise and-not operation that
663 /// sets comparison flags. A target may want to limit the transformation based
664 /// on the type of Y or if Y is a constant.
665 ///
666 /// Note that the transform will not occur if Y is known to be a power-of-2
667 /// because a mask and compare of a single bit can be handled by inverting the
668 /// predicate, for example:
669 /// (X & 8) == 8 ---> (X & 8) != 0
670 virtual bool hasAndNotCompare(SDValue Y) const {
671 return false;
672 }
673
674 /// Return true if the target has a bitwise and-not operation:
675 /// X = ~A & B
676 /// This can be used to simplify select or other instructions.
677 virtual bool hasAndNot(SDValue X) const {
678 // If the target has the more complex version of this operation, assume that
679 // it has this operation too.
680 return hasAndNotCompare(X);
681 }
682
683 /// Return true if the target has a bit-test instruction:
684 /// (X & (1 << Y)) ==/!= 0
685 /// This knowledge can be used to prevent breaking the pattern,
686 /// or creating it if it could be recognized.
687 virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; }
688
689 /// There are two ways to clear extreme bits (either low or high):
690 /// Mask: x & (-1 << y) (the instcombine canonical form)
691 /// Shifts: x >> y << y
692 /// Return true if the variant with 2 variable shifts is preferred.
693 /// Return false if there is no preference.
694 virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
695 // By default, let's assume that no one prefers shifts.
696 return false;
697 }
698
699 /// Return true if it is profitable to fold a pair of shifts into a mask.
700 /// This is usually true on most targets. But some targets, like Thumb1,
701 /// have immediate shift instructions, but no immediate "and" instruction;
702 /// this makes the fold unprofitable.
703 virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
704 CombineLevel Level) const {
705 return true;
706 }
707
708 /// Should we transform the IR-optimal check for whether the given truncation
709 /// down into KeptBits would be truncating or not:
710 /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
711 /// Into its more traditional form:
712 /// ((%x << C) a>> C) dstcond %x
713 /// Return true if we should transform.
714 /// Return false if there is no preference.
715 virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
716 unsigned KeptBits) const {
717 // By default, let's assume that no one prefers shifts.
718 return false;
719 }
720
721 /// Given the pattern
722 /// (X & (C l>>/<< Y)) ==/!= 0
723 /// return true if it should be transformed into:
724 /// ((X <</l>> Y) & C) ==/!= 0
725 /// WARNING: if 'X' is a constant, the fold may deadlock!
726 /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat()
727 /// here because it can end up being not linked in.
728 virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
729 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
730 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
731 SelectionDAG &DAG) const {
732 if (hasBitTest(X, Y)) {
733 // One interesting pattern that we'd want to form is 'bit test':
734 // ((1 << Y) & C) ==/!= 0
735 // But we also need to be careful not to try to reverse that fold.
736
737 // Is this '1 << Y' ?
738 if (OldShiftOpcode == ISD::SHL && CC->isOne())
739 return false; // Keep the 'bit test' pattern.
740
741 // Will it be '1 << Y' after the transform ?
742 if (XC && NewShiftOpcode == ISD::SHL && XC->isOne())
743 return true; // Do form the 'bit test' pattern.
744 }
745
746 // If 'X' is a constant, and we transform, then we will immediately
747 // try to undo the fold, thus causing endless combine loop.
748 // So by default, let's assume everyone prefers the fold
749 // iff 'X' is not a constant.
750 return !XC;
751 }
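
As a quick sanity check of the rewrite described above, the two shift/mask forms can be compared exhaustively at a small bit width. This is a standalone sketch, independent of the LLVM APIs; the 6-bit width and the variable names are arbitrary choices for illustration.

    #include <cassert>
    #include <cstdio>

    int main() {
      const unsigned W = 6, Mask = (1u << W) - 1;   // model a 6-bit integer type
      for (unsigned X = 0; X <= Mask; ++X)
        for (unsigned C = 0; C <= Mask; ++C)
          for (unsigned Y = 0; Y < W; ++Y) {
            // (X & (C l>> Y)) == 0  <=>  ((X shl Y) & C) == 0
            assert(((X & (C >> Y)) == 0) == ((((X << Y) & Mask) & C) == 0));
            // (X & (C shl Y)) == 0  <=>  ((X l>> Y) & C) == 0
            assert(((X & ((C << Y) & Mask)) == 0) == (((X >> Y) & C) == 0));
          }
      std::puts("both forms agree for all 6-bit inputs");
    }
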
752
753 /// These two forms are equivalent:
754 /// sub %y, (xor %x, -1)
755 /// add (add %x, 1), %y
756 /// The variant with two add's is IR-canonical.
757 /// Some targets may prefer one to the other.
758 virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
759 // By default, let's assume that everyone prefers the form with two add's.
760 return true;
761 }
762
763 /// Return true if the target wants to use the optimization that
764 /// turns ext(promotableInst1(...(promotableInstN(load)))) into
765 /// promotedInst1(...(promotedInstN(ext(load)))).
766 bool enableExtLdPromotion() const { return EnableExtLdPromotion; }
767
768 /// Return true if the target can combine store(extractelement VectorTy,
769 /// Idx).
770 /// \p Cost[out] gives the cost of that transformation when this is true.
771 virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
772 unsigned &Cost) const {
773 return false;
774 }
775
776 /// Return true if inserting a scalar into a variable element of an undef
777 /// vector is more efficiently handled by splatting the scalar instead.
778 virtual bool shouldSplatInsEltVarIndex(EVT) const {
779 return false;
780 }
781
782 /// Return true if the target always benefits from combining into FMA for a
783 /// given value type. This must typically return false on targets where FMA
784 /// takes more cycles to execute than FADD.
785 virtual bool enableAggressiveFMAFusion(EVT VT) const {
786 return false;
787 }
788
789 /// Return the ValueType of the result of SETCC operations.
790 virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
791 EVT VT) const;
792
793 /// Return the ValueType for comparison libcalls. Comparison libcalls include
794 /// floating-point comparison calls, and Ordered/Unordered check calls on
795 /// floating point numbers.
796 virtual
797 MVT::SimpleValueType getCmpLibcallReturnType() const;
798
799 /// For targets without i1 registers, this gives the nature of the high-bits
800 /// of boolean values held in types wider than i1.
801 ///
802 /// "Boolean values" are special true/false values produced by nodes like
803 /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
804 /// Not to be confused with general values promoted from i1. Some cpus
805 /// distinguish between vectors of boolean and scalars; the isVec parameter
806 /// selects between the two kinds. For example on X86 a scalar boolean should
807 /// be zero extended from i1, while the elements of a vector of booleans
808 /// should be sign extended from i1.
809 ///
810 /// Some cpus also treat floating point types the same way as they treat
811 /// vectors instead of the way they treat scalars.
812 BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
813 if (isVec)
814 return BooleanVectorContents;
815 return isFloat ? BooleanFloatContents : BooleanContents;
816 }
817
818 BooleanContent getBooleanContents(EVT Type) const {
819 return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
820 }
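
To make the BooleanContent conventions above concrete, here is a standalone sketch (not TargetLowering code) of how a 1-bit "true" widens to 32 bits under zero-extension versus sign-extension:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // ZeroOrOneBooleanContent: the upper bits are zero.
      uint32_t ZeroExtTrue = 1u;
      // ZeroOrNegativeOneBooleanContent: the i1 sign bit is copied upward.
      int32_t SignExtTrue = -1;
      std::printf("zext(true) = 0x%08x, sext(true) = 0x%08x\n",
                  (unsigned)ZeroExtTrue, (unsigned)SignExtTrue);
    }
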
821
822 /// Return target scheduling preference.
823 Sched::Preference getSchedulingPreference() const {
824 return SchedPreferenceInfo;
825 }
826
827 /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics
828 /// for different nodes. This function returns the preference (or none) for
829 /// the given node.
830 virtual Sched::Preference getSchedulingPreference(SDNode *) const {
831 return Sched::None;
832 }
833
834 /// Return the register class that should be used for the specified value
835 /// type.
836 virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const {
837 (void)isDivergent;
838 const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
839 assert(RC && "This value type is not natively supported!");
840 return RC;
841 }
842
843 /// Allows target to decide about the register class of the
844 /// specific value that is live outside the defining block.
845 /// Returns true if the value needs uniform register class.
846 virtual bool requiresUniformRegister(MachineFunction &MF,
847 const Value *) const {
848 return false;
849 }
850
851 /// Return the 'representative' register class for the specified value
852 /// type.
853 ///
854 /// The 'representative' register class is the largest legal super-reg
855 /// register class for the register class of the value type. For example, on
856 /// i386 the rep register class for i8, i16, and i32 is GR32, while the rep
857 /// register class is GR64 on x86_64.
858 virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
859 const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
860 return RC;
861 }
862
863 /// Return the cost of the 'representative' register class for the specified
864 /// value type.
865 virtual uint8_t getRepRegClassCostFor(MVT VT) const {
866 return RepRegClassCostForVT[VT.SimpleTy];
867 }
868
869 /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
870 /// instructions, and false if a library call is preferred (e.g for code-size
871 /// reasons).
872 virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
873 return true;
874 }
875
876 /// Return true if the target has native support for the specified value type.
877 /// This means that it has a register that directly holds it without
878 /// promotions or expansions.
879 bool isTypeLegal(EVT VT) const {
880 assert(!VT.isSimple() ||
881 (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
882 return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
883 }
884
885 class ValueTypeActionImpl {
886 /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
887 /// that indicates how instruction selection should deal with the type.
888 LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];
889
890 public:
891 ValueTypeActionImpl() {
892 std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
893 TypeLegal);
894 }
895
896 LegalizeTypeAction getTypeAction(MVT VT) const {
897 return ValueTypeActions[VT.SimpleTy];
898 }
899
900 void setTypeAction(MVT VT, LegalizeTypeAction Action) {
901 ValueTypeActions[VT.SimpleTy] = Action;
902 }
903 };
904
905 const ValueTypeActionImpl &getValueTypeActions() const {
906 return ValueTypeActions;
907 }
908
909 /// Return how we should legalize values of this type, either it is already
910 /// legal (return 'Legal') or we need to promote it to a larger type (return
911 /// 'Promote'), or we need to expand it into multiple registers of smaller
912 /// integer type (return 'Expand'). 'Custom' is not an option.
913 LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
914 return getTypeConversion(Context, VT).first;
915 }
916 LegalizeTypeAction getTypeAction(MVT VT) const {
917 return ValueTypeActions.getTypeAction(VT);
918 }
919
920 /// For types supported by the target, this is an identity function. For
921 /// types that must be promoted to larger types, this returns the larger type
922 /// to promote to. For integer types that are larger than the largest integer
923 /// register, this contains one step in the expansion to get to the smaller
924 /// register. For illegal floating point types, this returns the integer type
925 /// to transform to.
926 EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
927 return getTypeConversion(Context, VT).second;
928 }
929
930 /// For types supported by the target, this is an identity function. For
931 /// types that must be expanded (i.e. integer types that are larger than the
932 /// largest integer register or illegal floating point types), this returns
933 /// the largest legal type it will be expanded to.
934 EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
935 assert(!VT.isVector());
936 while (true) {
937 switch (getTypeAction(Context, VT)) {
938 case TypeLegal:
939 return VT;
940 case TypeExpandInteger:
941 VT = getTypeToTransformTo(Context, VT);
942 break;
943 default:
944 llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 944)
;
945 }
946 }
947 }
948
949 /// Vector types are broken down into some number of legal first class types.
950 /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
951 /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
952 /// turns into 4 EVT::i32 values with both PPC and X86.
953 ///
954 /// This method returns the number of registers needed, and the VT for each
955 /// register. It also returns the VT and quantity of the intermediate values
956 /// before they are promoted/expanded.
957 unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
958 EVT &IntermediateVT,
959 unsigned &NumIntermediates,
960 MVT &RegisterVT) const;
961
962 /// Certain targets such as MIPS require that some types such as vectors are
963 /// always broken down into scalars in some contexts. This occurs even if the
964 /// vector type is legal.
965 virtual unsigned getVectorTypeBreakdownForCallingConv(
966 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
967 unsigned &NumIntermediates, MVT &RegisterVT) const {
968 return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
969 RegisterVT);
970 }
971
972 struct IntrinsicInfo {
973 unsigned opc = 0; // target opcode
974 EVT memVT; // memory VT
975
976 // value representing memory location
977 PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;
978
979 int offset = 0; // offset off of ptrVal
980 uint64_t size = 0; // the size of the memory location
981 // (taken from memVT if zero)
982 MaybeAlign align = Align(1); // alignment
983
984 MachineMemOperand::Flags flags = MachineMemOperand::MONone;
985 IntrinsicInfo() = default;
986 };
987
988 /// Given an intrinsic, checks if on the target the intrinsic will need to map
989 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
990 /// true and stores the intrinsic information into the IntrinsicInfo that was
991 /// passed to the function.
992 virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
993 MachineFunction &,
994 unsigned /*Intrinsic*/) const {
995 return false;
996 }
997
998 /// Returns true if the target can instruction select the specified FP
999 /// immediate natively. If false, the legalizer will materialize the FP
1000 /// immediate as a load from a constant pool.
1001 virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
1002 bool ForCodeSize = false) const {
1003 return false;
1004 }
1005
1006 /// Targets can use this to indicate that they only support *some*
1007 /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
1008 /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
1009 /// legal.
1010 virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
1011 return true;
1012 }
1013
1014 /// Returns true if the operation can trap for the value type.
1015 ///
1016 /// VT must be a legal type. By default, we optimistically assume most
1017 /// operations don't trap except for integer divide and remainder.
1018 virtual bool canOpTrap(unsigned Op, EVT VT) const;
1019
1020 /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
1021 /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
1022 /// constant pool entry.
1023 virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
1024 EVT /*VT*/) const {
1025 return false;
1026 }
1027
1028 /// Return how this operation should be treated: either it is legal, needs to
1029 /// be promoted to a larger size, needs to be expanded to some other code
1030 /// sequence, or the target has a custom expander for it.
1031 LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
1032 if (VT.isExtended()) return Expand;
1033 // If a target-specific SDNode requires legalization, require the target
1034 // to provide custom legalization for it.
1035 if (Op >= array_lengthof(OpActions[0])) return Custom;
1036 return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
1037 }
1038
1039 /// Custom method defined by each target to indicate if an operation which
1040 /// may require a scale is supported natively by the target.
1041 /// If not, the operation is illegal.
1042 virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
1043 unsigned Scale) const {
1044 return false;
1045 }
1046
1047 /// Some fixed point operations may be natively supported by the target but
1048 /// only for specific scales. This method allows for checking
1049 /// if the width is supported by the target for a given operation that may
1050 /// depend on scale.
1051 LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
1052 unsigned Scale) const {
1053 auto Action = getOperationAction(Op, VT);
1054 if (Action != Legal)
1055 return Action;
1056
1057 // This operation is supported in this type but may only work on specific
1058 // scales.
1059 bool Supported;
1060 switch (Op) {
1061 default:
1062 llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation."
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1062)
;
1063 case ISD::SMULFIX:
1064 case ISD::SMULFIXSAT:
1065 case ISD::UMULFIX:
1066 case ISD::UMULFIXSAT:
1067 case ISD::SDIVFIX:
1068 case ISD::SDIVFIXSAT:
1069 case ISD::UDIVFIX:
1070 case ISD::UDIVFIXSAT:
1071 Supported = isSupportedFixedPointOperation(Op, VT, Scale);
1072 break;
1073 }
1074
1075 return Supported ? Action : Expand;
1076 }
1077
1078 // If Op is a strict floating-point operation, return the result
1079 // of getOperationAction for the equivalent non-strict operation.
1080 LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
1081 unsigned EqOpc;
1082 switch (Op) {
1083 default: llvm_unreachable("Unexpected FP pseudo-opcode")::llvm::llvm_unreachable_internal("Unexpected FP pseudo-opcode"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1083)
;
1084#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1085 case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break;
1086#define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \
1087 case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break;
1088#include "llvm/IR/ConstrainedOps.def"
1089 }
1090
1091 return getOperationAction(EqOpc, VT);
1092 }
1093
1094 /// Return true if the specified operation is legal on this target or can be
1095 /// made legal with custom lowering. This is used to help guide high-level
1096 /// lowering decisions. LegalOnly is an optional convenience for code paths
1097 /// traversed pre and post legalisation.
1098 bool isOperationLegalOrCustom(unsigned Op, EVT VT,
1099 bool LegalOnly = false) const {
1100 if (LegalOnly)
1101 return isOperationLegal(Op, VT);
1102
1103 return (VT == MVT::Other || isTypeLegal(VT)) &&
1104 (getOperationAction(Op, VT) == Legal ||
1105 getOperationAction(Op, VT) == Custom);
1106 }
1107
1108 /// Return true if the specified operation is legal on this target or can be
1109 /// made legal using promotion. This is used to help guide high-level lowering
1110 /// decisions. LegalOnly is an optional convenience for code paths traversed
1111 /// pre and post legalisation.
1112 bool isOperationLegalOrPromote(unsigned Op, EVT VT,
1113 bool LegalOnly = false) const {
1114 if (LegalOnly)
1115 return isOperationLegal(Op, VT);
1116
1117 return (VT == MVT::Other || isTypeLegal(VT)) &&
1118 (getOperationAction(Op, VT) == Legal ||
1119 getOperationAction(Op, VT) == Promote);
1120 }
1121
1122 /// Return true if the specified operation is legal on this target or can be
1123 /// made legal with custom lowering or using promotion. This is used to help
1124 /// guide high-level lowering decisions. LegalOnly is an optional convenience
1125 /// for code paths traversed pre and post legalisation.
1126 bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT,
1127 bool LegalOnly = false) const {
1128 if (LegalOnly)
1129 return isOperationLegal(Op, VT);
1130
1131 return (VT == MVT::Other || isTypeLegal(VT)) &&
1132 (getOperationAction(Op, VT) == Legal ||
1133 getOperationAction(Op, VT) == Custom ||
1134 getOperationAction(Op, VT) == Promote);
1135 }
1136
1137 /// Return true if the operation uses custom lowering, regardless of whether
1138 /// the type is legal or not.
1139 bool isOperationCustom(unsigned Op, EVT VT) const {
1140 return getOperationAction(Op, VT) == Custom;
1141 }
1142
1143 /// Return true if lowering to a jump table is allowed.
1144 virtual bool areJTsAllowed(const Function *Fn) const {
1145 if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
1146 return false;
1147
1148 return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
1149 isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
1150 }
1151
1152 /// Check whether the range [Low,High] fits in a machine word.
1153 bool rangeFitsInWord(const APInt &Low, const APInt &High,
1154 const DataLayout &DL) const {
1155 // FIXME: Using the pointer type doesn't seem ideal.
1156 uint64_t BW = DL.getIndexSizeInBits(0u);
1157 uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
1158 return Range <= BW;
1159 }
1160
1161 /// Return true if lowering to a jump table is suitable for a set of case
1162 /// clusters which may contain \p NumCases cases spanning a range of \p Range values.
1163 virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
1164 uint64_t Range, ProfileSummaryInfo *PSI,
1165 BlockFrequencyInfo *BFI) const;
1166
1167 /// Return true if lowering to a bit test is suitable for a set of case
1168 /// clusters which contains \p NumDests unique destinations, \p Low and
1169 /// \p High as its lowest and highest case values, and expects \p NumCmps
1170 /// case value comparisons. Check if the number of destinations, comparison
1171 /// metric, and range are all suitable.
1172 bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
1173 const APInt &Low, const APInt &High,
1174 const DataLayout &DL) const {
1175 // FIXME: I don't think NumCmps is the correct metric: a single case and a
1176 // range of cases both require only one branch to lower. Just looking at the
1177 // number of clusters and destinations should be enough to decide whether to
1178 // build bit tests.
1179
1180 // To lower a range with bit tests, the range must fit the bitwidth of a
1181 // machine word.
1182 if (!rangeFitsInWord(Low, High, DL))
1183 return false;
1184
1185 // Decide whether it's profitable to lower this range with bit tests. Each
1186 // destination requires a bit test and branch, and there is an overall range
1187 // check branch. For a small number of clusters, separate comparisons might
1188 // be cheaper, and for many destinations, splitting the range might be
1189 // better.
1190 return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
1191 (NumDests == 3 && NumCmps >= 6);
1192 }
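
A standalone sketch that evaluates the word-size check and the profitability thresholds above for one hypothetical switch (3 destinations, 6 case comparisons, case values spanning [10, 40], 64-bit index width); the numbers are made up, and the range check uses the simplified High - Low + 1 form rather than getLimitedValue:

    #include <cstdint>
    #include <cstdio>

    int main() {
      unsigned NumDests = 3, NumCmps = 6;
      uint64_t Low = 10, High = 40, BW = 64;          // 64-bit machine word
      bool FitsInWord = (High - Low + 1) <= BW;       // simplified range check
      bool Profitable = (NumDests == 1 && NumCmps >= 3) ||
                        (NumDests == 2 && NumCmps >= 5) ||
                        (NumDests == 3 && NumCmps >= 6);
      std::printf("fits=%d, use bit tests=%d\n", FitsInWord,
                  FitsInWord && Profitable);
    }
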
1193
1194 /// Return true if the specified operation is illegal on this target or
1195 /// unlikely to be made legal with custom lowering. This is used to help guide
1196 /// high-level lowering decisions.
1197 bool isOperationExpand(unsigned Op, EVT VT) const {
1198 return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
1199 }
1200
1201 /// Return true if the specified operation is legal on this target.
1202 bool isOperationLegal(unsigned Op, EVT VT) const {
1203 return (VT == MVT::Other || isTypeLegal(VT)) &&
1204 getOperationAction(Op, VT) == Legal;
1205 }
1206
1207 /// Return how this load with extension should be treated: either it is legal,
1208 /// needs to be promoted to a larger size, needs to be expanded to some other
1209 /// code sequence, or the target has a custom expander for it.
1210 LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
1211 EVT MemVT) const {
1212 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1213 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1214 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1215 assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
1216 MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
1217 unsigned Shift = 4 * ExtType;
1218 return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
1219 }
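
The lookup above decodes one 4-bit action per extension type from a packed table entry. A standalone sketch of that nibble packing, using a made-up entry value rather than the real LoadExtActions table:

    #include <cstdint>
    #include <cstdio>

    int main() {
      // Hypothetical packed entry: action 1 for ext type 0, action 3 for ext type 2.
      uint16_t Entry = 0;
      Entry |= 1u << (4 * 0);
      Entry |= 3u << (4 * 2);
      for (unsigned ExtType = 0; ExtType < 4; ++ExtType) {
        unsigned Shift = 4 * ExtType;                 // as in the lookup above
        std::printf("ext type %u -> action %u\n", ExtType,
                    (unsigned)((Entry >> Shift) & 0xf));
      }
    }
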
1220
1221 /// Return true if the specified load with extension is legal on this target.
1222 bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1223 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal;
1224 }
1225
1226 /// Return true if the specified load with extension is legal or custom
1227 /// on this target.
1228 bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const {
1229 return getLoadExtAction(ExtType, ValVT, MemVT) == Legal ||
1230 getLoadExtAction(ExtType, ValVT, MemVT) == Custom;
1231 }
1232
1233 /// Return how this store with truncation should be treated: either it is
1234 /// legal, needs to be promoted to a larger size, needs to be expanded to some
1235 /// other code sequence, or the target has a custom expander for it.
1236 LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const {
1237 if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
1238 unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
1239 unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
1240 assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&
1241 "Table isn't big enough!");
1242 return TruncStoreActions[ValI][MemI];
1243 }
1244
1245 /// Return true if the specified store with truncation is legal on this
1246 /// target.
1247 bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const {
1248 return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal;
1249 }
1250
1251 /// Return true if the specified store with truncation has a solution on this
1252 /// target.
1253 bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const {
1254 return isTypeLegal(ValVT) &&
1255 (getTruncStoreAction(ValVT, MemVT) == Legal ||
1256 getTruncStoreAction(ValVT, MemVT) == Custom);
1257 }
1258
1259 /// Return how the indexed load should be treated: either it is legal, needs
1260 /// to be promoted to a larger size, needs to be expanded to some other code
1261 /// sequence, or the target has a custom expander for it.
1262 LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const {
1263 return getIndexedModeAction(IdxMode, VT, IMAB_Load);
1264 }
1265
1266 /// Return true if the specified indexed load is legal on this target.
1267 bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const {
1268 return VT.isSimple() &&
1269 (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1270 getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1271 }
1272
1273 /// Return how the indexed store should be treated: either it is legal, needs
1274 /// to be promoted to a larger size, needs to be expanded to some other code
1275 /// sequence, or the target has a custom expander for it.
1276 LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const {
1277 return getIndexedModeAction(IdxMode, VT, IMAB_Store);
1278 }
1279
1280 /// Return true if the specified indexed store is legal on this target.
1281 bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const {
1282 return VT.isSimple() &&
1283 (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1284 getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1285 }
1286
1287 /// Return how the indexed masked load should be treated: either it is legal, needs
1288 /// to be promoted to a larger size, needs to be expanded to some other code
1289 /// sequence, or the target has a custom expander for it.
1290 LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const {
1291 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad);
1292 }
1293
1294 /// Return true if the specified indexed masked load is legal on this target.
1295 bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const {
1296 return VT.isSimple() &&
1297 (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal ||
1298 getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom);
1299 }
1300
1301 /// Return how the indexed masked store should be treated: either it is legal, needs
1302 /// to be promoted to a larger size, needs to be expanded to some other code
1303 /// sequence, or the target has a custom expander for it.
1304 LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const {
1305 return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore);
1306 }
1307
1309 /// Return true if the specified indexed masked store is legal on this target.
1309 bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const {
1310 return VT.isSimple() &&
1311 (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal ||
1312 getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom);
1313 }
1314
1315 /// Return how the condition code should be treated: either it is legal, needs
1316 /// to be expanded to some other code sequence, or the target has a custom
1317 /// expander for it.
1318 LegalizeAction
1319 getCondCodeAction(ISD::CondCode CC, MVT VT) const {
1320 assert((unsigned)CC < array_lengthof(CondCodeActions) &&
1321 ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&
1322 "Table isn't big enough!");
1323 // See setCondCodeAction for how this is encoded.
1324 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
1325 uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3];
1326 LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF);
1327 assert(Action != Promote && "Can't promote condition code!");
1328 return Action;
1329 }
1330
1331 /// Return true if the specified condition code is legal on this target.
1332 bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const {
1333 return getCondCodeAction(CC, VT) == Legal;
1334 }
1335
1336 /// Return true if the specified condition code is legal or custom on this
1337 /// target.
1338 bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const {
1339 return getCondCodeAction(CC, VT) == Legal ||
1340 getCondCodeAction(CC, VT) == Custom;
1341 }
1342
1343 /// If the action for this operation is to promote, this method returns the
1344 /// ValueType to promote to.
1345 MVT getTypeToPromoteTo(unsigned Op, MVT VT) const {
1346 assert(getOperationAction(Op, VT) == Promote &&
1347 "This operation isn't promoted!");
1348
1349 // See if this has an explicit type specified.
1350 std::map<std::pair<unsigned, MVT::SimpleValueType>,
1351 MVT::SimpleValueType>::const_iterator PTTI =
1352 PromoteToType.find(std::make_pair(Op, VT.SimpleTy));
1353 if (PTTI != PromoteToType.end()) return PTTI->second;
1354
1355 assert((VT.isInteger() || VT.isFloatingPoint()) &&
1356 "Cannot autopromote this type, add it with AddPromotedToType.");
1357
1358 MVT NVT = VT;
1359 do {
1360 NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1);
1361 assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&
1362 "Didn't find type to promote to!");
1363 } while (!isTypeLegal(NVT) ||
1364 getOperationAction(Op, NVT) == Promote);
1365 return NVT;
1366 }
1367
1368 /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM
1369 /// operations except for the pointer size. If AllowUnknown is true, this
1370 /// will return MVT::Other for types with no EVT counterpart (e.g. structs),
1371 /// otherwise it will assert.
1372 EVT getValueType(const DataLayout &DL, Type *Ty,
1373 bool AllowUnknown = false) const {
1374 // Lower scalar pointers to native pointer types.
1375 if (auto *PTy = dyn_cast<PointerType>(Ty))
  32: Assuming 'PTy' is null
  33: Taking false branch
1376 return getPointerTy(DL, PTy->getAddressSpace());
1377
1378 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
  34: Assuming 'VTy' is non-null
  35: Taking true branch
1379 Type *EltTy = VTy->getElementType();
1380 // Lower vectors of pointers to native pointer types.
1381 if (auto *PTy = dyn_cast<PointerType>(EltTy)) {
  36: Assuming 'EltTy' is not a 'PointerType'
  36.1: 'PTy' is null
  37: Taking false branch
1382 EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace()));
1383 EltTy = PointerTy.getTypeForEVT(Ty->getContext());
1384 }
1385 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false),
  38: Called C++ object pointer is null
1386 VTy->getElementCount());
1387 }
1388
1389 return EVT::getEVT(Ty, AllowUnknown);
1390 }
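
The path notes above rely on dyn_cast<PointerType>(EltTy) yielding a null pointer when EltTy is not a PointerType. A standalone analogue of that check-before-use pattern, using dynamic_cast and made-up Type/PointerType/VectorType stand-ins in place of the LLVM classes:

    #include <cstdio>

    struct Type { virtual ~Type() = default; };
    struct PointerType : Type {};
    struct VectorType : Type {};

    int main() {
      VectorType V;
      Type *EltTy = &V;   // an element type that is not a pointer
      // dynamic_cast plays the role of llvm::dyn_cast here: it yields nullptr
      // when EltTy is not a PointerType, so the result must be checked before
      // it is dereferenced.
      if (auto *PTy = dynamic_cast<PointerType *>(EltTy))
        std::printf("element is a PointerType at %p\n", (void *)PTy);
      else
        std::printf("element is not a PointerType; PTy would be null\n");
    }
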
1391
1392 EVT getMemValueType(const DataLayout &DL, Type *Ty,
1393 bool AllowUnknown = false) const {
1394 // Lower scalar pointers to native pointer types.
1395 if (PointerType *PTy = dyn_cast<PointerType>(Ty))
1396 return getPointerMemTy(DL, PTy->getAddressSpace());
1397 else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1398 Type *Elm = VTy->getElementType();
1399 if (PointerType *PT = dyn_cast<PointerType>(Elm)) {
1400 EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace()));
1401 Elm = PointerTy.getTypeForEVT(Ty->getContext());
1402 }
1403 return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false),
1404 VTy->getElementCount());
1405 }
1406
1407 return getValueType(DL, Ty, AllowUnknown);
1408 }
1409
1410
1411 /// Return the MVT corresponding to this LLVM type. See getValueType.
1412 MVT getSimpleValueType(const DataLayout &DL, Type *Ty,
1413 bool AllowUnknown = false) const {
1414 return getValueType(DL, Ty, AllowUnknown).getSimpleVT();
1415 }
1416
1417 /// Return the desired alignment for ByVal or InAlloca aggregate function
1418 /// arguments in the caller parameter area. This is the actual alignment, not
1419 /// its logarithm.
1420 virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const;
1421
1422 /// Return the type of registers that this ValueType will eventually require.
1423 MVT getRegisterType(MVT VT) const {
1424 assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT));
1425 return RegisterTypeForVT[VT.SimpleTy];
1426 }
1427
1428 /// Return the type of registers that this ValueType will eventually require.
1429 MVT getRegisterType(LLVMContext &Context, EVT VT) const {
1430 if (VT.isSimple()) {
1431 assert((unsigned)VT.getSimpleVT().SimpleTy <
1432 array_lengthof(RegisterTypeForVT));
1433 return RegisterTypeForVT[VT.getSimpleVT().SimpleTy];
1434 }
1435 if (VT.isVector()) {
1436 EVT VT1;
1437 MVT RegisterVT;
1438 unsigned NumIntermediates;
1439 (void)getVectorTypeBreakdown(Context, VT, VT1,
1440 NumIntermediates, RegisterVT);
1441 return RegisterVT;
1442 }
1443 if (VT.isInteger()) {
1444 return getRegisterType(Context, getTypeToTransformTo(Context, VT));
1445 }
1446 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1446)
;
1447 }
1448
1449 /// Return the number of registers that this ValueType will eventually
1450 /// require.
1451 ///
1452 /// This is one for any types promoted to live in larger registers, but may be
1453 /// more than one for types (like i64) that are split into pieces. For types
1454 /// like i140, which are first promoted then expanded, it is the number of
1455 /// registers needed to hold all the bits of the original type. For an i140
1456 /// on a 32 bit machine this means 5 registers.
1457 unsigned getNumRegisters(LLVMContext &Context, EVT VT) const {
1458 if (VT.isSimple()) {
1459 assert((unsigned)VT.getSimpleVT().SimpleTy <
1460 array_lengthof(NumRegistersForVT));
1461 return NumRegistersForVT[VT.getSimpleVT().SimpleTy];
1462 }
1463 if (VT.isVector()) {
1464 EVT VT1;
1465 MVT VT2;
1466 unsigned NumIntermediates;
1467 return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2);
1468 }
1469 if (VT.isInteger()) {
1470 unsigned BitWidth = VT.getSizeInBits();
1471 unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits();
1472 return (BitWidth + RegWidth - 1) / RegWidth;
1473 }
1474 llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 1474)
;
1475 }
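
A standalone sketch of the ceiling division above for the i140-on-a-32-bit-machine example from the comment:

    #include <cstdio>

    int main() {
      unsigned BitWidth = 140, RegWidth = 32;
      unsigned NumRegs = (BitWidth + RegWidth - 1) / RegWidth;   // ceiling division = 5
      std::printf("i%u needs %u x i%u registers\n", BitWidth, NumRegs, RegWidth);
    }
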
1476
1477 /// Certain combinations of ABIs, targets, and features require that types
1478 /// are legal for some operations and not for other operations.
1479 /// For MIPS all vector types must be passed through the integer register set.
1480 virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context,
1481 CallingConv::ID CC, EVT VT) const {
1482 return getRegisterType(Context, VT);
1483 }
1484
1485 /// Certain targets require unusual breakdowns of certain types. For MIPS,
1486 /// this occurs when a vector type is used, as vectors are passed through the
1487 /// integer register set.
1488 virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context,
1489 CallingConv::ID CC,
1490 EVT VT) const {
1491 return getNumRegisters(Context, VT);
1492 }
1493
1494  /// Certain targets have context-sensitive alignment requirements, where one
1495 /// type has the alignment requirement of another type.
1496 virtual Align getABIAlignmentForCallingConv(Type *ArgTy,
1497 DataLayout DL) const {
1498 return DL.getABITypeAlign(ArgTy);
1499 }
1500
1501 /// If true, then instruction selection should seek to shrink the FP constant
1502 /// of the specified type to a smaller type in order to save space and / or
1503 /// reduce runtime.
1504 virtual bool ShouldShrinkFPConstant(EVT) const { return true; }
1505
1506 /// Return true if it is profitable to reduce a load to a smaller type.
1507  /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x
1508 virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy,
1509 EVT NewVT) const {
1510 // By default, assume that it is cheaper to extract a subvector from a wide
1511 // vector load rather than creating multiple narrow vector loads.
1512 if (NewVT.isVector() && !Load->hasOneUse())
1513 return false;
1514
1515 return true;
1516 }
1517
1518 /// When splitting a value of the specified type into parts, does the Lo
1519 /// or Hi part come first? This usually follows the endianness, except
1520 /// for ppcf128, where the Hi part always comes first.
1521 bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const {
1522 return DL.isBigEndian() || VT == MVT::ppcf128;
1523 }
1524
1525 /// If true, the target has custom DAG combine transformations that it can
1526 /// perform for the specified node.
1527 bool hasTargetDAGCombine(ISD::NodeType NT) const {
1528    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
1529 return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7));
1530 }
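// Illustrative sketch, not part of TargetLowering.h: hasTargetDAGCombine reads
// TargetDAGCombineArray as a packed bit set, with NT >> 3 picking the byte and
// NT & 7 picking the bit inside it, matching the setter further down in this
// file (setTargetDAGCombine). The names below are hypothetical stand-ins.
static unsigned char ExampleCombineBits[32] = {};
void exampleSetCombine(unsigned NT) {
  ExampleCombineBits[NT >> 3] |= 1u << (NT & 7);          // mark node type NT
}
bool exampleHasCombine(unsigned NT) {
  return ExampleCombineBits[NT >> 3] & (1u << (NT & 7));  // test node type NT
}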
1531
1532 unsigned getGatherAllAliasesMaxDepth() const {
1533 return GatherAllAliasesMaxDepth;
1534 }
1535
1536 /// Returns the size of the platform's va_list object.
1537 virtual unsigned getVaListSizeInBits(const DataLayout &DL) const {
1538 return getPointerTy(DL).getSizeInBits();
1539 }
1540
1541 /// Get maximum # of store operations permitted for llvm.memset
1542 ///
1543 /// This function returns the maximum number of store operations permitted
1544 /// to replace a call to llvm.memset. The value is set by the target at the
1545 /// performance threshold for such a replacement. If OptSize is true,
1546 /// return the limit for functions that have OptSize attribute.
1547 unsigned getMaxStoresPerMemset(bool OptSize) const {
1548 return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset;
1549 }
1550
1551 /// Get maximum # of store operations permitted for llvm.memcpy
1552 ///
1553 /// This function returns the maximum number of store operations permitted
1554 /// to replace a call to llvm.memcpy. The value is set by the target at the
1555 /// performance threshold for such a replacement. If OptSize is true,
1556 /// return the limit for functions that have OptSize attribute.
1557 unsigned getMaxStoresPerMemcpy(bool OptSize) const {
1558 return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy;
1559 }
1560
1561 /// \brief Get maximum # of store operations to be glued together
1562 ///
1563 /// This function returns the maximum number of store operations permitted
1564 /// to glue together during lowering of llvm.memcpy. The value is set by
1565  /// the target at the performance threshold for such a replacement.
1566 virtual unsigned getMaxGluedStoresPerMemcpy() const {
1567 return MaxGluedStoresPerMemcpy;
1568 }
1569
1570 /// Get maximum # of load operations permitted for memcmp
1571 ///
1572 /// This function returns the maximum number of load operations permitted
1573 /// to replace a call to memcmp. The value is set by the target at the
1574 /// performance threshold for such a replacement. If OptSize is true,
1575 /// return the limit for functions that have OptSize attribute.
1576 unsigned getMaxExpandSizeMemcmp(bool OptSize) const {
1577 return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp;
1578 }
1579
1580 /// Get maximum # of store operations permitted for llvm.memmove
1581 ///
1582 /// This function returns the maximum number of store operations permitted
1583 /// to replace a call to llvm.memmove. The value is set by the target at the
1584 /// performance threshold for such a replacement. If OptSize is true,
1585 /// return the limit for functions that have OptSize attribute.
1586 unsigned getMaxStoresPerMemmove(bool OptSize) const {
1587 return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove;
1588 }
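// Illustrative sketch, not part of TargetLowering.h: how limits like
// getMaxStoresPerMemset are typically used. A memset of Size bytes emitted
// with stores of StoreBytes each takes ceil(Size / StoreBytes) stores, and
// inline expansion only pays off while that count stays within the target's
// limit. All names here are hypothetical.
bool exampleShouldExpandInline(unsigned Size, unsigned StoreBytes,
                               unsigned MaxStores) {
  unsigned NumStores = (Size + StoreBytes - 1) / StoreBytes; // ceiling division
  return NumStores <= MaxStores;
}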
1589
1590 /// Determine if the target supports unaligned memory accesses.
1591 ///
1592 /// This function returns true if the target allows unaligned memory accesses
1593 /// of the specified type in the given address space. If true, it also returns
1594 /// whether the unaligned memory access is "fast" in the last argument by
1595 /// reference. This is used, for example, in situations where an array
1596 /// copy/move/set is converted to a sequence of store operations. Its use
1597 /// helps to ensure that such replacements don't generate code that causes an
1598 /// alignment error (trap) on the target machine.
1599 virtual bool allowsMisalignedMemoryAccesses(
1600 EVT, unsigned AddrSpace = 0, unsigned Align = 1,
1601 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1602 bool * /*Fast*/ = nullptr) const {
1603 return false;
1604 }
1605
1606 /// LLT handling variant.
1607 virtual bool allowsMisalignedMemoryAccesses(
1608 LLT, unsigned AddrSpace = 0, Align Alignment = Align(1),
1609 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1610 bool * /*Fast*/ = nullptr) const {
1611 return false;
1612 }
1613
1614 /// This function returns true if the memory access is aligned or if the
1615 /// target allows this specific unaligned memory access. If the access is
1616 /// allowed, the optional final parameter returns if the access is also fast
1617 /// (as defined by the target).
1618 bool allowsMemoryAccessForAlignment(
1619 LLVMContext &Context, const DataLayout &DL, EVT VT,
1620 unsigned AddrSpace = 0, Align Alignment = Align(1),
1621 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1622 bool *Fast = nullptr) const;
1623
1624 /// Return true if the memory access of this type is aligned or if the target
1625 /// allows this specific unaligned access for the given MachineMemOperand.
1626 /// If the access is allowed, the optional final parameter returns if the
1627 /// access is also fast (as defined by the target).
1628 bool allowsMemoryAccessForAlignment(LLVMContext &Context,
1629 const DataLayout &DL, EVT VT,
1630 const MachineMemOperand &MMO,
1631 bool *Fast = nullptr) const;
1632
1633 /// Return true if the target supports a memory access of this type for the
1634 /// given address space and alignment. If the access is allowed, the optional
1635 /// final parameter returns if the access is also fast (as defined by the
1636 /// target).
1637 virtual bool
1638 allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1639 unsigned AddrSpace = 0, Align Alignment = Align(1),
1640 MachineMemOperand::Flags Flags = MachineMemOperand::MONone,
1641 bool *Fast = nullptr) const;
1642
1643 /// Return true if the target supports a memory access of this type for the
1644 /// given MachineMemOperand. If the access is allowed, the optional
1645 /// final parameter returns if the access is also fast (as defined by the
1646 /// target).
1647 bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT,
1648 const MachineMemOperand &MMO,
1649 bool *Fast = nullptr) const;
1650
1651 /// Returns the target specific optimal type for load and store operations as
1652 /// a result of memset, memcpy, and memmove lowering.
1653 /// It returns EVT::Other if the type should be determined using generic
1654 /// target-independent logic.
1655 virtual EVT
1656 getOptimalMemOpType(const MemOp &Op,
1657 const AttributeList & /*FuncAttributes*/) const {
1658 return MVT::Other;
1659 }
1660
1661 /// LLT returning variant.
1662 virtual LLT
1663 getOptimalMemOpLLT(const MemOp &Op,
1664 const AttributeList & /*FuncAttributes*/) const {
1665 return LLT();
1666 }
1667
1668 /// Returns true if it's safe to use load / store of the specified type to
1669 /// expand memcpy / memset inline.
1670 ///
1671 /// This is mostly true for all types except for some special cases. For
1672 /// example, on X86 targets without SSE2 f64 load / store are done with fldl /
1673 /// fstpl which also does type conversion. Note the specified type doesn't
1674 /// have to be legal as the hook is used before type legalization.
1675 virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; }
1676
1677 /// Return lower limit for number of blocks in a jump table.
1678 virtual unsigned getMinimumJumpTableEntries() const;
1679
1680 /// Return lower limit of the density in a jump table.
1681 unsigned getMinimumJumpTableDensity(bool OptForSize) const;
1682
1683 /// Return upper limit for number of entries in a jump table.
1684 /// Zero if no limit.
1685 unsigned getMaximumJumpTableSize() const;
1686
1687 virtual bool isJumpTableRelative() const;
1688
1689 /// Return true if a mulh[s|u] node for a specific type is cheaper than
1690 /// a multiply followed by a shift. This is false by default.
1691 virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; }
1692
1693 /// If a physical register, this specifies the register that
1694 /// llvm.savestack/llvm.restorestack should save and restore.
1695 unsigned getStackPointerRegisterToSaveRestore() const {
1696 return StackPointerRegisterToSaveRestore;
1697 }
1698
1699 /// If a physical register, this returns the register that receives the
1700 /// exception address on entry to an EH pad.
1701 virtual Register
1702 getExceptionPointerRegister(const Constant *PersonalityFn) const {
1703 return Register();
1704 }
1705
1706 /// If a physical register, this returns the register that receives the
1707 /// exception typeid on entry to a landing pad.
1708 virtual Register
1709 getExceptionSelectorRegister(const Constant *PersonalityFn) const {
1710 return Register();
1711 }
1712
1713 virtual bool needsFixedCatchObjects() const {
1714 report_fatal_error("Funclet EH is not implemented for this target");
1715 }
1716
1717 /// Return the minimum stack alignment of an argument.
1718 Align getMinStackArgumentAlignment() const {
1719 return MinStackArgumentAlignment;
1720 }
1721
1722 /// Return the minimum function alignment.
1723 Align getMinFunctionAlignment() const { return MinFunctionAlignment; }
1724
1725 /// Return the preferred function alignment.
1726 Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; }
1727
1728 /// Return the preferred loop alignment.
1729 virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const {
1730 return PrefLoopAlignment;
1731 }
1732
1733 /// Should loops be aligned even when the function is marked OptSize (but not
1734 /// MinSize).
1735 virtual bool alignLoopsWithOptSize() const {
1736 return false;
1737 }
1738
1739 /// If the target has a standard location for the stack protector guard,
1740 /// returns the address of that location. Otherwise, returns nullptr.
1741 /// DEPRECATED: please override useLoadStackGuardNode and customize
1742 /// LOAD_STACK_GUARD, or customize \@llvm.stackguard().
1743 virtual Value *getIRStackGuard(IRBuilder<> &IRB) const;
1744
1745  /// Inserts necessary declarations for SSP (stack protection) purposes.
1746 /// Should be used only when getIRStackGuard returns nullptr.
1747 virtual void insertSSPDeclarations(Module &M) const;
1748
1749 /// Return the variable that's previously inserted by insertSSPDeclarations,
1750 /// if any, otherwise return nullptr. Should be used only when
1751 /// getIRStackGuard returns nullptr.
1752 virtual Value *getSDagStackGuard(const Module &M) const;
1753
1754 /// If this function returns true, stack protection checks should XOR the
1755 /// frame pointer (or whichever pointer is used to address locals) into the
1756 /// stack guard value before checking it. getIRStackGuard must return nullptr
1757 /// if this returns true.
1758 virtual bool useStackGuardXorFP() const { return false; }
1759
1760 /// If the target has a standard stack protection check function that
1761 /// performs validation and error handling, returns the function. Otherwise,
1762 /// returns nullptr. Must be previously inserted by insertSSPDeclarations.
1763 /// Should be used only when getIRStackGuard returns nullptr.
1764 virtual Function *getSSPStackGuardCheck(const Module &M) const;
1765
1766protected:
1767 Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB,
1768 bool UseTLS) const;
1769
1770public:
1771 /// Returns the target-specific address of the unsafe stack pointer.
1772 virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const;
1773
1774 /// Returns the name of the symbol used to emit stack probes or the empty
1775 /// string if not applicable.
1776 virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; }
1777
1778 virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; }
1779
1780 virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const {
1781 return "";
1782 }
1783
1784 /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we
1785 /// are happy to sink it into basic blocks. A cast may be free, but not
1786 /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer.
1787 virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const;
1788
1789 /// Return true if the pointer arguments to CI should be aligned by aligning
1790 /// the object whose address is being passed. If so then MinSize is set to the
1791 /// minimum size the object must be to be aligned and PrefAlign is set to the
1792 /// preferred alignment.
1793 virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/,
1794 unsigned & /*PrefAlign*/) const {
1795 return false;
1796 }
1797
1798 //===--------------------------------------------------------------------===//
1799 /// \name Helpers for TargetTransformInfo implementations
1800 /// @{
1801
1802 /// Get the ISD node that corresponds to the Instruction class opcode.
1803 int InstructionOpcodeToISD(unsigned Opcode) const;
1804
1805 /// Estimate the cost of type-legalization and the legalized type.
1806 std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL,
1807 Type *Ty) const;
1808
1809 /// @}
1810
1811 //===--------------------------------------------------------------------===//
1812 /// \name Helpers for atomic expansion.
1813 /// @{
1814
1815 /// Returns the maximum atomic operation size (in bits) supported by
1816 /// the backend. Atomic operations greater than this size (as well
1817 /// as ones that are not naturally aligned), will be expanded by
1818 /// AtomicExpandPass into an __atomic_* library call.
1819 unsigned getMaxAtomicSizeInBitsSupported() const {
1820 return MaxAtomicSizeInBitsSupported;
1821 }
1822
1823 /// Returns the size of the smallest cmpxchg or ll/sc instruction
1824 /// the backend supports. Any smaller operations are widened in
1825 /// AtomicExpandPass.
1826 ///
1827 /// Note that *unlike* operations above the maximum size, atomic ops
1828 /// are still natively supported below the minimum; they just
1829 /// require a more complex expansion.
1830 unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; }
1831
1832 /// Whether the target supports unaligned atomic operations.
1833 bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; }
1834
1835 /// Whether AtomicExpandPass should automatically insert fences and reduce
1836 /// ordering for this atomic. This should be true for most architectures with
1837 /// weak memory ordering. Defaults to false.
1838 virtual bool shouldInsertFencesForAtomic(const Instruction *I) const {
1839 return false;
1840 }
1841
1842 /// Perform a load-linked operation on Addr, returning a "Value *" with the
1843 /// corresponding pointee type. This may entail some non-trivial operations to
1844 /// truncate or reconstruct types that will be illegal in the backend. See
1845 /// ARMISelLowering for an example implementation.
1846 virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
1847 AtomicOrdering Ord) const {
1848    llvm_unreachable("Load linked unimplemented on this target");
1849 }
1850
1851 /// Perform a store-conditional operation to Addr. Return the status of the
1852 /// store. This should be 0 if the store succeeded, non-zero otherwise.
1853 virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
1854 Value *Addr, AtomicOrdering Ord) const {
1855    llvm_unreachable("Store conditional unimplemented on this target");
1856 }
1857
1858 /// Perform a masked atomicrmw using a target-specific intrinsic. This
1859 /// represents the core LL/SC loop which will be lowered at a late stage by
1860 /// the backend.
1861 virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder,
1862 AtomicRMWInst *AI,
1863 Value *AlignedAddr, Value *Incr,
1864 Value *Mask, Value *ShiftAmt,
1865 AtomicOrdering Ord) const {
1866    llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
1867 }
1868
1869 /// Perform a masked cmpxchg using a target-specific intrinsic. This
1870 /// represents the core LL/SC loop which will be lowered at a late stage by
1871 /// the backend.
1872 virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
1873 IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
1874 Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
1875    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
1876 }
1877
1878 /// Inserts in the IR a target-specific intrinsic specifying a fence.
1879 /// It is called by AtomicExpandPass before expanding an
1880 /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
1881 /// if shouldInsertFencesForAtomic returns true.
1882 ///
1883 /// Inst is the original atomic instruction, prior to other expansions that
1884 /// may be performed.
1885 ///
1886 /// This function should either return a nullptr, or a pointer to an IR-level
1887 /// Instruction*. Even complex fence sequences can be represented by a
1888 /// single Instruction* through an intrinsic to be lowered later.
1889 /// Backends should override this method to produce target-specific intrinsic
1890 /// for their fences.
1891 /// FIXME: Please note that the default implementation here in terms of
1892 /// IR-level fences exists for historical/compatibility reasons and is
1893 /// *unsound* ! Fences cannot, in general, be used to restore sequential
1894 /// consistency. For example, consider the following example:
1895 /// atomic<int> x = y = 0;
1896 /// int r1, r2, r3, r4;
1897 /// Thread 0:
1898 /// x.store(1);
1899 /// Thread 1:
1900 /// y.store(1);
1901 /// Thread 2:
1902 /// r1 = x.load();
1903 /// r2 = y.load();
1904 /// Thread 3:
1905 /// r3 = y.load();
1906 /// r4 = x.load();
1907 /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all
1908 /// seq_cst. But if they are lowered to monotonic accesses, no amount of
1909 /// IR-level fences can prevent it.
1910 /// @{
1911 virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst,
1912 AtomicOrdering Ord) const {
1913 if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore())
1914 return Builder.CreateFence(Ord);
1915 else
1916 return nullptr;
1917 }
1918
1919 virtual Instruction *emitTrailingFence(IRBuilder<> &Builder,
1920 Instruction *Inst,
1921 AtomicOrdering Ord) const {
1922 if (isAcquireOrStronger(Ord))
1923 return Builder.CreateFence(Ord);
1924 else
1925 return nullptr;
1926 }
1927 /// @}
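// Illustrative sketch, not part of TargetLowering.h: the default fence
// placement above, restated with a simplified ordering enum. A fence is
// emitted before release-or-stronger operations that store, and after
// acquire-or-stronger operations; everything here is a hypothetical mirror.
enum class ExampleOrdering { Monotonic, Acquire, Release, AcquireRelease, SeqCst };
bool exampleNeedsLeadingFence(ExampleOrdering Ord, bool HasAtomicStore) {
  return HasAtomicStore && (Ord == ExampleOrdering::Release ||
                            Ord == ExampleOrdering::AcquireRelease ||
                            Ord == ExampleOrdering::SeqCst);
}
bool exampleNeedsTrailingFence(ExampleOrdering Ord) {
  return Ord == ExampleOrdering::Acquire ||
         Ord == ExampleOrdering::AcquireRelease ||
         Ord == ExampleOrdering::SeqCst;
}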
1928
1929 // Emits code that executes when the comparison result in the ll/sc
1930 // expansion of a cmpxchg instruction is such that the store-conditional will
1931 // not execute. This makes it possible to balance out the load-linked with
1932 // a dedicated instruction, if desired.
1933 // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
1934 // be unnecessarily held, except if clrex, inserted by this hook, is executed.
1935 virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
1936
1937 /// Returns true if the given (atomic) store should be expanded by the
1938 /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
1939 virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
1940 return false;
1941 }
1942
1943 /// Returns true if arguments should be sign-extended in lib calls.
1944 virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const {
1945 return IsSigned;
1946 }
1947
1948 /// Returns true if arguments should be extended in lib calls.
1949 virtual bool shouldExtendTypeInLibCall(EVT Type) const {
1950 return true;
1951 }
1952
1953 /// Returns how the given (atomic) load should be expanded by the
1954 /// IR-level AtomicExpand pass.
1955 virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const {
1956 return AtomicExpansionKind::None;
1957 }
1958
1959 /// Returns how the given atomic cmpxchg should be expanded by the IR-level
1960 /// AtomicExpand pass.
1961 virtual AtomicExpansionKind
1962 shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
1963 return AtomicExpansionKind::None;
1964 }
1965
1966 /// Returns how the IR-level AtomicExpand pass should expand the given
1967 /// AtomicRMW, if at all. Default is to never expand.
1968 virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
1969 return RMW->isFloatingPointOperation() ?
1970 AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None;
1971 }
1972
1973 /// On some platforms, an AtomicRMW that never actually modifies the value
1974 /// (such as fetch_add of 0) can be turned into a fence followed by an
1975 /// atomic load. This may sound useless, but it makes it possible for the
1976 /// processor to keep the cacheline shared, dramatically improving
1977 /// performance. And such idempotent RMWs are useful for implementing some
1978 /// kinds of locks, see for example (justification + benchmarks):
1979 /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
1980 /// This method tries doing that transformation, returning the atomic load if
1981 /// it succeeds, and nullptr otherwise.
1982 /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo
1983 /// another round of expansion.
1984 virtual LoadInst *
1985 lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const {
1986 return nullptr;
1987 }
1988
1989 /// Returns how the platform's atomic operations are extended (ZERO_EXTEND,
1990 /// SIGN_EXTEND, or ANY_EXTEND).
1991 virtual ISD::NodeType getExtendForAtomicOps() const {
1992 return ISD::ZERO_EXTEND;
1993 }
1994
1995 /// Returns how the platform's atomic compare and swap expects its comparison
1996 /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is
1997 /// separate from getExtendForAtomicOps, which is concerned with the
1998 /// sign-extension of the instruction's output, whereas here we are concerned
1999 /// with the sign-extension of the input. For targets with compare-and-swap
2000 /// instructions (or sub-word comparisons in their LL/SC loop expansions),
2001 /// the input can be ANY_EXTEND, but the output will still have a specific
2002 /// extension.
2003 virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const {
2004 return ISD::ANY_EXTEND;
2005 }
2006
2007 /// @}
2008
2009 /// Returns true if we should normalize
2010 /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and
2011  /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely
2012 /// that it saves us from materializing N0 and N1 in an integer register.
2013 /// Targets that are able to perform and/or on flags should return false here.
2014 virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context,
2015 EVT VT) const {
2016 // If a target has multiple condition registers, then it likely has logical
2017 // operations on those registers.
2018 if (hasMultipleConditionRegisters())
2019 return false;
2020 // Only do the transform if the value won't be split into multiple
2021 // registers.
2022 LegalizeTypeAction Action = getTypeAction(Context, VT);
2023 return Action != TypeExpandInteger && Action != TypeExpandFloat &&
2024 Action != TypeSplitVector;
2025 }
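// Illustrative sketch, not part of TargetLowering.h: the normalization gated
// by shouldNormalizeToSelectSequence, written as plain C++ ternaries. The
// nested form never needs the boolean values n0 and n1 combined in an integer
// register. Names are hypothetical.
int exampleNormalizedAnd(bool n0, bool n1, int X, int Y) {
  // select(n0 & n1, X, Y)  ==>  select(n0, select(n1, X, Y), Y)
  return n0 ? (n1 ? X : Y) : Y;
}
int exampleNormalizedOr(bool n0, bool n1, int X, int Y) {
  // select(n0 | n1, X, Y)  ==>  select(n0, X, select(n1, X, Y))
  return n0 ? X : (n1 ? X : Y);
}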
2026
2027 virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; }
2028
2029 /// Return true if a select of constants (select Cond, C1, C2) should be
2030 /// transformed into simple math ops with the condition value. For example:
2031 /// select Cond, C1, C1-1 --> add (zext Cond), C1-1
2032 virtual bool convertSelectOfConstantsToMath(EVT VT) const {
2033 return false;
2034 }
2035
2036 /// Return true if it is profitable to transform an integer
2037 /// multiplication-by-constant into simpler operations like shifts and adds.
2038 /// This may be true if the target does not directly support the
2039 /// multiplication operation for the specified type or the sequence of simpler
2040 /// ops is faster than the multiply.
2041 virtual bool decomposeMulByConstant(LLVMContext &Context,
2042 EVT VT, SDValue C) const {
2043 return false;
2044 }
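// Illustrative sketch, not part of TargetLowering.h: the kind of rewrite that
// decomposeMulByConstant authorizes, shown for a hypothetical constant 9.
unsigned exampleMulBy9(unsigned X) {
  return (X << 3) + X; // X * 9 as a shift plus an add
}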
2045
2046 /// Return true if it is more correct/profitable to use strict FP_TO_INT
2047 /// conversion operations - canonicalizing the FP source value instead of
2048 /// converting all cases and then selecting based on value.
2049 /// This may be true if the target throws exceptions for out of bounds
2050 /// conversions or has fast FP CMOV.
2051 virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
2052 bool IsSigned) const {
2053 return false;
2054 }
2055
2056 //===--------------------------------------------------------------------===//
2057 // TargetLowering Configuration Methods - These methods should be invoked by
2058 // the derived class constructor to configure this object for the target.
2059 //
2060protected:
2061 /// Specify how the target extends the result of integer and floating point
2062 /// boolean values from i1 to a wider type. See getBooleanContents.
2063 void setBooleanContents(BooleanContent Ty) {
2064 BooleanContents = Ty;
2065 BooleanFloatContents = Ty;
2066 }
2067
2068 /// Specify how the target extends the result of integer and floating point
2069 /// boolean values from i1 to a wider type. See getBooleanContents.
2070 void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) {
2071 BooleanContents = IntTy;
2072 BooleanFloatContents = FloatTy;
2073 }
2074
2075 /// Specify how the target extends the result of a vector boolean value from a
2076 /// vector of i1 to a wider type. See getBooleanContents.
2077 void setBooleanVectorContents(BooleanContent Ty) {
2078 BooleanVectorContents = Ty;
2079 }
2080
2081 /// Specify the target scheduling preference.
2082 void setSchedulingPreference(Sched::Preference Pref) {
2083 SchedPreferenceInfo = Pref;
2084 }
2085
2086 /// Indicate the minimum number of blocks to generate jump tables.
2087 void setMinimumJumpTableEntries(unsigned Val);
2088
2089 /// Indicate the maximum number of entries in jump tables.
2090 /// Set to zero to generate unlimited jump tables.
2091 void setMaximumJumpTableSize(unsigned);
2092
2093 /// If set to a physical register, this specifies the register that
2094 /// llvm.savestack/llvm.restorestack should save and restore.
2095 void setStackPointerRegisterToSaveRestore(Register R) {
2096 StackPointerRegisterToSaveRestore = R;
2097 }
2098
2099 /// Tells the code generator that the target has multiple (allocatable)
2100 /// condition registers that can be used to store the results of comparisons
2101 /// for use by selects and conditional branches. With multiple condition
2102 /// registers, the code generator will not aggressively sink comparisons into
2103 /// the blocks of their users.
2104 void setHasMultipleConditionRegisters(bool hasManyRegs = true) {
2105 HasMultipleConditionRegisters = hasManyRegs;
2106 }
2107
2108 /// Tells the code generator that the target has BitExtract instructions.
2109 /// The code generator will aggressively sink "shift"s into the blocks of
2110 /// their users if the users will generate "and" instructions which can be
2111 /// combined with "shift" to BitExtract instructions.
2112 void setHasExtractBitsInsn(bool hasExtractInsn = true) {
2113 HasExtractBitsInsn = hasExtractInsn;
2114 }
2115
2116 /// Tells the code generator not to expand logic operations on comparison
2117 /// predicates into separate sequences that increase the amount of flow
2118 /// control.
2119 void setJumpIsExpensive(bool isExpensive = true);
2120
2121 /// Tells the code generator which bitwidths to bypass.
2122 void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) {
2123 BypassSlowDivWidths[SlowBitWidth] = FastBitWidth;
2124 }
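// Illustrative sketch, not part of TargetLowering.h: the effect of something
// like addBypassSlowDiv(64, 32), expressed at the source level. A 64-bit
// divide is guarded by a runtime check and routed to a cheaper 32-bit divide
// when both operands fit in 32 bits. Names and constants are hypothetical.
#include <cstdint>
uint64_t exampleBypassedDiv(uint64_t A, uint64_t B) {
  if (((A | B) >> 32) == 0)                     // both operands fit in 32 bits
    return uint64_t(uint32_t(A) / uint32_t(B)); // fast narrow divide
  return A / B;                                 // slow wide divide
}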
2125
2126 /// Add the specified register class as an available regclass for the
2127 /// specified value type. This indicates the selector can handle values of
2128 /// that class natively.
2129 void addRegisterClass(MVT VT, const TargetRegisterClass *RC) {
2130    assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT));
2131 RegClassForVT[VT.SimpleTy] = RC;
2132 }
2133
2134 /// Return the largest legal super-reg register class of the register class
2135 /// for the specified type and its associated "cost".
2136 virtual std::pair<const TargetRegisterClass *, uint8_t>
2137 findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const;
2138
2139 /// Once all of the register classes are added, this allows us to compute
2140 /// derived properties we expose.
2141 void computeRegisterProperties(const TargetRegisterInfo *TRI);
2142
2143 /// Indicate that the specified operation does not work with the specified
2144 /// type and indicate what to do about it. Note that VT may refer to either
2145 /// the type of a result or that of an operand of Op.
2146 void setOperationAction(unsigned Op, MVT VT,
2147 LegalizeAction Action) {
2148    assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!");
2149 OpActions[(unsigned)VT.SimpleTy][Op] = Action;
2150 }
2151
2152 /// Indicate that the specified load with extension does not work with the
2153 /// specified type and indicate what to do about it.
2154 void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT,
2155 LegalizeAction Action) {
2156    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() &&
2157           MemVT.isValid() && "Table isn't big enough!");
2158    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2159 unsigned Shift = 4 * ExtType;
2160 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift);
2161 LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift;
2162 }
2163
2164 /// Indicate that the specified truncating store does not work with the
2165 /// specified type and indicate what to do about it.
2166 void setTruncStoreAction(MVT ValVT, MVT MemVT,
2167 LegalizeAction Action) {
2168    assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!");
2169 TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action;
2170 }
2171
2172 /// Indicate that the specified indexed load does or does not work with the
2173  /// specified type and indicate what to do about it.
2174 ///
2175 /// NOTE: All indexed mode loads are initialized to Expand in
2176 /// TargetLowering.cpp
2177 void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2178 setIndexedModeAction(IdxMode, VT, IMAB_Load, Action);
2179 }
2180
2181 /// Indicate that the specified indexed store does or does not work with the
2182 /// specified type and indicate what to do about it.
2183 ///
2184 /// NOTE: All indexed mode stores are initialized to Expand in
2185 /// TargetLowering.cpp
2186 void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) {
2187 setIndexedModeAction(IdxMode, VT, IMAB_Store, Action);
2188 }
2189
2190 /// Indicate that the specified indexed masked load does or does not work with
2191 /// the specified type and indicate what to do about it.
2192 ///
2193 /// NOTE: All indexed mode masked loads are initialized to Expand in
2194 /// TargetLowering.cpp
2195 void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT,
2196 LegalizeAction Action) {
2197 setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action);
2198 }
2199
2200 /// Indicate that the specified indexed masked store does or does not work
2201 /// with the specified type and indicate what to do about it.
2202 ///
2203 /// NOTE: All indexed mode masked stores are initialized to Expand in
2204 /// TargetLowering.cpp
2205 void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT,
2206 LegalizeAction Action) {
2207 setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action);
2208 }
2209
2210 /// Indicate that the specified condition code is or isn't supported on the
2211 /// target and indicate what to do about it.
2212 void setCondCodeAction(ISD::CondCode CC, MVT VT,
2213 LegalizeAction Action) {
2214    assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) &&
2215           "Table isn't big enough!");
2216    assert((unsigned)Action < 0x10 && "too many bits for bitfield array");
2217    /// The lower 3 bits of SimpleTy select the Nth 4-bit field within a 32-bit
2218    /// value, and the upper bits index into the second dimension of the array
2219    /// to select which 32-bit value to use.
2220 uint32_t Shift = 4 * (VT.SimpleTy & 0x7);
2221 CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift);
2222 CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift;
2223 }
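// Illustrative sketch, not part of TargetLowering.h: the packing performed by
// setCondCodeAction above stores one 4-bit action per value type, eight types
// per 32-bit word. A hypothetical mirror of the update:
#include <cstdint>
void examplePackAction(uint32_t Row[], unsigned SimpleTy, unsigned Action) {
  uint32_t Shift = 4 * (SimpleTy & 0x7);           // position inside the word
  Row[SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); // clear the old 4-bit field
  Row[SimpleTy >> 3] |= (uint32_t)Action << Shift; // install the new action
}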
2224
2225 /// If Opc/OrigVT is specified as being promoted, the promotion code defaults
2226 /// to trying a larger integer/fp until it can find one that works. If that
2227 /// default is insufficient, this method can be used by the target to override
2228 /// the default.
2229 void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2230 PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy;
2231 }
2232
2233 /// Convenience method to set an operation to Promote and specify the type
2234 /// in a single call.
2235 void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) {
2236 setOperationAction(Opc, OrigVT, Promote);
2237 AddPromotedToType(Opc, OrigVT, DestVT);
2238 }
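// Illustrative sketch, not part of TargetLowering.h: setOperationPromotedToType
// is just the two calls above bundled together, i.e. mark (Opc, OrigVT) as
// Promote and record the destination type in a map keyed by opcode and type.
// The map below is a hypothetical stand-in for PromoteToType.
#include <map>
#include <utility>
static std::map<std::pair<unsigned, unsigned>, unsigned> ExamplePromoteToType;
void exampleAddPromotedToType(unsigned Opc, unsigned OrigVT, unsigned DestVT) {
  ExamplePromoteToType[{Opc, OrigVT}] = DestVT; // consulted later during legalization
}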
2239
2240 /// Targets should invoke this method for each target independent node that
2241 /// they want to provide a custom DAG combiner for by implementing the
2242 /// PerformDAGCombine virtual method.
2243 void setTargetDAGCombine(ISD::NodeType NT) {
2244    assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray));
2245 TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7);
2246 }
2247
2248 /// Set the target's minimum function alignment.
2249 void setMinFunctionAlignment(Align Alignment) {
2250 MinFunctionAlignment = Alignment;
2251 }
2252
2253 /// Set the target's preferred function alignment. This should be set if
2254  /// there is a performance benefit to higher-than-minimum alignment.
2255 void setPrefFunctionAlignment(Align Alignment) {
2256 PrefFunctionAlignment = Alignment;
2257 }
2258
2259  /// Set the target's preferred loop alignment. The default alignment of one
2260 /// means the target does not care about loop alignment. The target may also
2261 /// override getPrefLoopAlignment to provide per-loop values.
2262 void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; }
2263
2264 /// Set the minimum stack alignment of an argument.
2265 void setMinStackArgumentAlignment(Align Alignment) {
2266 MinStackArgumentAlignment = Alignment;
2267 }
2268
2269 /// Set the maximum atomic operation size supported by the
2270 /// backend. Atomic operations greater than this size (as well as
2271 /// ones that are not naturally aligned), will be expanded by
2272 /// AtomicExpandPass into an __atomic_* library call.
2273 void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) {
2274 MaxAtomicSizeInBitsSupported = SizeInBits;
2275 }
2276
2277 /// Sets the minimum cmpxchg or ll/sc size supported by the backend.
2278 void setMinCmpXchgSizeInBits(unsigned SizeInBits) {
2279 MinCmpXchgSizeInBits = SizeInBits;
2280 }
2281
2282 /// Sets whether unaligned atomic operations are supported.
2283 void setSupportsUnalignedAtomics(bool UnalignedSupported) {
2284 SupportsUnalignedAtomics = UnalignedSupported;
2285 }
2286
2287public:
2288 //===--------------------------------------------------------------------===//
2289 // Addressing mode description hooks (used by LSR etc).
2290 //
2291
2292 /// CodeGenPrepare sinks address calculations into the same BB as Load/Store
2293 /// instructions reading the address. This allows as much computation as
2294 /// possible to be done in the address mode for that operand. This hook lets
2295 /// targets also pass back when this should be done on intrinsics which
2296 /// load/store.
2297 virtual bool getAddrModeArguments(IntrinsicInst * /*I*/,
2298 SmallVectorImpl<Value*> &/*Ops*/,
2299 Type *&/*AccessTy*/) const {
2300 return false;
2301 }
2302
2303 /// This represents an addressing mode of:
2304 /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg
2305 /// If BaseGV is null, there is no BaseGV.
2306 /// If BaseOffs is zero, there is no base offset.
2307 /// If HasBaseReg is false, there is no base register.
2308 /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with
2309 /// no scale.
2310 struct AddrMode {
2311 GlobalValue *BaseGV = nullptr;
2312 int64_t BaseOffs = 0;
2313 bool HasBaseReg = false;
2314 int64_t Scale = 0;
2315 AddrMode() = default;
2316 };
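// Illustrative sketch, not part of TargetLowering.h: the address an AddrMode
// describes, written out as plain arithmetic with hypothetical operand values.
#include <cstdint>
int64_t exampleEffectiveAddress(int64_t BaseGV, int64_t BaseOffs,
                                int64_t BaseReg, int64_t Scale,
                                int64_t ScaleReg) {
  return BaseGV + BaseOffs + BaseReg + Scale * ScaleReg;
}
// e.g. an x86 operand like [rbx + 4*rcx + 16] corresponds to HasBaseReg=true
// (rbx), Scale=4 with rcx as the scaled register, BaseOffs=16, and BaseGV null.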
2317
2318 /// Return true if the addressing mode represented by AM is legal for this
2319 /// target, for a load/store of the specified type.
2320 ///
2321 /// The type may be VoidTy, in which case only return true if the addressing
2322 /// mode is legal for a load/store of any legal type. TODO: Handle
2323 /// pre/postinc as well.
2324 ///
2325 /// If the address space cannot be determined, it will be -1.
2326 ///
2327 /// TODO: Remove default argument
2328 virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
2329 Type *Ty, unsigned AddrSpace,
2330 Instruction *I = nullptr) const;
2331
2332 /// Return the cost of the scaling factor used in the addressing mode
2333 /// represented by AM for this target, for a load/store of the specified type.
2334 ///
2335 /// If the AM is supported, the return value must be >= 0.
2336 /// If the AM is not supported, it returns a negative value.
2337 /// TODO: Handle pre/postinc as well.
2338 /// TODO: Remove default argument
2339 virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM,
2340 Type *Ty, unsigned AS = 0) const {
2341 // Default: assume that any scaling factor used in a legal AM is free.
2342 if (isLegalAddressingMode(DL, AM, Ty, AS))
2343 return 0;
2344 return -1;
2345 }
2346
2347 /// Return true if the specified immediate is legal icmp immediate, that is
2348 /// the target has icmp instructions which can compare a register against the
2349 /// immediate without having to materialize the immediate into a register.
2350 virtual bool isLegalICmpImmediate(int64_t) const {
2351 return true;
2352 }
2353
2354 /// Return true if the specified immediate is legal add immediate, that is the
2355 /// target has add instructions which can add a register with the immediate
2356 /// without having to materialize the immediate into a register.
2357 virtual bool isLegalAddImmediate(int64_t) const {
2358 return true;
2359 }
2360
2361 /// Return true if the specified immediate is legal for the value input of a
2362 /// store instruction.
2363 virtual bool isLegalStoreImmediate(int64_t Value) const {
2364 // Default implementation assumes that at least 0 works since it is likely
2365 // that a zero register exists or a zero immediate is allowed.
2366 return Value == 0;
2367 }
2368
2369 /// Return true if it's significantly cheaper to shift a vector by a uniform
2370 /// scalar than by an amount which will vary across each lane. On x86 before
2371 /// AVX2 for example, there is a "psllw" instruction for the former case, but
2372 /// no simple instruction for a general "a << b" operation on vectors.
2373 /// This should also apply to lowering for vector funnel shifts (rotates).
2374 virtual bool isVectorShiftByScalarCheap(Type *Ty) const {
2375 return false;
2376 }
2377
2378 /// Given a shuffle vector SVI representing a vector splat, return a new
2379 /// scalar type of size equal to SVI's scalar type if the new type is more
2380 /// profitable. Returns nullptr otherwise. For example under MVE float splats
2381 /// are converted to integer to prevent the need to move from SPR to GPR
2382 /// registers.
2383 virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const {
2384 return nullptr;
2385 }
2386
2387  /// Given a set of interconnected phis of type 'From' that are loaded/stored
2388 /// or bitcast to type 'To', return true if the set should be converted to
2389 /// 'To'.
2390 virtual bool shouldConvertPhiType(Type *From, Type *To) const {
2391 return (From->isIntegerTy() || From->isFloatingPointTy()) &&
2392 (To->isIntegerTy() || To->isFloatingPointTy());
2393 }
2394
2395 /// Returns true if the opcode is a commutative binary operation.
2396 virtual bool isCommutativeBinOp(unsigned Opcode) const {
2397 // FIXME: This should get its info from the td file.
2398 switch (Opcode) {
2399 case ISD::ADD:
2400 case ISD::SMIN:
2401 case ISD::SMAX:
2402 case ISD::UMIN:
2403 case ISD::UMAX:
2404 case ISD::MUL:
2405 case ISD::MULHU:
2406 case ISD::MULHS:
2407 case ISD::SMUL_LOHI:
2408 case ISD::UMUL_LOHI:
2409 case ISD::FADD:
2410 case ISD::FMUL:
2411 case ISD::AND:
2412 case ISD::OR:
2413 case ISD::XOR:
2414 case ISD::SADDO:
2415 case ISD::UADDO:
2416 case ISD::ADDC:
2417 case ISD::ADDE:
2418 case ISD::SADDSAT:
2419 case ISD::UADDSAT:
2420 case ISD::FMINNUM:
2421 case ISD::FMAXNUM:
2422 case ISD::FMINNUM_IEEE:
2423 case ISD::FMAXNUM_IEEE:
2424 case ISD::FMINIMUM:
2425 case ISD::FMAXIMUM:
2426 return true;
2427 default: return false;
2428 }
2429 }
2430
2431 /// Return true if the node is a math/logic binary operator.
2432 virtual bool isBinOp(unsigned Opcode) const {
2433 // A commutative binop must be a binop.
2434 if (isCommutativeBinOp(Opcode))
2435 return true;
2436 // These are non-commutative binops.
2437 switch (Opcode) {
2438 case ISD::SUB:
2439 case ISD::SHL:
2440 case ISD::SRL:
2441 case ISD::SRA:
2442 case ISD::SDIV:
2443 case ISD::UDIV:
2444 case ISD::SREM:
2445 case ISD::UREM:
2446 case ISD::FSUB:
2447 case ISD::FDIV:
2448 case ISD::FREM:
2449 return true;
2450 default:
2451 return false;
2452 }
2453 }
2454
2455 /// Return true if it's free to truncate a value of type FromTy to type
2456 /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16
2457 /// by referencing its sub-register AX.
2458 /// Targets must return false when FromTy <= ToTy.
2459 virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const {
2460 return false;
2461 }
2462
2463 /// Return true if a truncation from FromTy to ToTy is permitted when deciding
2464 /// whether a call is in tail position. Typically this means that both results
2465 /// would be assigned to the same register or stack slot, but it could mean
2466 /// the target performs adequate checks of its own before proceeding with the
2467 /// tail call. Targets must return false when FromTy <= ToTy.
2468 virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const {
2469 return false;
2470 }
2471
2472 virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const {
2473 return false;
2474 }
2475
2476 virtual bool isProfitableToHoist(Instruction *I) const { return true; }
2477
2478 /// Return true if the extension represented by \p I is free.
2479  /// Unlike the is[Z|FP]ExtFree family, which is based on types,
2480 /// this method can use the context provided by \p I to decide
2481 /// whether or not \p I is free.
2482 /// This method extends the behavior of the is[Z|FP]ExtFree family.
2483  /// In other words, if is[Z|FP]ExtFree returns true, then this method
2484 /// returns true as well. The converse is not true.
2485 /// The target can perform the adequate checks by overriding isExtFreeImpl.
2486 /// \pre \p I must be a sign, zero, or fp extension.
2487 bool isExtFree(const Instruction *I) const {
2488 switch (I->getOpcode()) {
2489 case Instruction::FPExt:
2490 if (isFPExtFree(EVT::getEVT(I->getType()),
2491 EVT::getEVT(I->getOperand(0)->getType())))
2492 return true;
2493 break;
2494 case Instruction::ZExt:
2495 if (isZExtFree(I->getOperand(0)->getType(), I->getType()))
2496 return true;
2497 break;
2498 case Instruction::SExt:
2499 break;
2500 default:
2501      llvm_unreachable("Instruction is not an extension");
2502 }
2503 return isExtFreeImpl(I);
2504 }
2505
2506 /// Return true if \p Load and \p Ext can form an ExtLoad.
2507 /// For example, in AArch64
2508 /// %L = load i8, i8* %ptr
2509 /// %E = zext i8 %L to i32
2510 /// can be lowered into one load instruction
2511 /// ldrb w0, [x0]
2512 bool isExtLoad(const LoadInst *Load, const Instruction *Ext,
2513 const DataLayout &DL) const {
2514 EVT VT = getValueType(DL, Ext->getType());
2515 EVT LoadVT = getValueType(DL, Load->getType());
2516
2517 // If the load has other users and the truncate is not free, the ext
2518 // probably isn't free.
2519 if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) &&
2520 !isTruncateFree(Ext->getType(), Load->getType()))
2521 return false;
2522
2523 // Check whether the target supports casts folded into loads.
2524 unsigned LType;
2525 if (isa<ZExtInst>(Ext))
2526 LType = ISD::ZEXTLOAD;
2527 else {
2528      assert(isa<SExtInst>(Ext) && "Unexpected ext type!");
2529 LType = ISD::SEXTLOAD;
2530 }
2531
2532 return isLoadExtLegal(LType, VT, LoadVT);
2533 }
2534
2535 /// Return true if any actual instruction that defines a value of type FromTy
2536 /// implicitly zero-extends the value to ToTy in the result register.
2537 ///
2538 /// The function should return true when it is likely that the truncate can
2539 /// be freely folded with an instruction defining a value of FromTy. If
2540 /// the defining instruction is unknown (because you're looking at a
2541 /// function argument, PHI, etc.) then the target may require an
2542 /// explicit truncate, which is not necessarily free, but this function
2543 /// does not deal with those cases.
2544 /// Targets must return false when FromTy >= ToTy.
2545 virtual bool isZExtFree(Type *FromTy, Type *ToTy) const {
2546 return false;
2547 }
2548
2549 virtual bool isZExtFree(EVT FromTy, EVT ToTy) const {
2550 return false;
2551 }
2552
2553 /// Return true if sign-extension from FromTy to ToTy is cheaper than
2554 /// zero-extension.
2555 virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const {
2556 return false;
2557 }
2558
2559 /// Return true if sinking I's operands to the same basic block as I is
2560 /// profitable, e.g. because the operands can be folded into a target
2561 /// instruction during instruction selection. After calling the function
2562 /// \p Ops contains the Uses to sink ordered by dominance (dominating users
2563 /// come first).
2564 virtual bool shouldSinkOperands(Instruction *I,
2565 SmallVectorImpl<Use *> &Ops) const {
2566 return false;
2567 }
2568
2569  /// Return true if the target supports combining two values of type LoadedType,
2570  /// loaded next to each other in memory, into a single paired load.
2571 /// RequiredAlignment gives the minimal alignment constraints that must be met
2572 /// to be able to select this paired load.
2573 ///
2574 /// This information is *not* used to generate actual paired loads, but it is
2575 /// used to generate a sequence of loads that is easier to combine into a
2576 /// paired load.
2577 /// For instance, something like this:
2578 /// a = load i64* addr
2579 /// b = trunc i64 a to i32
2580 /// c = lshr i64 a, 32
2581 /// d = trunc i64 c to i32
2582 /// will be optimized into:
2583 /// b = load i32* addr1
2584 /// d = load i32* addr2
2585 /// Where addr1 = addr2 +/- sizeof(i32).
2586 ///
2587 /// In other words, unless the target performs a post-isel load combining,
2588 /// this information should not be provided because it will generate more
2589 /// loads.
2590 virtual bool hasPairedLoad(EVT /*LoadedType*/,
2591 Align & /*RequiredAlignment*/) const {
2592 return false;
2593 }
2594
2595 /// Return true if the target has a vector blend instruction.
2596 virtual bool hasVectorBlend() const { return false; }
2597
2598 /// Get the maximum supported factor for interleaved memory accesses.
2599 /// Default to be the minimum interleave factor: 2.
2600 virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; }
2601
2602 /// Lower an interleaved load to target specific intrinsics. Return
2603 /// true on success.
2604 ///
2605 /// \p LI is the vector load instruction.
2606 /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector.
2607 /// \p Indices is the corresponding indices for each shufflevector.
2608 /// \p Factor is the interleave factor.
2609 virtual bool lowerInterleavedLoad(LoadInst *LI,
2610 ArrayRef<ShuffleVectorInst *> Shuffles,
2611 ArrayRef<unsigned> Indices,
2612 unsigned Factor) const {
2613 return false;
2614 }
2615
2616 /// Lower an interleaved store to target specific intrinsics. Return
2617 /// true on success.
2618 ///
2619 /// \p SI is the vector store instruction.
2620 /// \p SVI is the shufflevector to RE-interleave the stored vector.
2621 /// \p Factor is the interleave factor.
2622 virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI,
2623 unsigned Factor) const {
2624 return false;
2625 }
2626
2627 /// Return true if zero-extending the specific node Val to type VT2 is free
2628 /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or
2629 /// because it's folded such as X86 zero-extending loads).
2630 virtual bool isZExtFree(SDValue Val, EVT VT2) const {
2631 return isZExtFree(Val.getValueType(), VT2);
2632 }
2633
2634 /// Return true if an fpext operation is free (for instance, because
2635 /// single-precision floating-point numbers are implicitly extended to
2636 /// double-precision).
2637 virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const {
2638    assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() &&
2639           "invalid fpext types");
2640 return false;
2641 }
2642
2643 /// Return true if an fpext operation input to an \p Opcode operation is free
2644 /// (for instance, because half-precision floating-point numbers are
2645 /// implicitly extended to float-precision) for an FMA instruction.
2646 virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode,
2647 EVT DestVT, EVT SrcVT) const {
2648    assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
2649           "invalid fpext types");
2650 return isFPExtFree(DestVT, SrcVT);
2651 }
2652
2653 /// Return true if folding a vector load into ExtVal (a sign, zero, or any
2654 /// extend node) is profitable.
2655 virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; }
2656
2657 /// Return true if an fneg operation is free to the point where it is never
2658 /// worthwhile to replace it with a bitwise operation.
2659 virtual bool isFNegFree(EVT VT) const {
2660 assert(VT.isFloatingPoint());
2661 return false;
2662 }
2663
2664 /// Return true if an fabs operation is free to the point where it is never
2665 /// worthwhile to replace it with a bitwise operation.
2666 virtual bool isFAbsFree(EVT VT) const {
2667 assert(VT.isFloatingPoint());
2668 return false;
2669 }
2670
2671 /// Return true if an FMA operation is faster than a pair of fmul and fadd
2672 /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
2673 /// returns true, otherwise fmuladd is expanded to fmul + fadd.
2674 ///
2675 /// NOTE: This may be called before legalization on types for which FMAs are
2676 /// not legal, but should return true if those types will eventually legalize
2677 /// to types that support FMAs. After legalization, it will only be called on
2678 /// types that support FMAs (via Legal or Custom actions).
2679 virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
2680 EVT) const {
2681 return false;
2682 }
2683
2684 /// IR version
2685 virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
2686 return false;
2687 }
2688
2689 /// Returns true if \p N can be combined to form an ISD::FMAD. \p N may be an
2690 /// ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed into an
2691 /// fadd/fsub.
2692 virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const {
2693 assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB ||
2694         N->getOpcode() == ISD::FMUL) &&
2695        "unexpected node in FMAD forming combine");
2696 return isOperationLegal(ISD::FMAD, N->getValueType(0));
2697 }
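// As a concrete example of the combine this guards (a sketch, using the usual
// FMAD operand order):
//
//   (fadd (fmul a, b), c)  -->  (fmad a, b, c)
//
// which is only formed when ISD::FMAD is Legal for the node's value type.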
2698
2699 /// Return true if it's profitable to narrow operations of type VT1 to
2700 /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from
2701 /// i32 to i16.
2702 virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const {
2703 return false;
2704 }
2705
2706 /// Return true if it is beneficial to convert a load of a constant to
2707 /// just the constant itself.
2708 /// On some targets it might be more efficient to use a combination of
2709 /// arithmetic instructions to materialize the constant instead of loading it
2710 /// from a constant pool.
2711 virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm,
2712 Type *Ty) const {
2713 return false;
2714 }
2715
2716 /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type
2717 /// from this source type with this index. This is needed because
2718 /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of
2719 /// the first element, and only the target knows which lowering is cheap.
2720 virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
2721 unsigned Index) const {
2722 return false;
2723 }
2724
2725 /// Try to convert an extract element of a vector binary operation into an
2726 /// extract element followed by a scalar operation.
2727 virtual bool shouldScalarizeBinop(SDValue VecOp) const {
2728 return false;
2729 }
2730
2731 /// Return true if extraction of a scalar element from the given vector type
2732 /// at the given index is cheap. For example, if scalar operations occur on
2733 /// the same register file as vector operations, then an extract element may
2734 /// be a sub-register rename rather than an actual instruction.
2735 virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const {
2736 return false;
2737 }
2738
2739 /// Try to convert math with an overflow comparison into the corresponding DAG
2740 /// node operation. Targets may want to override this independently of whether
2741 /// the operation is legal/custom for the given type because it may obscure
2742 /// matching of other patterns.
2743 virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT,
2744 bool MathUsed) const {
2745 // TODO: The default logic is inherited from code in CodeGenPrepare.
2746 // The opcode should not make a difference by default?
2747 if (Opcode != ISD::UADDO)
2748 return false;
2749
2750 // Allow the transform as long as we have an integer type that is not
2751 // obviously illegal and unsupported and if the math result is used
2752 // besides the overflow check. On some targets (e.g. SPARC), it is
2753 // not profitable to form an overflow op if the math result has no
2754 // concrete users.
2755 if (VT.isVector())
2756 return false;
2757 return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT));
2758 }
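// A minimal override sketch (assuming a hypothetical subclass
// "MyTargetLowering"; the policy shown is illustrative, not the default):
//
//   bool MyTargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
//                                               bool MathUsed) const {
//     // Also allow usubo, but only on scalar integer types that are legal.
//     if (Opcode != ISD::UADDO && Opcode != ISD::USUBO)
//       return false;
//     return MathUsed && VT.isScalarInteger() && isTypeLegal(VT);
//   }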
2759
2760 // Return true if it is profitable to use a scalar input to a BUILD_VECTOR
2761 // even if the vector itself has multiple uses.
2762 virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const {
2763 return false;
2764 }
2765
2766 // Return true if CodeGenPrepare should consider splitting large offset of a
2767 // GEP to make the GEP fit into the addressing mode and can be sunk into the
2768 // same blocks of its users.
2769 virtual bool shouldConsiderGEPOffsetSplit() const { return false; }
2770
2771 /// Return true if creating a shift of the type by the given
2772 /// amount is not profitable.
2773 virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const {
2774 return false;
2775 }
2776
2777 //===--------------------------------------------------------------------===//
2778 // Runtime Library hooks
2779 //
2780
2781 /// Rename the default libcall routine name for the specified libcall.
2782 void setLibcallName(RTLIB::Libcall Call, const char *Name) {
2783 LibcallRoutineNames[Call] = Name;
2784 }
2785
2786 /// Get the libcall routine name for the specified libcall.
2787 const char *getLibcallName(RTLIB::Libcall Call) const {
2788 return LibcallRoutineNames[Call];
2789 }
2790
2791 /// Override the default CondCode to be used to test the result of the
2792 /// comparison libcall against zero.
2793 void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) {
2794 CmpLibcallCCs[Call] = CC;
2795 }
2796
2797 /// Get the CondCode that's to be used to test the result of the comparison
2798 /// libcall against zero.
2799 ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const {
2800 return CmpLibcallCCs[Call];
2801 }
2802
2803 /// Set the CallingConv that should be used for the specified libcall.
2804 void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) {
2805 LibcallCallingConvs[Call] = CC;
2806 }
2807
2808 /// Get the CallingConv that should be used for the specified libcall.
2809 CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const {
2810 return LibcallCallingConvs[Call];
2811 }
2812
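// Typical use of these hooks from a target's constructor (a sketch; the
// routine name "__mytarget_memcpy" is an assumption, the RTLIB/ISD values are
// real enumerators):
//
//   setLibcallName(RTLIB::MEMCPY, "__mytarget_memcpy");
//   setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::C);
//   setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETEQ); // result == 0 means "equal"
//   setCmpLibcallCC(RTLIB::UNE_F32, ISD::SETNE);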
2813 /// Execute target specific actions to finalize target lowering.
2814 /// This is used to set extra flags in MachineFrameInformation and to freeze
2815 /// the set of reserved registers.
2816 /// The default implementation just freezes the set of reserved registers.
2817 virtual void finalizeLowering(MachineFunction &MF) const;
2818
2819 //===----------------------------------------------------------------------===//
2820 // GlobalISel Hooks
2821 //===----------------------------------------------------------------------===//
2822 /// Check whether or not \p MI needs to be moved close to its uses.
2823 virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const;
2824
2825
2826private:
2827 const TargetMachine &TM;
2828
2829 /// Tells the code generator that the target has multiple (allocatable)
2830 /// condition registers that can be used to store the results of comparisons
2831 /// for use by selects and conditional branches. With multiple condition
2832 /// registers, the code generator will not aggressively sink comparisons into
2833 /// the blocks of their users.
2834 bool HasMultipleConditionRegisters;
2835
2836 /// Tells the code generator that the target has BitExtract instructions.
2837 /// The code generator will aggressively sink "shift"s into the blocks of
2838 /// their users if the users will generate "and" instructions which can be
2839 /// combined with "shift" to BitExtract instructions.
2840 bool HasExtractBitsInsn;
2841
2842 /// Tells the code generator to bypass slow divide or remainder
2843 /// instructions. For example, BypassSlowDivWidths[32,8] tells the code
2844 /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer
2845 /// div/rem when the operands are positive and less than 256.
2846 DenseMap <unsigned int, unsigned int> BypassSlowDivWidths;
2847
2848 /// Tells the code generator that it shouldn't generate extra flow control
2849 /// instructions and should attempt to combine flow control instructions via
2850 /// predication.
2851 bool JumpIsExpensive;
2852
2853 /// Information about the contents of the high-bits in boolean values held in
2854 /// a type wider than i1. See getBooleanContents.
2855 BooleanContent BooleanContents;
2856
2857 /// Information about the contents of the high-bits in boolean values held in
2858 /// a type wider than i1. See getBooleanContents.
2859 BooleanContent BooleanFloatContents;
2860
2861 /// Information about the contents of the high-bits in boolean vector values
2862 /// when the element type is wider than i1. See getBooleanContents.
2863 BooleanContent BooleanVectorContents;
2864
2865 /// The target scheduling preference: shortest possible total cycles or lowest
2866 /// register usage.
2867 Sched::Preference SchedPreferenceInfo;
2868
2869 /// The minimum alignment that any argument on the stack needs to have.
2870 Align MinStackArgumentAlignment;
2871
2872 /// The minimum function alignment (used when optimizing for size, and to
2873 /// prevent explicitly provided alignment from leading to incorrect code).
2874 Align MinFunctionAlignment;
2875
2876 /// The preferred function alignment (used when alignment unspecified and
2877 /// optimizing for speed).
2878 Align PrefFunctionAlignment;
2879
2880 /// The preferred loop alignment (in log2, not in bytes).
2881 Align PrefLoopAlignment;
2882
2883 /// Size in bits of the maximum atomics size the backend supports.
2884 /// Accesses larger than this will be expanded by AtomicExpandPass.
2885 unsigned MaxAtomicSizeInBitsSupported;
2886
2887 /// Size in bits of the minimum cmpxchg or ll/sc operation the
2888 /// backend supports.
2889 unsigned MinCmpXchgSizeInBits;
2890
2891 /// This indicates if the target supports unaligned atomic operations.
2892 bool SupportsUnalignedAtomics;
2893
2894 /// If set to a physical register, this specifies the register that
2895 /// llvm.savestack/llvm.restorestack should save and restore.
2896 Register StackPointerRegisterToSaveRestore;
2897
2898 /// This indicates the default register class to use for each ValueType the
2899 /// target supports natively.
2900 const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE];
2901 uint16_t NumRegistersForVT[MVT::LAST_VALUETYPE];
2902 MVT RegisterTypeForVT[MVT::LAST_VALUETYPE];
2903
2904 /// This indicates the "representative" register class to use for each
2905 /// ValueType the target supports natively. This information is used by the
2906 /// scheduler to track register pressure. By default, the representative
2907 /// register class is the largest legal super-reg register class of the
2908 /// register class of the specified type. e.g. On x86, i8, i16, and i32's
2909 /// representative class would be GR32.
2910 const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE];
2911
2912 /// This indicates the "cost" of the "representative" register class for each
2913 /// ValueType. The cost is used by the scheduler to approximate register
2914 /// pressure.
2915 uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE];
2916
2917 /// For any value types we are promoting or expanding, this contains the value
2918 /// type that we are changing to. For Expanded types, this contains one step
2919 /// of the expand (e.g. i64 -> i32), even if there are multiple steps required
2920 /// (e.g. i64 -> i16). For types natively supported by the system, this holds
2921 /// the same type (e.g. i32 -> i32).
2922 MVT TransformToType[MVT::LAST_VALUETYPE];
2923
2924 /// For each operation and each value type, keep a LegalizeAction that
2925 /// indicates how instruction selection should deal with the operation. Most
2926 /// operations are Legal (aka, supported natively by the target), but
2927 /// operations that are not should be described. Note that operations on
2928 /// non-legal value types are not described here.
2929 LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END];
2930
2931 /// For each load extension type and each value type, keep a LegalizeAction
2932 /// that indicates how instruction selection should deal with a load of a
2933 /// specific value type and extension type. Uses 4-bits to store the action
2934 /// for each of the 4 load ext types.
2935 uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2936
2937 /// For each value type pair keep a LegalizeAction that indicates whether a
2938 /// truncating store of a specific value type and truncating type is legal.
2939 LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE];
2940
2941 /// For each indexed mode and each value type, keep a quad of LegalizeAction
2942 /// that indicates how instruction selection should deal with the load /
2943 /// store / maskedload / maskedstore.
2944 ///
2945 /// The first dimension is the value_type for the reference. The second
2946 /// dimension represents the various modes for load store.
2947 uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE];
2948
2949 /// For each condition code (ISD::CondCode) keep a LegalizeAction that
2950 /// indicates how instruction selection should deal with the condition code.
2951 ///
2952 /// Because each CC action takes up 4 bits, we need to have the array size be
2953 /// large enough to fit all of the value types. This can be done by rounding
2954 /// up the MVT::LAST_VALUETYPE value to the next multiple of 8.
2955 uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8];
2956
2957 ValueTypeActionImpl ValueTypeActions;
2958
2959private:
2960 LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const;
2961
2962 /// Targets can specify ISD nodes that they would like PerformDAGCombine
2963 /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this
2964 /// array.
2965 unsigned char
2966 TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT];
2967
2968 /// For operations that must be promoted to a specific type, this holds the
2969 /// destination type. This map should be sparse, so don't hold it as an
2970 /// array.
2971 ///
2972 /// Targets add entries to this map with AddPromotedToType(..), clients access
2973 /// this with getTypeToPromoteTo(..).
2974 std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType>
2975 PromoteToType;
2976
2977 /// Stores the name of each libcall.
2978 const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1];
2979
2980 /// The ISD::CondCode that should be used to test the result of each of the
2981 /// comparison libcall against zero.
2982 ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL];
2983
2984 /// Stores the CallingConv that should be used for each libcall.
2985 CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL];
2986
2987 /// Set default libcall names and calling conventions.
2988 void InitLibcalls(const Triple &TT);
2989
2990 /// The bits of IndexedModeActions used to store the legalisation actions.
2991 /// We store the data as | ML | MS | L | S | each taking 4 bits.
2992 enum IndexedModeActionsBits {
2993 IMAB_Store = 0,
2994 IMAB_Load = 4,
2995 IMAB_MaskedStore = 8,
2996 IMAB_MaskedLoad = 12
2997 };
2998
2999 void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift,
3000 LegalizeAction Action) {
3001 assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE &&
3002        (unsigned)Action < 0xf && "Table isn't big enough!");
3003 unsigned Ty = (unsigned)VT.SimpleTy;
3004 IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift);
3005 IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift;
3006 }
3007
3008 LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT,
3009 unsigned Shift) const {
3010 assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() &&
3011        "Table isn't big enough!");
3012 unsigned Ty = (unsigned)VT.SimpleTy;
3013 return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf);
3014 }
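// Worked example of the packing above (a sketch): marking pre-incremented i32
// loads as Legal stores the 4-bit action in bits [7:4] (IMAB_Load) of the
// i32 / ISD::PRE_INC entry of IndexedModeActions:
//
//   setIndexedModeAction(ISD::PRE_INC, MVT::i32, IMAB_Load, Legal);
//   // ...and it is read back with:
//   getIndexedModeAction(ISD::PRE_INC, MVT::i32, IMAB_Load);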
3015
3016protected:
3017 /// Return true if the extension represented by \p I is free.
3018 /// \pre \p I is a sign, zero, or fp extension and
3019 /// is[Z|FP]ExtFree of the related types is not true.
3020 virtual bool isExtFreeImpl(const Instruction *I) const { return false; }
3021
3022 /// Depth that GatherAllAliases should continue looking for chain
3023 /// dependencies when trying to find a more preferable chain. As an
3024 /// approximation, this should be more than the number of consecutive stores
3025 /// expected to be merged.
3026 unsigned GatherAllAliasesMaxDepth;
3027
3028 /// \brief Specify maximum number of store instructions per memset call.
3029 ///
3030 /// When lowering \@llvm.memset this field specifies the maximum number of
3031 /// store operations that may be substituted for the call to memset. Targets
3032 /// must set this value based on the cost threshold for that target. Targets
3033 /// should assume that the memset will be done using as many of the largest
3034 /// store operations first, followed by smaller ones, if necessary, per
3035 /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine
3036 /// with 16-bit alignment would result in four 2-byte stores and one 1-byte
3037 /// store. This only applies to setting a constant array of a constant size.
3038 unsigned MaxStoresPerMemset;
3039 /// Likewise for functions with the OptSize attribute.
3040 unsigned MaxStoresPerMemsetOptSize;
3041
3042 /// \brief Specify maximum number of store instructions per memcpy call.
3043 ///
3044 /// When lowering \@llvm.memcpy this field specifies the maximum number of
3045 /// store operations that may be substituted for a call to memcpy. Targets
3046 /// must set this value based on the cost threshold for that target. Targets
3047 /// should assume that the memcpy will be done using as many of the largest
3048 /// store operations first, followed by smaller ones, if necessary, per
3049 /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine
3050 /// with 32-bit alignment would result in one 4-byte store, one 2-byte store
3051 /// and one 1-byte store. This only applies to copying a constant array of
3052 /// constant size.
3053 unsigned MaxStoresPerMemcpy;
3054 /// Likewise for functions with the OptSize attribute.
3055 unsigned MaxStoresPerMemcpyOptSize;
3056 /// \brief Specify max number of store instructions to glue in inlined memcpy.
3057 ///
3058 /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number
3059 /// of store instructions to keep together. This helps in pairing and
3060 /// vectorization later on.
3061 unsigned MaxGluedStoresPerMemcpy = 0;
3062
3063 /// \brief Specify maximum number of load instructions per memcmp call.
3064 ///
3065 /// When lowering \@llvm.memcmp this field specifies the maximum number of
3066 /// pairs of load operations that may be substituted for a call to memcmp.
3067 /// Targets must set this value based on the cost threshold for that target.
3068 /// Targets should assume that the memcmp will be done using as many of the
3069 /// largest load operations first, followed by smaller ones, if necessary, per
3070 /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine
3071 /// with 32-bit alignment would result in one 4-byte load, one 2-byte load
3072 /// and one 1-byte load. This only applies to comparing a constant array of
3073 /// constant size.
3074 unsigned MaxLoadsPerMemcmp;
3075 /// Likewise for functions with the OptSize attribute.
3076 unsigned MaxLoadsPerMemcmpOptSize;
3077
3078 /// \brief Specify maximum number of store instructions per memmove call.
3079 ///
3080 /// When lowering \@llvm.memmove this field specifies the maximum number of
3081 /// store instructions that may be substituted for a call to memmove. Targets
3082 /// must set this value based on the cost threshold for that target. Targets
3083 /// should assume that the memmove will be done using as many of the largest
3084 /// store operations first, followed by smaller ones, if necessary, per
3085 /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine
3086 /// with 8-bit alignment would result in nine 1-byte stores. This only
3087 /// applies to copying a constant array of constant size.
3088 unsigned MaxStoresPerMemmove;
3089 /// Likewise for functions with the OptSize attribute.
3090 unsigned MaxStoresPerMemmoveOptSize;
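// A sketch of how a target typically seeds these thresholds in its
// TargetLowering constructor (the values are illustrative only):
//
//   MaxStoresPerMemset = 16;  MaxStoresPerMemsetOptSize = 8;
//   MaxStoresPerMemcpy = 8;   MaxStoresPerMemcpyOptSize = 4;
//   MaxStoresPerMemmove = 8;  MaxStoresPerMemmoveOptSize = 4;
//   MaxLoadsPerMemcmp = 8;    MaxLoadsPerMemcmpOptSize = 4;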
3091
3092 /// Tells the code generator that select is more expensive than a branch if
3093 /// the branch is usually predicted right.
3094 bool PredictableSelectIsExpensive;
3095
3096 /// \see enableExtLdPromotion.
3097 bool EnableExtLdPromotion;
3098
3099 /// Return true if the value types that can be represented by the specified
3100 /// register class are all legal.
3101 bool isLegalRC(const TargetRegisterInfo &TRI,
3102 const TargetRegisterClass &RC) const;
3103
3104 /// Replace/modify any TargetFrameIndex operands with a target-dependent
3105 /// sequence of memory operands that is recognized by PrologEpilogInserter.
3106 MachineBasicBlock *emitPatchPoint(MachineInstr &MI,
3107 MachineBasicBlock *MBB) const;
3108
3109 /// Replace/modify the XRay custom event operands with target-dependent
3110 /// details.
3111 MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI,
3112 MachineBasicBlock *MBB) const;
3113
3114 /// Replace/modify the XRay typed event operands with target-dependent
3115 /// details.
3116 MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI,
3117 MachineBasicBlock *MBB) const;
3118
3119 bool IsStrictFPEnabled;
3120};
3121
3122/// This class defines information used to lower LLVM code to legal SelectionDAG
3123/// operators that the target instruction selector can accept natively.
3124///
3125/// This class also defines callbacks that targets must implement to lower
3126/// target-specific constructs to SelectionDAG operators.
3127class TargetLowering : public TargetLoweringBase {
3128public:
3129 struct DAGCombinerInfo;
3130 struct MakeLibCallOptions;
3131
3132 TargetLowering(const TargetLowering &) = delete;
3133 TargetLowering &operator=(const TargetLowering &) = delete;
3134
3135 explicit TargetLowering(const TargetMachine &TM);
3136
3137 bool isPositionIndependent() const;
3138
3139 virtual bool isSDNodeSourceOfDivergence(const SDNode *N,
3140 FunctionLoweringInfo *FLI,
3141 LegacyDivergenceAnalysis *DA) const {
3142 return false;
3143 }
3144
3145 virtual bool isSDNodeAlwaysUniform(const SDNode * N) const {
3146 return false;
3147 }
3148
3149 /// Returns true by value, base pointer and offset pointer and addressing mode
3150 /// by reference if the node's address can be legally represented as a
3151 /// pre-indexed load / store address.
3152 virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/,
3153 SDValue &/*Offset*/,
3154 ISD::MemIndexedMode &/*AM*/,
3155 SelectionDAG &/*DAG*/) const {
3156 return false;
3157 }
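// A simplified override sketch (hypothetical subclass "MyTargetLowering"; a
// real implementation also handles stores and checks the offset range):
//
//   bool MyTargetLowering::getPreIndexedAddressParts(
//       SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
//       SelectionDAG &DAG) const {
//     auto *LD = dyn_cast<LoadSDNode>(N);
//     if (!LD)
//       return false;
//     SDValue Ptr = LD->getBasePtr();
//     // Only recognise the simple reg + constant form here.
//     if (Ptr.getOpcode() != ISD::ADD ||
//         !isa<ConstantSDNode>(Ptr.getOperand(1)))
//       return false;
//     Base = Ptr.getOperand(0);
//     Offset = Ptr.getOperand(1);
//     AM = ISD::PRE_INC;
//     return true;
//   }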
3158
3159 /// Returns true by value, base pointer and offset pointer and addressing mode
3160 /// by reference if this node can be combined with a load / store to form a
3161 /// post-indexed load / store.
3162 virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/,
3163 SDValue &/*Base*/,
3164 SDValue &/*Offset*/,
3165 ISD::MemIndexedMode &/*AM*/,
3166 SelectionDAG &/*DAG*/) const {
3167 return false;
3168 }
3169
3170 /// Returns true if the specified base+offset is a legal indexed addressing
3171 /// mode for this target. \p MI is the load or store instruction that is being
3172 /// considered for transformation.
3173 virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset,
3174 bool IsPre, MachineRegisterInfo &MRI) const {
3175 return false;
3176 }
3177
3178 /// Return the entry encoding for a jump table in the current function. The
3179 /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum.
3180 virtual unsigned getJumpTableEncoding() const;
3181
3182 virtual const MCExpr *
3183 LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/,
3184 const MachineBasicBlock * /*MBB*/, unsigned /*uid*/,
3185 MCContext &/*Ctx*/) const {
3186 llvm_unreachable("Need to implement this hook if target has custom JTIs")::llvm::llvm_unreachable_internal("Need to implement this hook if target has custom JTIs"
, "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h"
, 3186)
;
3187 }
3188
3189 /// Returns relocation base for the given PIC jumptable.
3190 virtual SDValue getPICJumpTableRelocBase(SDValue Table,
3191 SelectionDAG &DAG) const;
3192
3193 /// This returns the relocation base for the given PIC jumptable, the same as
3194 /// getPICJumpTableRelocBase, but as an MCExpr.
3195 virtual const MCExpr *
3196 getPICJumpTableRelocBaseExpr(const MachineFunction *MF,
3197 unsigned JTI, MCContext &Ctx) const;
3198
3199 /// Return true if folding a constant offset with the given GlobalAddress is
3200 /// legal. It is frequently not legal in PIC relocation models.
3201 virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
3202
3203 bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
3204 SDValue &Chain) const;
3205
3206 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3207 SDValue &NewRHS, ISD::CondCode &CCCode,
3208 const SDLoc &DL, const SDValue OldLHS,
3209 const SDValue OldRHS) const;
3210
3211 void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS,
3212 SDValue &NewRHS, ISD::CondCode &CCCode,
3213 const SDLoc &DL, const SDValue OldLHS,
3214 const SDValue OldRHS, SDValue &Chain,
3215 bool IsSignaling = false) const;
3216
3217 /// Returns a pair of (return value, chain).
3218 /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC.
3219 std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC,
3220 EVT RetVT, ArrayRef<SDValue> Ops,
3221 MakeLibCallOptions CallOptions,
3222 const SDLoc &dl,
3223 SDValue Chain = SDValue()) const;
3224
3225 /// Check whether parameters to a call that are passed in callee saved
3226 /// registers are the same as from the calling function. This needs to be
3227 /// checked for tail call eligibility.
3228 bool parametersInCSRMatch(const MachineRegisterInfo &MRI,
3229 const uint32_t *CallerPreservedMask,
3230 const SmallVectorImpl<CCValAssign> &ArgLocs,
3231 const SmallVectorImpl<SDValue> &OutVals) const;
3232
3233 //===--------------------------------------------------------------------===//
3234 // TargetLowering Optimization Methods
3235 //
3236
3237 /// A convenience struct that encapsulates a DAG, and two SDValues for
3238 /// returning information from TargetLowering to its clients that want to
3239 /// combine.
3240 struct TargetLoweringOpt {
3241 SelectionDAG &DAG;
3242 bool LegalTys;
3243 bool LegalOps;
3244 SDValue Old;
3245 SDValue New;
3246
3247 explicit TargetLoweringOpt(SelectionDAG &InDAG,
3248 bool LT, bool LO) :
3249 DAG(InDAG), LegalTys(LT), LegalOps(LO) {}
3250
3251 bool LegalTypes() const { return LegalTys; }
3252 bool LegalOperations() const { return LegalOps; }
3253
3254 bool CombineTo(SDValue O, SDValue N) {
3255 Old = O;
3256 New = N;
3257 return true;
3258 }
3259 };
3260
3261 /// Determines the optimal series of memory ops to replace the memset / memcpy.
3262 /// Return true if the number of memory ops is below the threshold (Limit).
3263 /// It returns the types of the sequence of memory ops to perform
3264 /// memset / memcpy by reference.
3265 bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit,
3266 const MemOp &Op, unsigned DstAS, unsigned SrcAS,
3267 const AttributeList &FuncAttributes) const;
3268
3269 /// Check to see if the specified operand of the specified instruction is a
3270 /// constant integer. If so, check to see if there are any bits set in the
3271 /// constant that are not demanded. If so, shrink the constant and return
3272 /// true.
3273 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3274 const APInt &DemandedElts,
3275 TargetLoweringOpt &TLO) const;
3276
3277 /// Helper wrapper around ShrinkDemandedConstant, demanding all elements.
3278 bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits,
3279 TargetLoweringOpt &TLO) const;
3280
3281 // Target hook to do target-specific const optimization, which is called by
3282 // ShrinkDemandedConstant. This function should return true if the target
3283 // doesn't want ShrinkDemandedConstant to further optimize the constant.
3284 virtual bool targetShrinkDemandedConstant(SDValue Op,
3285 const APInt &DemandedBits,
3286 const APInt &DemandedElts,
3287 TargetLoweringOpt &TLO) const {
3288 return false;
3289 }
3290
3291 /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This
3292 /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be
3293 /// generalized for targets with other types of implicit widening casts.
3294 bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded,
3295 TargetLoweringOpt &TLO) const;
3296
3297 /// Look at Op. At this point, we know that only the DemandedBits bits of the
3298 /// result of Op are ever used downstream. If we can use this information to
3299 /// simplify Op, create a new simplified DAG node and return true, returning
3300 /// the original and new nodes in Old and New. Otherwise, analyze the
3301 /// expression and return a mask of KnownOne and KnownZero bits for the
3302 /// expression (used to simplify the caller). The KnownZero/One bits may only
3303 /// be accurate for those bits in the Demanded masks.
3304 /// \p AssumeSingleUse When this parameter is true, this function will
3305 /// attempt to simplify \p Op even if there are multiple uses.
3306 /// Callers are responsible for correctly updating the DAG based on the
3307 /// results of this function, because simply replacing TLO.Old
3308 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
3309 /// has multiple uses.
3310 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3311 const APInt &DemandedElts, KnownBits &Known,
3312 TargetLoweringOpt &TLO, unsigned Depth = 0,
3313 bool AssumeSingleUse = false) const;
3314
3315 /// Helper wrapper around SimplifyDemandedBits, demanding all elements.
3316 /// Adds Op back to the worklist upon success.
3317 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3318 KnownBits &Known, TargetLoweringOpt &TLO,
3319 unsigned Depth = 0,
3320 bool AssumeSingleUse = false) const;
3321
3322 /// Helper wrapper around SimplifyDemandedBits.
3323 /// Adds Op back to the worklist upon success.
3324 bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
3325 DAGCombinerInfo &DCI) const;
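// Typical call pattern from a target DAG combine (a sketch; Op, N, DCI and
// BitWidth are assumed to be in scope, and the demanded mask is illustrative):
//
//   const TargetLowering &TLI = DCI.DAG.getTargetLoweringInfo();
//   TargetLoweringOpt TLO(DCI.DAG, !DCI.isBeforeLegalize(),
//                         !DCI.isBeforeLegalizeOps());
//   KnownBits Known;
//   if (TLI.SimplifyDemandedBits(Op, APInt::getLowBitsSet(BitWidth, 16),
//                                Known, TLO)) {
//     DCI.CommitTargetLoweringOpt(TLO); // commits the TLO.Old -> TLO.New swap
//     return SDValue(N, 0);             // report that the node was handled
//   }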
3326
3327 /// More limited version of SimplifyDemandedBits that can be used to "look
3328 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
3329 /// bitwise ops etc.
3330 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
3331 const APInt &DemandedElts,
3332 SelectionDAG &DAG,
3333 unsigned Depth) const;
3334
3335 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
3336 /// elements.
3337 SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits,
3338 SelectionDAG &DAG,
3339 unsigned Depth = 0) const;
3340
3341 /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all
3342 /// bits from only some vector elements.
3343 SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op,
3344 const APInt &DemandedElts,
3345 SelectionDAG &DAG,
3346 unsigned Depth = 0) const;
3347
3348 /// Look at Vector Op. At this point, we know that only the DemandedElts
3349 /// elements of the result of Op are ever used downstream. If we can use
3350 /// this information to simplify Op, create a new simplified DAG node and
3351 /// return true, storing the original and new nodes in TLO.
3352 /// Otherwise, analyze the expression and return a mask of KnownUndef and
3353 /// KnownZero elements for the expression (used to simplify the caller).
3354 /// The KnownUndef/Zero elements may only be accurate for those bits
3355 /// in the DemandedMask.
3356 /// \p AssumeSingleUse When this parameter is true, this function will
3357 /// attempt to simplify \p Op even if there are multiple uses.
3358 /// Callers are responsible for correctly updating the DAG based on the
3359 /// results of this function, because simply replacing TLO.Old
3360 /// with TLO.New will be incorrect when this parameter is true and TLO.Old
3361 /// has multiple uses.
3362 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask,
3363 APInt &KnownUndef, APInt &KnownZero,
3364 TargetLoweringOpt &TLO, unsigned Depth = 0,
3365 bool AssumeSingleUse = false) const;
3366
3367 /// Helper wrapper around SimplifyDemandedVectorElts.
3368 /// Adds Op back to the worklist upon success.
3369 bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
3370 APInt &KnownUndef, APInt &KnownZero,
3371 DAGCombinerInfo &DCI) const;
3372
3373 /// Determine which of the bits specified in Mask are known to be either zero
3374 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
3375 /// argument allows us to only collect the known bits that are shared by the
3376 /// requested vector elements.
3377 virtual void computeKnownBitsForTargetNode(const SDValue Op,
3378 KnownBits &Known,
3379 const APInt &DemandedElts,
3380 const SelectionDAG &DAG,
3381 unsigned Depth = 0) const;
3382
3383 /// Determine which of the bits specified in Mask are known to be either zero
3384 /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts
3385 /// argument allows us to only collect the known bits that are shared by the
3386 /// requested vector elements. This is for GISel.
3387 virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis,
3388 Register R, KnownBits &Known,
3389 const APInt &DemandedElts,
3390 const MachineRegisterInfo &MRI,
3391 unsigned Depth = 0) const;
3392
3393 /// Determine the known alignment for the pointer value \p R. This can
3394 /// typically be inferred from the number of low known 0 bits. However, for a
3395 /// pointer with a non-integral address space, the alignment value may be
3396 /// independent from the known low bits.
3397 virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis,
3398 Register R,
3399 const MachineRegisterInfo &MRI,
3400 unsigned Depth = 0) const;
3401
3402 /// Determine which of the bits of FrameIndex \p FIOp are known to be 0.
3403 /// Default implementation computes low bits based on alignment
3404 /// information. This should preserve known bits passed into it.
3405 virtual void computeKnownBitsForFrameIndex(int FIOp,
3406 KnownBits &Known,
3407 const MachineFunction &MF) const;
3408
3409 /// This method can be implemented by targets that want to expose additional
3410 /// information about sign bits to the DAG Combiner. The DemandedElts
3411 /// argument allows us to only collect the minimum sign bits that are shared
3412 /// by the requested vector elements.
3413 virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op,
3414 const APInt &DemandedElts,
3415 const SelectionDAG &DAG,
3416 unsigned Depth = 0) const;
3417
3418 /// This method can be implemented by targets that want to expose additional
3419 /// information about sign bits to GlobalISel combiners. The DemandedElts
3420 /// argument allows us to only collect the minimum sign bits that are shared
3421 /// by the requested vector elements.
3422 virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis,
3423 Register R,
3424 const APInt &DemandedElts,
3425 const MachineRegisterInfo &MRI,
3426 unsigned Depth = 0) const;
3427
3428 /// Attempt to simplify any target nodes based on the demanded vector
3429 /// elements, returning true on success. Otherwise, analyze the expression and
3430 /// return a mask of KnownUndef and KnownZero elements for the expression
3431 /// (used to simplify the caller). The KnownUndef/Zero elements may only be
3432 /// accurate for those bits in the DemandedMask.
3433 virtual bool SimplifyDemandedVectorEltsForTargetNode(
3434 SDValue Op, const APInt &DemandedElts, APInt &KnownUndef,
3435 APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const;
3436
3437 /// Attempt to simplify any target nodes based on the demanded bits/elts,
3438 /// returning true on success. Otherwise, analyze the
3439 /// expression and return a mask of KnownOne and KnownZero bits for the
3440 /// expression (used to simplify the caller). The KnownZero/One bits may only
3441 /// be accurate for those bits in the Demanded masks.
3442 virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op,
3443 const APInt &DemandedBits,
3444 const APInt &DemandedElts,
3445 KnownBits &Known,
3446 TargetLoweringOpt &TLO,
3447 unsigned Depth = 0) const;
3448
3449 /// More limited version of SimplifyDemandedBits that can be used to "look
3450 /// through" ops that don't contribute to the DemandedBits/DemandedElts -
3451 /// bitwise ops etc.
3452 virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
3453 SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
3454 SelectionDAG &DAG, unsigned Depth) const;
3455
3456 /// Tries to build a legal vector shuffle using the provided parameters
3457 /// or equivalent variations. The Mask argument may be modified as the
3458 /// function tries different variations.
3459 /// Returns an empty SDValue if the operation fails.
3460 SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
3461 SDValue N1, MutableArrayRef<int> Mask,
3462 SelectionDAG &DAG) const;
3463
3464 /// This method returns the constant pool value that will be loaded by LD.
3465 /// NOTE: You must check for implicit extensions of the constant by LD.
3466 virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const;
3467
3468 /// If \p SNaN is false, \returns true if \p Op is known to never be any
3469 /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling
3470 /// NaN.
3471 virtual bool isKnownNeverNaNForTargetNode(SDValue Op,
3472 const SelectionDAG &DAG,
3473 bool SNaN = false,
3474 unsigned Depth = 0) const;
3475 struct DAGCombinerInfo {
3476 void *DC; // The DAG Combiner object.
3477 CombineLevel Level;
3478 bool CalledByLegalizer;
3479
3480 public:
3481 SelectionDAG &DAG;
3482
3483 DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc)
3484 : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {}
3485
3486 bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; }
3487 bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; }
3488 bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; }
3489 CombineLevel getDAGCombineLevel() { return Level; }
3490 bool isCalledByLegalizer() const { return CalledByLegalizer; }
3491
3492 void AddToWorklist(SDNode *N);
3493 SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true);
3494 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true);
3495 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true);
3496
3497 bool recursivelyDeleteUnusedNodes(SDNode *N);
3498
3499 void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO);
3500 };
3501
3502 /// Return true if \p N is a constant or constant vector equal to the true value
3503 /// from getBooleanContents().
3504 bool isConstTrueVal(const SDNode *N) const;
3505
3506 /// Return true if \p N is a constant or constant vector equal to the false value
3507 /// from getBooleanContents().
3508 bool isConstFalseVal(const SDNode *N) const;
3509
3510 /// Return true if \p N is a true value when extended to \p VT.
3511 bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const;
3512
3513 /// Try to simplify a setcc built with the specified operands and cc. If it is
3514 /// unable to simplify it, return a null SDValue.
3515 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
3516 bool foldBooleans, DAGCombinerInfo &DCI,
3517 const SDLoc &dl) const;
3518
3519 // For targets which wrap address, unwrap for analysis.
3520 virtual SDValue unwrapAddress(SDValue N) const { return N; }
3521
3522 /// Returns true (and the GlobalValue and the offset) if the node is a
3523 /// GlobalAddress + offset.
3524 virtual bool
3525 isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const;
3526
3527 /// This method will be invoked for all target nodes and for any
3528 /// target-independent nodes that the target has registered interest in (via
3529 /// setTargetDAGCombine).
3530 ///
3531 /// The semantics are as follows:
3532 /// Return Value:
3533 /// SDValue.Val == 0 - No change was made
3534 /// SDValue.Val == N - N was replaced, is dead, and is already handled.
3535 /// otherwise - N should be replaced by the returned Operand.
3536 ///
3537 /// In addition, methods provided by DAGCombinerInfo may be used to perform
3538 /// more complex transformations.
3539 ///
3540 virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
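// Override skeleton (a sketch; "MyTargetLowering" and "performAddCombine" are
// hypothetical names):
//
//   SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
//                                               DAGCombinerInfo &DCI) const {
//     switch (N->getOpcode()) {
//     case ISD::ADD:
//       return performAddCombine(N, DCI); // hypothetical helper
//     default:
//       break;
//     }
//     return SDValue(); // "no change was made"
//   }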
3541
3542 /// Return true if it is profitable to move this shift by a constant amount
3543 /// through its operand, adjusting any immediate operands as necessary to
3544 /// preserve semantics. This transformation may not be desirable if it
3545 /// disrupts a particularly auspicious target-specific tree (e.g. bitfield
3546 /// extraction in AArch64). By default, it returns true.
3547 ///
3548 /// @param N the shift node
3549 /// @param Level the current DAGCombine legalization level.
3550 virtual bool isDesirableToCommuteWithShift(const SDNode *N,
3551 CombineLevel Level) const {
3552 return true;
3553 }
3554
3555 /// Return true if the target has native support for the specified value type
3556 /// and it is 'desirable' to use the type for the given node type. e.g. On x86
3557 /// i16 is legal, but undesirable since i16 instruction encodings are longer
3558 /// and some i16 instructions are slow.
3559 virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const {
3560 // By default, assume all legal types are desirable.
3561 return isTypeLegal(VT);
3562 }
3563
3564 /// Return true if it is profitable for dag combiner to transform a floating
3565 /// point op of the specified opcode to an equivalent op of an integer
3566 /// type. e.g. f32 load -> i32 load can be profitable on ARM.
3567 virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/,
3568 EVT /*VT*/) const {
3569 return false;
3570 }
3571
3572 /// This method queries the target whether it is beneficial for dag combiner to
3573 /// promote the specified node. If true, it should return the desired
3574 /// promotion type by reference.
3575 virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const {
3576 return false;
3577 }
3578
3579 /// Return true if the target supports swifterror attribute. It optimizes
3580 /// loads and stores into reads and writes of a specific register.
3581 virtual bool supportSwiftError() const {
3582 return false;
3583 }
3584
3585 /// Return true if the target supports that a subset of CSRs for the given
3586 /// machine function is handled explicitly via copies.
3587 virtual bool supportSplitCSR(MachineFunction *MF) const {
3588 return false;
3589 }
3590
3591 /// Perform necessary initialization to handle a subset of CSRs explicitly
3592 /// via copies. This function is called at the beginning of instruction
3593 /// selection.
3594 virtual void initializeSplitCSR(MachineBasicBlock *Entry) const {
3595 llvm_unreachable("Not Implemented");
3596 }
3597
3598 /// Insert explicit copies in entry and exit blocks. We copy a subset of
3599 /// CSRs to virtual registers in the entry block, and copy them back to
3600 /// physical registers in the exit blocks. This function is called at the end
3601 /// of instruction selection.
3602 virtual void insertCopiesSplitCSR(
3603 MachineBasicBlock *Entry,
3604 const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
3605 llvm_unreachable("Not Implemented");
3606 }
3607
3608 /// Return the newly negated expression if the cost is not expensive and
3609 /// set the cost in \p Cost to indicate whether it is cheaper or neutral to
3610 /// do the negation.
3611 virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG,
3612 bool LegalOps, bool OptForSize,
3613 NegatibleCost &Cost,
3614 unsigned Depth = 0) const;
3615
3616 /// This is the helper function to return the newly negated expression only
3617 /// when the cost is cheaper.
3618 SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG,
3619 bool LegalOps, bool OptForSize,
3620 unsigned Depth = 0) const {
3621 NegatibleCost Cost = NegatibleCost::Expensive;
3622 SDValue Neg =
3623 getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
3624 if (Neg && Cost == NegatibleCost::Cheaper)
3625 return Neg;
3626 // Remove the newly created node to avoid side effects on the DAG.
3627 if (Neg && Neg.getNode()->use_empty())
3628 DAG.RemoveDeadNode(Neg.getNode());
3629 return SDValue();
3630 }
3631
3632 /// This is the helper function to return the newly negated expression if
3633 /// the cost is not expensive.
3634 SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps,
3635 bool OptForSize, unsigned Depth = 0) const {
3636 NegatibleCost Cost = NegatibleCost::Expensive;
3637 return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth);
3638 }
3639
3640 //===--------------------------------------------------------------------===//
3641 // Lowering methods - These methods must be implemented by targets so that
3642 // the SelectionDAGBuilder code knows how to lower these.
3643 //
3644
3645 /// Target-specific splitting of values into parts that fit a register
3646 /// storing a legal type
3647 virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL,
3648 SDValue Val, SDValue *Parts,
3649 unsigned NumParts, MVT PartVT,
3650 Optional<CallingConv::ID> CC) const {
3651 return false;
3652 }
3653
3654 /// Target-specific combining of register parts into its original value
3655 virtual SDValue
3656 joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL,
3657 const SDValue *Parts, unsigned NumParts,
3658 MVT PartVT, EVT ValueVT,
3659 Optional<CallingConv::ID> CC) const {
3660 return SDValue();
3661 }
3662
3663 /// This hook must be implemented to lower the incoming (formal) arguments,
3664 /// described by the Ins array, into the specified DAG. The implementation
3665 /// should fill in the InVals array with legal-type argument values, and
3666 /// return the resulting token chain value.
3667 virtual SDValue LowerFormalArguments(
3668 SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/,
3669 const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/,
3670 SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const {
3671 llvm_unreachable("Not Implemented");
3672 }
3673
3674 /// This structure contains all information that is necessary for lowering
3675 /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder
3676 /// needs to lower a call, and targets will see this struct in their LowerCall
3677 /// implementation.
3678 struct CallLoweringInfo {
3679 SDValue Chain;
3680 Type *RetTy = nullptr;
3681 bool RetSExt : 1;
3682 bool RetZExt : 1;
3683 bool IsVarArg : 1;
3684 bool IsInReg : 1;
3685 bool DoesNotReturn : 1;
3686 bool IsReturnValueUsed : 1;
3687 bool IsConvergent : 1;
3688 bool IsPatchPoint : 1;
3689 bool IsPreallocated : 1;
3690 bool NoMerge : 1;
3691
3692 // IsTailCall should be modified by implementations of
3693 // TargetLowering::LowerCall that perform tail call conversions.
3694 bool IsTailCall = false;
3695
3696 // Is Call lowering done post SelectionDAG type legalization.
3697 bool IsPostTypeLegalization = false;
3698
3699 unsigned NumFixedArgs = -1;
3700 CallingConv::ID CallConv = CallingConv::C;
3701 SDValue Callee;
3702 ArgListTy Args;
3703 SelectionDAG &DAG;
3704 SDLoc DL;
3705 const CallBase *CB = nullptr;
3706 SmallVector<ISD::OutputArg, 32> Outs;
3707 SmallVector<SDValue, 32> OutVals;
3708 SmallVector<ISD::InputArg, 32> Ins;
3709 SmallVector<SDValue, 4> InVals;
3710
3711 CallLoweringInfo(SelectionDAG &DAG)
3712 : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
3713 DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
3714 IsPatchPoint(false), IsPreallocated(false), NoMerge(false),
3715 DAG(DAG) {}
3716
3717 CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
3718 DL = dl;
3719 return *this;
3720 }
3721
3722 CallLoweringInfo &setChain(SDValue InChain) {
3723 Chain = InChain;
3724 return *this;
3725 }
3726
3727 // setCallee with target/module-specific attributes
3728 CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType,
3729 SDValue Target, ArgListTy &&ArgsList) {
3730 RetTy = ResultType;
3731 Callee = Target;
3732 CallConv = CC;
3733 NumFixedArgs = ArgsList.size();
3734 Args = std::move(ArgsList);
3735
3736 DAG.getTargetLoweringInfo().markLibCallAttributes(
3737 &(DAG.getMachineFunction()), CC, Args);
3738 return *this;
3739 }
3740
3741 CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType,
3742 SDValue Target, ArgListTy &&ArgsList) {
3743 RetTy = ResultType;
3744 Callee = Target;
3745 CallConv = CC;
3746 NumFixedArgs = ArgsList.size();
3747 Args = std::move(ArgsList);
3748 return *this;
3749 }
3750
3751 CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy,
3752 SDValue Target, ArgListTy &&ArgsList,
3753 const CallBase &Call) {
3754 RetTy = ResultType;
3755
3756 IsInReg = Call.hasRetAttr(Attribute::InReg);
3757 DoesNotReturn =
3758 Call.doesNotReturn() ||
3759 (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode()));
3760 IsVarArg = FTy->isVarArg();
3761 IsReturnValueUsed = !Call.use_empty();
3762 RetSExt = Call.hasRetAttr(Attribute::SExt);
3763 RetZExt = Call.hasRetAttr(Attribute::ZExt);
3764 NoMerge = Call.hasFnAttr(Attribute::NoMerge);
3765
3766 Callee = Target;
3767
3768 CallConv = Call.getCallingConv();
3769 NumFixedArgs = FTy->getNumParams();
3770 Args = std::move(ArgsList);
3771
3772 CB = &Call;
3773
3774 return *this;
3775 }
3776
3777 CallLoweringInfo &setInRegister(bool Value = true) {
3778 IsInReg = Value;
3779 return *this;
3780 }
3781
3782 CallLoweringInfo &setNoReturn(bool Value = true) {
3783 DoesNotReturn = Value;
3784 return *this;
3785 }
3786
3787 CallLoweringInfo &setVarArg(bool Value = true) {
3788 IsVarArg = Value;
3789 return *this;
3790 }
3791
3792 CallLoweringInfo &setTailCall(bool Value = true) {
3793 IsTailCall = Value;
3794 return *this;
3795 }
3796
3797 CallLoweringInfo &setDiscardResult(bool Value = true) {
3798 IsReturnValueUsed = !Value;
3799 return *this;
3800 }
3801
3802 CallLoweringInfo &setConvergent(bool Value = true) {
3803 IsConvergent = Value;
3804 return *this;
3805 }
3806
3807 CallLoweringInfo &setSExtResult(bool Value = true) {
3808 RetSExt = Value;
3809 return *this;
3810 }
3811
3812 CallLoweringInfo &setZExtResult(bool Value = true) {
3813 RetZExt = Value;
3814 return *this;
3815 }
3816
3817 CallLoweringInfo &setIsPatchPoint(bool Value = true) {
3818 IsPatchPoint = Value;
3819 return *this;
3820 }
3821
3822 CallLoweringInfo &setIsPreallocated(bool Value = true) {
3823 IsPreallocated = Value;
3824 return *this;
3825 }
3826
3827 CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
3828 IsPostTypeLegalization = Value;
3829 return *this;
3830 }
3831
3832 ArgListTy &getArgs() {
3833 return Args;
3834 }
3835 };
3836
3837 /// This structure is used to pass arguments to makeLibCall function.
3838 struct MakeLibCallOptions {
3839 // By passing the type list from before softening to makeLibCall, the target
3840 // hook shouldExtendTypeInLibCall can get the original type before softening.
3841 ArrayRef<EVT> OpsVTBeforeSoften;
3842 EVT RetVTBeforeSoften;
3843 bool IsSExt : 1;
3844 bool DoesNotReturn : 1;
3845 bool IsReturnValueUsed : 1;
3846 bool IsPostTypeLegalization : 1;
3847 bool IsSoften : 1;
3848
3849 MakeLibCallOptions()
3850 : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true),
3851 IsPostTypeLegalization(false), IsSoften(false) {}
3852
3853 MakeLibCallOptions &setSExt(bool Value = true) {
3854 IsSExt = Value;
3855 return *this;
3856 }
3857
3858 MakeLibCallOptions &setNoReturn(bool Value = true) {
3859 DoesNotReturn = Value;
3860 return *this;
3861 }
3862
3863 MakeLibCallOptions &setDiscardResult(bool Value = true) {
3864 IsReturnValueUsed = !Value;
3865 return *this;
3866 }
3867
3868 MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) {
3869 IsPostTypeLegalization = Value;
3870 return *this;
3871 }
3872
3873 MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT,
3874 bool Value = true) {
3875 OpsVTBeforeSoften = OpsVT;
3876 RetVTBeforeSoften = RetVT;
3877 IsSoften = Value;
3878 return *this;
3879 }
3880 };
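For context, these options are consumed by makeLibCall, which is declared elsewhere in this class, so the chained setters compose naturally at the call site. A hedged sketch, softening an f32 add into the __addsf3 libcall and recording the pre-soften types for shouldExtendTypeInLibCall (MyTargetLowering is hypothetical):

// Sketch only; assumes TargetLowering::makeLibCall as declared in this class.
SDValue MyTargetLowering::softenFAddToLibcall(SDValue Op,
                                              SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Ops[2] = {Op.getOperand(0), Op.getOperand(1)};
  EVT OpsVT[2] = {MVT::f32, MVT::f32};

  MakeLibCallOptions CallOptions;
  CallOptions.setTypeListBeforeSoften(OpsVT, MVT::f32);

  // RTLIB::ADD_F32 names the __addsf3 runtime routine; .first is the result,
  // .second the output chain.
  return makeLibCall(DAG, RTLIB::ADD_F32, MVT::f32, Ops, CallOptions, dl).first;
}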
3881
3882 /// This function lowers an abstract call to a function into an actual call.
3883 /// This returns a pair of operands. The first element is the return value
3884 /// for the function (if RetTy is not VoidTy). The second element is the
3885 /// outgoing token chain. It calls LowerCall to do the actual lowering.
3886 std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const;
3887
3888 /// This hook must be implemented to lower calls into the specified
3889 /// DAG. The outgoing arguments to the call are described by the Outs array,
3890 /// and the values to be returned by the call are described by the Ins
3891 /// array. The implementation should fill in the InVals array with legal-type
3892 /// return values from the call, and return the resulting token chain value.
3893 virtual SDValue
3894 LowerCall(CallLoweringInfo &/*CLI*/,
3895 SmallVectorImpl<SDValue> &/*InVals*/) const {
3896 llvm_unreachable("Not Implemented");
3897 }
3898
3899 /// Target-specific cleanup for formal ByVal parameters.
3900 virtual void HandleByVal(CCState *, unsigned &, Align) const {}
3901
3902 /// This hook should be implemented to check whether the return values
3903 /// described by the Outs array can fit into the return registers. If false
3904 /// is returned, an sret-demotion is performed.
3905 virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/,
3906 MachineFunction &/*MF*/, bool /*isVarArg*/,
3907 const SmallVectorImpl<ISD::OutputArg> &/*Outs*/,
3908 LLVMContext &/*Context*/) const
3909 {
3910 // Return true by default to get preexisting behavior.
3911 return true;
3912 }
3913
3914 /// This hook must be implemented to lower outgoing return values, described
3915 /// by the Outs array, into the specified DAG. The implementation should
3916 /// return the resulting token chain value.
3917 virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/,
3918 bool /*isVarArg*/,
3919 const SmallVectorImpl<ISD::OutputArg> & /*Outs*/,
3920 const SmallVectorImpl<SDValue> & /*OutVals*/,
3921 const SDLoc & /*dl*/,
3922 SelectionDAG & /*DAG*/) const {
3923 llvm_unreachable("Not Implemented");
3924 }
3925
3926 /// Return true if the result of the specified node is used by a return node
3927 /// only. It also computes and returns the input chain for the tail call.
3928 ///
3929 /// This is used to determine whether it is possible to codegen a libcall as
3930 /// tail call at legalization time.
3931 virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const {
3932 return false;
3933 }
3934
3935 /// Return true if the target may be able to emit the call instruction as a tail
3936 /// call. This is used by optimization passes to determine if it's profitable
3937 /// to duplicate return instructions to enable tailcall optimization.
3938 virtual bool mayBeEmittedAsTailCall(const CallInst *) const {
3939 return false;
3940 }
3941
3942 /// Return the builtin name for the __builtin___clear_cache intrinsic
3943 /// Default is to invoke the clear cache library call
3944 virtual const char * getClearCacheBuiltinName() const {
3945 return "__clear_cache";
3946 }
3947
3948 /// Return the register ID of the name passed in. Used by named register
3949 /// global variables extension. There is no target-independent behaviour
3950 /// so the default action is to bail.
3951 virtual Register getRegisterByName(const char* RegName, LLT Ty,
3952 const MachineFunction &MF) const {
3953 report_fatal_error("Named registers not implemented for this target");
3954 }
3955
3956 /// Return the type that should be used to zero or sign extend a
3957 /// zeroext/signext integer return value. FIXME: Some C calling conventions
3958 /// require the return type to be promoted, but this is not true all the time,
3959 /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling
3960 /// conventions. The frontend should handle this and include all of the
3961 /// necessary information.
3962 virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT,
3963 ISD::NodeType /*ExtendKind*/) const {
3964 EVT MinVT = getRegisterType(Context, MVT::i32);
3965 return VT.bitsLT(MinVT) ? MinVT : VT;
3966 }
3967
3968 /// For some targets, an LLVM struct type must be broken down into multiple
3969 /// simple types, but the calling convention specifies that the entire struct
3970 /// must be passed in a block of consecutive registers.
3971 virtual bool
3972 functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv,
3973 bool isVarArg) const {
3974 return false;
3975 }
3976
3977 /// For most targets, an LLVM type must be broken down into multiple
3978 /// smaller types. Usually the halves are ordered according to the endianness
3979 /// but for some platforms that would break. So this method will default to
3980 /// matching the endianness but can be overridden.
3981 virtual bool
3982 shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const {
3983 return DL.isLittleEndian();
3984 }
3985
3986 /// Returns a 0-terminated array of registers that can be safely used as
3987 /// scratch registers.
3988 virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const {
3989 return nullptr;
3990 }
3991
3992 /// This callback is used to prepare for a volatile or atomic load.
3993 /// It takes a chain node as input and returns the chain for the load itself.
3994 ///
3995 /// Having a callback like this is necessary for targets like SystemZ,
3996 /// which allow a CPU to reuse the result of a previous load indefinitely,
3997 /// even if a cache-coherent store is performed by another CPU. The default
3998 /// implementation does nothing.
3999 virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL,
4000 SelectionDAG &DAG) const {
4001 return Chain;
4002 }
4003
4004 /// Should SelectionDAG lower an atomic store of the given kind as a normal
4005 /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
4006 /// eventually migrate all targets to using StoreSDNodes, but porting is
4007 /// being done one target at a time.
4008 virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
4009 assert(SI.isAtomic() && "violated precondition");
4010 return false;
4011 }
4012
4013 /// Should SelectionDAG lower an atomic load of the given kind as a normal
4014 /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
4015 /// eventually migrate all targets to using LoadSDNodes, but porting is
4016 /// being done one target at a time.
4017 virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const {
4018 assert(LI.isAtomic() && "violated precondition");
4019 return false;
4020 }
4021
4022
4023 /// This callback is invoked by the type legalizer to legalize nodes with an
4024 /// illegal operand type but legal result types. It replaces the
4025 /// LowerOperation callback in the type Legalizer. The reason we can not do
4026 /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to
4027 /// use this callback.
4028 ///
4029 /// TODO: Consider merging with ReplaceNodeResults.
4030 ///
4031 /// The target places new result values for the node in Results (their number
4032 /// and types must exactly match those of the original return values of
4033 /// the node), or leaves Results empty, which indicates that the node is not
4034 /// to be custom lowered after all.
4035 /// The default implementation calls LowerOperation.
4036 virtual void LowerOperationWrapper(SDNode *N,
4037 SmallVectorImpl<SDValue> &Results,
4038 SelectionDAG &DAG) const;
4039
4040 /// This callback is invoked for operations that are unsupported by the
4041 /// target, which are registered to use 'custom' lowering, and whose defined
4042 /// values are all legal. If the target has no operations that require custom
4043 /// lowering, it need not implement this. The default implementation of this
4044 /// aborts.
4045 virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
4046
4047 /// This callback is invoked when a node result type is illegal for the
4048 /// target, and the operation was registered to use 'custom' lowering for that
4049 /// result type. The target places new result values for the node in Results
4050 /// (their number and types must exactly match those of the original return
4051 /// values of the node), or leaves Results empty, which indicates that the
4052 /// node is not to be custom lowered after all.
4053 ///
4054 /// If the target has no operations that require custom lowering, it need not
4055 /// implement this. The default implementation aborts.
4056 virtual void ReplaceNodeResults(SDNode * /*N*/,
4057 SmallVectorImpl<SDValue> &/*Results*/,
4058 SelectionDAG &/*DAG*/) const {
4059 llvm_unreachable("ReplaceNodeResults not implemented for this target!");
4060 }
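A hedged sketch of a typical override: the target either pushes replacement values whose number and types match the original node, or returns with Results left empty to decline. The i8-CTPOP widening below is purely illustrative; MyTargetLowering is hypothetical.

// Sketch only (not part of this header).
void MyTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  SDLoc dl(N);
  switch (N->getOpcode()) {
  default:
    return; // leave Results empty: not custom lowered after all
  case ISD::CTPOP: {
    // Widen an illegal-typed CTPOP to i32, then truncate back.
    SDValue Wide =
        DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N->getOperand(0));
    SDValue Pop = DAG.getNode(ISD::CTPOP, dl, MVT::i32, Wide);
    Results.push_back(
        DAG.getNode(ISD::TRUNCATE, dl, N->getValueType(0), Pop));
    return;
  }
  }
}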
4061
4062 /// This method returns the name of a target specific DAG node.
4063 virtual const char *getTargetNodeName(unsigned Opcode) const;
4064
4065 /// This method returns a target specific FastISel object, or null if the
4066 /// target does not support "fast" ISel.
4067 virtual FastISel *createFastISel(FunctionLoweringInfo &,
4068 const TargetLibraryInfo *) const {
4069 return nullptr;
4070 }
4071
4072 bool verifyReturnAddressArgumentIsConstant(SDValue Op,
4073 SelectionDAG &DAG) const;
4074
4075 //===--------------------------------------------------------------------===//
4076 // Inline Asm Support hooks
4077 //
4078
4079 /// This hook allows the target to expand an inline asm call to be explicit
4080 /// llvm code if it wants to. This is useful for turning simple inline asms
4081 /// into LLVM intrinsics, which gives the compiler more information about the
4082 /// behavior of the code.
4083 virtual bool ExpandInlineAsm(CallInst *) const {
4084 return false;
4085 }
4086
4087 enum ConstraintType {
4088 C_Register, // Constraint represents specific register(s).
4089 C_RegisterClass, // Constraint represents any of register(s) in class.
4090 C_Memory, // Memory constraint.
4091 C_Immediate, // Requires an immediate.
4092 C_Other, // Something else.
4093 C_Unknown // Unsupported constraint.
4094 };
4095
4096 enum ConstraintWeight {
4097 // Generic weights.
4098 CW_Invalid = -1, // No match.
4099 CW_Okay = 0, // Acceptable.
4100 CW_Good = 1, // Good weight.
4101 CW_Better = 2, // Better weight.
4102 CW_Best = 3, // Best weight.
4103
4104 // Well-known weights.
4105 CW_SpecificReg = CW_Okay, // Specific register operands.
4106 CW_Register = CW_Good, // Register operands.
4107 CW_Memory = CW_Better, // Memory operands.
4108 CW_Constant = CW_Best, // Constant operand.
4109 CW_Default = CW_Okay // Default or don't know type.
4110 };
4111
4112 /// This contains information for each constraint that we are lowering.
4113 struct AsmOperandInfo : public InlineAsm::ConstraintInfo {
4114 /// This contains the actual string for the code, like "m". TargetLowering
4115 /// picks the 'best' code from ConstraintInfo::Codes that most closely
4116 /// matches the operand.
4117 std::string ConstraintCode;
4118
4119 /// Information about the constraint code, e.g. Register, RegisterClass,
4120 /// Memory, Other, Unknown.
4121 TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown;
4122
4123 /// If this is the result output operand or a clobber, this is null,
4124 /// otherwise it is the incoming operand to the CallInst. This gets
4125 /// modified as the asm is processed.
4126 Value *CallOperandVal = nullptr;
4127
4128 /// The ValueType for the operand value.
4129 MVT ConstraintVT = MVT::Other;
4130
4131 /// Copy constructor for copying from a ConstraintInfo.
4132 AsmOperandInfo(InlineAsm::ConstraintInfo Info)
4133 : InlineAsm::ConstraintInfo(std::move(Info)) {}
4134
4135 /// Return true if this is an input operand that is a matching constraint
4136 /// like "4".
4137 bool isMatchingInputConstraint() const;
4138
4139 /// If this is an input matching constraint, this method returns the output
4140 /// operand it matches.
4141 unsigned getMatchedOperand() const;
4142 };
4143
4144 using AsmOperandInfoVector = std::vector<AsmOperandInfo>;
4145
4146 /// Split up the constraint string from the inline assembly value into the
4147 /// specific constraints and their prefixes, and also tie in the associated
4148 /// operand values. If this returns an empty vector, and if the constraint
4149 /// string itself isn't empty, there was an error parsing.
4150 virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL,
4151 const TargetRegisterInfo *TRI,
4152 const CallBase &Call) const;
4153
4154 /// Examine constraint type and operand type and determine a weight value.
4155 /// The operand object must already have been set up with the operand type.
4156 virtual ConstraintWeight getMultipleConstraintMatchWeight(
4157 AsmOperandInfo &info, int maIndex) const;
4158
4159 /// Examine constraint string and operand type and determine a weight value.
4160 /// The operand object must already have been set up with the operand type.
4161 virtual ConstraintWeight getSingleConstraintMatchWeight(
4162 AsmOperandInfo &info, const char *constraint) const;
4163
4164 /// Determines the constraint code and constraint type to use for the specific
4165 /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType.
4166 /// If the actual operand being passed in is available, it can be passed in as
4167 /// Op, otherwise an empty SDValue can be passed.
4168 virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo,
4169 SDValue Op,
4170 SelectionDAG *DAG = nullptr) const;
4171
4172 /// Given a constraint, return the type of constraint it is for this target.
4173 virtual ConstraintType getConstraintType(StringRef Constraint) const;
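A minimal sketch of an override that classifies a few single-letter constraints using the ConstraintType enum above; the specific letters are illustrative and MyTargetLowering is hypothetical:

TargetLowering::ConstraintType
MyTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    case 'r':
      return C_RegisterClass; // any register in a general-purpose class
    case 'I':
      return C_Immediate;     // a target-defined immediate range
    case 'Q':
      return C_Memory;        // a target-defined memory operand
    default:
      break;
    }
  }
  // Fall back to the generic classification for everything else.
  return TargetLowering::getConstraintType(Constraint);
}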
4174
4175 /// Given a physical register constraint (e.g. {edx}), return the register
4176 /// number and the register class for the register.
4177 ///
4178 /// Given a register class constraint, like 'r', if this corresponds directly
4179 /// to an LLVM register class, return a register of 0 and the register class
4180 /// pointer.
4181 ///
4182 /// This should only be used for C_Register constraints. On error, this
4183 /// returns a register number of 0 and a null register class pointer.
4184 virtual std::pair<unsigned, const TargetRegisterClass *>
4185 getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
4186 StringRef Constraint, MVT VT) const;
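The companion register-selection hook, again as a sketch: 'r' is mapped to a register class (register number 0 plus the class pointer), while everything else, including explicit {reg} constraints, is deferred to the base implementation. MyTarget::GPR32RegClass is a hypothetical TableGen-generated class.

std::pair<unsigned, const TargetRegisterClass *>
MyTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                               StringRef Constraint,
                                               MVT VT) const {
  if (Constraint.size() == 1 && Constraint[0] == 'r')
    return std::make_pair(0U, &MyTarget::GPR32RegClass); // class constraint
  // {physreg}-style constraints and error handling go through the base class.
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}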
4187
4188 virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const {
4189 if (ConstraintCode == "m")
4190 return InlineAsm::Constraint_m;
4191 return InlineAsm::Constraint_Unknown;
4192 }
4193
4194 /// Try to replace an X constraint, which matches anything, with another that
4195 /// has more specific requirements based on the type of the corresponding
4196 /// operand. This returns null if there is no replacement to make.
4197 virtual const char *LowerXConstraint(EVT ConstraintVT) const;
4198
4199 /// Lower the specified operand into the Ops vector. If it is invalid, don't
4200 /// add anything to Ops.
4201 virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint,
4202 std::vector<SDValue> &Ops,
4203 SelectionDAG &DAG) const;
4204
4205 // Lower custom output constraints. If invalid, return SDValue().
4206 virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag,
4207 const SDLoc &DL,
4208 const AsmOperandInfo &OpInfo,
4209 SelectionDAG &DAG) const;
4210
4211 //===--------------------------------------------------------------------===//
4212 // Div utility functions
4213 //
4214 SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
4215 SmallVectorImpl<SDNode *> &Created) const;
4216 SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
4217 SmallVectorImpl<SDNode *> &Created) const;
4218
4219 /// Targets may override this function to provide custom SDIV lowering for
4220 /// power-of-2 denominators. If the target returns an empty SDValue, LLVM
4221 /// assumes SDIV is expensive and replaces it with a series of other integer
4222 /// operations.
4223 virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor,
4224 SelectionDAG &DAG,
4225 SmallVectorImpl<SDNode *> &Created) const;
4226
4227 /// Indicate whether this target prefers to combine FDIVs with the same
4228 /// divisor. If the transform should never be done, return zero. If the
4229 /// transform should be done, return the minimum number of divisor uses
4230 /// that must exist.
4231 virtual unsigned combineRepeatedFPDivisors() const {
4232 return 0;
4233 }
4234
4235 /// Hooks for building estimates in place of slower divisions and square
4236 /// roots.
4237
4238 /// Return either a square root or its reciprocal estimate value for the input
4239 /// operand.
4240 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
4241 /// 'Enabled' as set by a potential default override attribute.
4242 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
4243 /// refinement iterations required to generate a sufficient (though not
4244 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
4245 /// The boolean UseOneConstNR output is used to select a Newton-Raphson
4246 /// algorithm implementation that uses either one or two constants.
4247 /// The boolean Reciprocal is used to select whether the estimate is for the
4248 /// square root of the input operand or the reciprocal of its square root.
4249 /// A target may choose to implement its own refinement within this function.
4250 /// If that's true, then return '0' as the number of RefinementSteps to avoid
4251 /// any further refinement of the estimate.
4252 /// An empty SDValue return means no estimate sequence can be created.
4253 virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
4254 int Enabled, int &RefinementSteps,
4255 bool &UseOneConstNR, bool Reciprocal) const {
4256 return SDValue();
4257 }
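A sketch of how a target might opt in: emit an estimate node only for types it has hardware support for, fill in RefinementSteps when the caller left it Unspecified, and return an empty SDValue otherwise. MYISD::FRSQRTE stands in for whatever target-specific node actually produces the estimate; MyTargetLowering is hypothetical.

SDValue MyTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                          int Enabled, int &RefinementSteps,
                                          bool &UseOneConstNR,
                                          bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if (VT != MVT::f32 && VT != MVT::v4f32)
    return SDValue(); // no estimate sequence for other types

  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = 2;  // assume two Newton-Raphson iterations are enough
  UseOneConstNR = true;   // pick the one-constant NR formulation
  // MYISD::FRSQRTE is hypothetical; the generic code refines the estimate and,
  // for the non-reciprocal case, multiplies it back by the operand.
  return DAG.getNode(MYISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}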
4258
4259 /// Return a reciprocal estimate value for the input operand.
4260 /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or
4261 /// 'Enabled' as set by a potential default override attribute.
4262 /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson
4263 /// refinement iterations required to generate a sufficient (though not
4264 /// necessarily IEEE-754 compliant) estimate is returned in that parameter.
4265 /// A target may choose to implement its own refinement within this function.
4266 /// If that's true, then return '0' as the number of RefinementSteps to avoid
4267 /// any further refinement of the estimate.
4268 /// An empty SDValue return means no estimate sequence can be created.
4269 virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
4270 int Enabled, int &RefinementSteps) const {
4271 return SDValue();
4272 }
4273
4274 //===--------------------------------------------------------------------===//
4275 // Legalization utility functions
4276 //
4277
4278 /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes,
4279 /// respectively, each computing an n/2-bit part of the result.
4280 /// \param Result A vector that will be filled with the parts of the result
4281 /// in little-endian order.
4282 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
4283 /// if you want to control how low bits are extracted from the LHS.
4284 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
4285 /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
4286 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
4287 /// \returns true if the node has been expanded, false if it has not
4288 bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS,
4289 SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT,
4290 SelectionDAG &DAG, MulExpansionKind Kind,
4291 SDValue LL = SDValue(), SDValue LH = SDValue(),
4292 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
4293
4294 /// Expand a MUL into two nodes. One that computes the high bits of
4295 /// the result and one that computes the low bits.
4296 /// \param HiLoVT The value type to use for the Lo and Hi nodes.
4297 /// \param LL Low bits of the LHS of the MUL. You can use this parameter
4298 /// if you want to control how low bits are extracted from the LHS.
4299 /// \param LH High bits of the LHS of the MUL. See LL for meaning.
4300 /// \param RL Low bits of the RHS of the MUL. See LL for meaning.
4301 /// \param RH High bits of the RHS of the MUL. See LL for meaning.
4302 /// \returns true if the node has been expanded, false if it has not
4303 bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT,
4304 SelectionDAG &DAG, MulExpansionKind Kind,
4305 SDValue LL = SDValue(), SDValue LH = SDValue(),
4306 SDValue RL = SDValue(), SDValue RH = SDValue()) const;
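For instance, a target whose widest legal multiply is 32 x 32 bits could use expandMUL to split an illegal i64 ISD::MUL and then rebuild the wide value. A hedged sketch, intended to be called from a hook such as ReplaceNodeResults (MyTargetLowering and the helper name are hypothetical):

SDValue MyTargetLowering::splitWideMul(SDNode *N, SelectionDAG &DAG) const {
  SDValue Lo, Hi;
  if (!expandMUL(N, Lo, Hi, /*HiLoVT=*/MVT::i32, DAG,
                 MulExpansionKind::OnlyLegalOrCustom))
    return SDValue(); // expansion not possible with legal/custom ops only
  // BUILD_PAIR takes the low half first, matching expandMUL's Lo/Hi outputs.
  return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), MVT::i64, Lo, Hi);
}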
4307
4308 /// Expand funnel shift.
4309 /// \param N Node to expand
4310 /// \param Result output after conversion
4311 /// \returns True, if the expansion was successful, false otherwise
4312 bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4313
4314 /// Expand rotations.
4315 /// \param N Node to expand
4316 /// \param Result output after conversion
4317 /// \returns True, if the expansion was successful, false otherwise
4318 bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4319
4320 /// Expand float(f32) to SINT(i64) conversion
4321 /// \param N Node to expand
4322 /// \param Result output after conversion
4323 /// \returns True, if the expansion was successful, false otherwise
4324 bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4325
4326 /// Expand float to UINT conversion
4327 /// \param N Node to expand
4328 /// \param Result output after conversion
4329 /// \param Chain output chain after conversion
4330 /// \returns True, if the expansion was successful, false otherwise
4331 bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
4332 SelectionDAG &DAG) const;
4333
4334 /// Expand UINT(i64) to double(f64) conversion
4335 /// \param N Node to expand
4336 /// \param Result output after conversion
4337 /// \param Chain output chain after conversion
4338 /// \returns True, if the expansion was successful, false otherwise
4339 bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
4340 SelectionDAG &DAG) const;
4341
4342 /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
4343 SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;
4344
4345 /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
4346 /// vector nodes can only succeed if all operations are legal/custom.
4347 /// \param N Node to expand
4348 /// \param Result output after conversion
4349 /// \returns True, if the expansion was successful, false otherwise
4350 bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4351
4352 /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
4353 /// vector nodes can only succeed if all operations are legal/custom.
4354 /// \param N Node to expand
4355 /// \param Result output after conversion
4356 /// \returns True, if the expansion was successful, false otherwise
4357 bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4358
4359 /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
4360 /// vector nodes can only succeed if all operations are legal/custom.
4361 /// \param N Node to expand
4362 /// \param Result output after conversion
4363 /// \returns True, if the expansion was successful, false otherwise
4364 bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
4365
4366 /// Expand ABS nodes. Expands vector/scalar ABS nodes;
4367 /// vector nodes can only succeed if all operations are legal/custom.
4368 /// (ABS x) -> (XOR (ADD x, (SRA x, type_size)), (SRA x, type_size))
4369 /// \param N Node to expand
4370 /// \param Result output after conversion
4371 /// \returns True, if the expansion was successful, false otherwise
4372 bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
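The identity quoted in the comment (with the shift amount being the bit width minus one) is easy to check in plain C++. A small standalone illustration for 32-bit values, assuming the usual arithmetic right shift for signed integers:

#include <cstdint>

// abs(x) == (x + (x >> 31)) ^ (x >> 31) in two's complement: the shift yields
// 0 for non-negative x and all-ones for negative x.  Using uint32_t for the
// add/xor makes the INT32_MIN case wrap the same way the DAG nodes would.
uint32_t absViaShift(int32_t X) {
  uint32_t Sign = static_cast<uint32_t>(X >> 31); // 0 or 0xFFFFFFFF
  return (static_cast<uint32_t>(X) + Sign) ^ Sign;
}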
4373
4374 /// Turn load of vector type into a load of the individual elements.
4375 /// \param LD load to expand
4376 /// \returns BUILD_VECTOR and TokenFactor nodes.
4377 std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
4378 SelectionDAG &DAG) const;
4379
4380 /// Turn a store of a vector type into stores of the individual elements.
4381 /// \param ST Store with a vector value type
4382 /// \returns TokenFactor of the individual store chains.
4383 SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;
4384
4385 /// Expands an unaligned load to 2 half-size loads for an integer, and
4386 /// possibly more for vectors.
4387 std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
4388 SelectionDAG &DAG) const;
4389
4390 /// Expands an unaligned store to 2 half-size stores for integer values, and
4391 /// possibly more for vectors.
4392 SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;
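A hedged sketch of where these helpers are typically invoked: a target's custom store lowering can fall back to the generic expansion when the access is not naturally aligned. The alignment test here is deliberately simplistic; real targets consult their own fast-unaligned-access hooks. MyTargetLowering is hypothetical.

SDValue MyTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *ST = cast<StoreSDNode>(Op.getNode());
  EVT MemVT = ST->getMemoryVT();
  if (ST->getAlignment() < MemVT.getStoreSize().getFixedSize())
    return expandUnalignedStore(ST, DAG); // two half-width stores, or more
  return SDValue(); // otherwise keep the default lowering
}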
4393
4394 /// Increments memory address \p Addr according to the type of the value
4395 /// \p DataVT that should be stored. If the data is stored in compressed
4396 /// form, the memory address should be incremented according to the number of
4397 /// the stored elements. This number is equal to the number of '1' bits
4398 /// in the \p Mask.
4399 /// \p DataVT is a vector type. \p Mask is a vector value.
4400 /// \p DataVT and \p Mask have the same number of vector elements.
4401 SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
4402 EVT DataVT, SelectionDAG &DAG,
4403 bool IsCompressedMemory) const;
4404
4405 /// Get a pointer to vector element \p Idx located in memory for a vector of
4406 /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
4407 /// bounds the returned pointer is unspecified, but will be within the vector
4408 /// bounds.
4409 SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
4410 SDValue Index) const;
4411
4412 /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
4413 /// method accepts integers as its arguments.
4414 SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
4415
4416 /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
4417 /// method accepts integers as its arguments.
4418 SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;
4419
4420 /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
4421 /// method accepts integers as its arguments.
4422 SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;
4423
4424 /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
4425 /// method accepts integers as its arguments.
4426 /// Note: This method may fail if the division could not be performed
4427 /// within the type. Clients must retry with a wider type if this happens.
4428 SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
4429 SDValue LHS, SDValue RHS,
4430 unsigned Scale, SelectionDAG &DAG) const;
4431
4432 /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
4433 /// always succeeds and populates the Result and Overflow arguments.
4434 void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4435 SelectionDAG &DAG) const;
4436
4437 /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
4438 /// always succeeds and populates the Result and Overflow arguments.
4439 void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4440 SelectionDAG &DAG) const;
4441
4442 /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
4443 /// expansion was successful and populates the Result and Overflow arguments.
4444 bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
4445 SelectionDAG &DAG) const;
4446
4447 /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified,
4448 /// only the first Count elements of the vector are used.
4449 SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
4450
4451 /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
4452 /// Returns true if the expansion was successful.
4453 bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
4454
4455 //===--------------------------------------------------------------------===//
4456 // Instruction Emitting Hooks
4457 //
4458
4459 /// This method should be implemented by targets that mark instructions with
4460 /// the 'usesCustomInserter' flag. These instructions are special in various
4461 /// ways, which require special support to insert. The specified MachineInstr
4462 /// is created but not inserted into any basic blocks, and this method is
4463 /// called to expand it into a sequence of instructions, potentially also
4464 /// creating new basic blocks and control flow.
4465 /// As long as the returned basic block is different (i.e., we created a new
4466 /// one), the custom inserter is free to modify the rest of \p MBB.
4467 virtual MachineBasicBlock *
4468 EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;
4469
4470 /// This method should be implemented by targets that mark instructions with
4471 /// the 'hasPostISelHook' flag. These instructions must be adjusted after
4472 /// instruction selection by target hooks, e.g. to fill in optional defs for
4473 /// ARM 's' setting instructions.
4474 virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
4475 SDNode *Node) const;
4476
4477 /// If this function returns true, SelectionDAGBuilder emits a
4478 /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
4479 virtual bool useLoadStackGuardNode() const {
4480 return false;
4481 }
4482
4483 virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
4484 const SDLoc &DL) const {
4485 llvm_unreachable("not implemented for this target");
4486 }
4487
4488 /// Lower TLS global address SDNode for target independent emulated TLS model.
4489 virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
4490 SelectionDAG &DAG) const;
4491
4492 /// Expands a target-specific indirect branch for the case of JumpTable
4493 /// expansion.
4494 virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr,
4495 SelectionDAG &DAG) const {
4496 return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
4497 }
4498
4499 // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
4500 // If we're comparing for equality to zero and isCtlzFast is true, expose the
4501 // fact that this can be implemented as a ctlz/srl pair, so that the dag
4502 // combiner can fold the new nodes.
4503 SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
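The identity behind this transform, spelled out for 32-bit values: ctlz returns the bit width (32) only for a zero input, so a right shift by log2(32) = 5 leaves exactly the x == 0 bit. A tiny standalone illustration using the GCC/Clang builtin (editorial, not DAG code):

#include <cstdint>

// ctlz with defined behaviour at zero, mirroring ISD::CTLZ semantics.
static unsigned ctlz32(uint32_t X) {
  return X ? static_cast<unsigned>(__builtin_clz(X)) : 32;
}

// (x == 0)  <==>  (ctlz32(x) >> 5) != 0: only ctlz == 32 survives the shift
// by log2(#bits) = 5.
bool isZeroViaCtlzSrl(uint32_t X) { return (ctlz32(X) >> 5) != 0; }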
4504
4505private:
4506 SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4507 const SDLoc &DL, DAGCombinerInfo &DCI) const;
4508 SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
4509 const SDLoc &DL, DAGCombinerInfo &DCI) const;
4510
4511 SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
4512 SDValue N1, ISD::CondCode Cond,
4513 DAGCombinerInfo &DCI,
4514 const SDLoc &DL) const;
4515
4516 // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
4517 SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
4518 EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
4519 DAGCombinerInfo &DCI, const SDLoc &DL) const;
4520
4521 SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
4522 SDValue CompTargetNode, ISD::CondCode Cond,
4523 DAGCombinerInfo &DCI, const SDLoc &DL,
4524 SmallVectorImpl<SDNode *> &Created) const;
4525 SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
4526 ISD::CondCode Cond, DAGCombinerInfo &DCI,
4527 const SDLoc &DL) const;
4528
4529 SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
4530 SDValue CompTargetNode, ISD::CondCode Cond,
4531 DAGCombinerInfo &DCI, const SDLoc &DL,
4532 SmallVectorImpl<SDNode *> &Created) const;
4533 SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
4534 ISD::CondCode Cond, DAGCombinerInfo &DCI,
4535 const SDLoc &DL) const;
4536};
4537
4538/// Given an LLVM IR type and return type attributes, compute the return value
4539/// EVTs and flags, and optionally also the offsets, if the return value is
4540/// being lowered to memory.
4541void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
4542 SmallVectorImpl<ISD::OutputArg> &Outs,
4543 const TargetLowering &TLI, const DataLayout &DL);
4544
4545} // end namespace llvm
4546
4547#endif // LLVM_CODEGEN_TARGETLOWERING_H