File: llvm/include/llvm/CodeGen/TargetLowering.h
Warning: line 1385, column 31: Called C++ object pointer is null
1 | //===-- AArch64TargetTransformInfo.cpp - AArch64 specific TTI -------------===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | |||||||||
9 | #include "AArch64ExpandImm.h" | ||||||||
10 | #include "AArch64TargetTransformInfo.h" | ||||||||
11 | #include "MCTargetDesc/AArch64AddressingModes.h" | ||||||||
12 | #include "llvm/Analysis/LoopInfo.h" | ||||||||
13 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||
14 | #include "llvm/CodeGen/BasicTTIImpl.h" | ||||||||
15 | #include "llvm/CodeGen/CostTable.h" | ||||||||
16 | #include "llvm/CodeGen/TargetLowering.h" | ||||||||
17 | #include "llvm/IR/IntrinsicInst.h" | ||||||||
18 | #include "llvm/IR/IntrinsicsAArch64.h" | ||||||||
19 | #include "llvm/Support/Debug.h" | ||||||||
20 | #include <algorithm> | ||||||||
21 | using namespace llvm; | ||||||||
22 | |||||||||
23 | #define DEBUG_TYPE "aarch64tti" | ||||||||
24 | |||||||||
25 | static cl::opt<bool> EnableFalkorHWPFUnrollFix("enable-falkor-hwpf-unroll-fix", | ||||||||
26 | cl::init(true), cl::Hidden); | ||||||||
27 | |||||||||
28 | bool AArch64TTIImpl::areInlineCompatible(const Function *Caller, | ||||||||
29 | const Function *Callee) const { | ||||||||
30 | const TargetMachine &TM = getTLI()->getTargetMachine(); | ||||||||
31 | |||||||||
32 | const FeatureBitset &CallerBits = | ||||||||
33 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); | ||||||||
34 | const FeatureBitset &CalleeBits = | ||||||||
35 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); | ||||||||
36 | |||||||||
37 | // Inline a callee if its target-features are a subset of the caller's | ||||||||
38 | // target-features. | ||||||||
39 | return (CallerBits & CalleeBits) == CalleeBits; | ||||||||
40 | } | ||||||||
41 | |||||||||
42 | /// Calculate the cost of materializing a 64-bit value. This helper | ||||||||
43 | /// method might only calculate a fraction of a larger immediate. Therefore it | ||||||||
44 | /// is valid to return a cost of ZERO. | ||||||||
45 | int AArch64TTIImpl::getIntImmCost(int64_t Val) { | ||||||||
46 | // Check if the immediate can be encoded within an instruction. | ||||||||
47 | if (Val == 0 || AArch64_AM::isLogicalImmediate(Val, 64)) | ||||||||
48 | return 0; | ||||||||
49 | |||||||||
50 | if (Val < 0) | ||||||||
51 | Val = ~Val; | ||||||||
52 | |||||||||
53 | // Calculate how many moves we will need to materialize this constant. | ||||||||
54 | SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; | ||||||||
55 | AArch64_IMM::expandMOVImm(Val, 64, Insn); | ||||||||
56 | return Insn.size(); | ||||||||
57 | } | ||||||||
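// Editorial note (illustrative example, not part of the original source): a
// logical immediate such as 0x5555555555555555 is directly encodable and is
// reported as cost 0, while 0x0000123400005678 expands to one MOVZ plus one
// MOVK, so the helper returns 2 for it, e.g.:
//   AArch64_IMM::expandMOVImm(0x0000123400005678ULL, 64, Insn); // Insn.size() == 2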
58 | |||||||||
59 | /// Calculate the cost of materializing the given constant. | ||||||||
60 | int AArch64TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, | ||||||||
61 | TTI::TargetCostKind CostKind) { | ||||||||
62 | assert(Ty->isIntegerTy()); | ||||||||
63 | |||||||||
64 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | ||||||||
65 | if (BitSize == 0) | ||||||||
66 | return ~0U; | ||||||||
67 | |||||||||
68 | // Sign-extend all constants to a multiple of 64-bit. | ||||||||
69 | APInt ImmVal = Imm; | ||||||||
70 | if (BitSize & 0x3f) | ||||||||
71 | ImmVal = Imm.sext((BitSize + 63) & ~0x3fU); | ||||||||
72 | |||||||||
73 | // Split the constant into 64-bit chunks and calculate the cost for each | ||||||||
74 | // chunk. | ||||||||
75 | int Cost = 0; | ||||||||
76 | for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { | ||||||||
77 | APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); | ||||||||
78 | int64_t Val = Tmp.getSExtValue(); | ||||||||
79 | Cost += getIntImmCost(Val); | ||||||||
80 | } | ||||||||
81 | // We need at least one instruction to materialize the constant. | ||||||||
82 | return std::max(1, Cost); | ||||||||
83 | } | ||||||||
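// Editorial note (worked example, not part of the original source): an i128
// constant whose low 64 bits are zero and whose high 64 bits are 0xDEADBEEF is
// split into two chunks costing 0 and 2 (MOVZ plus MOVK for the high half),
// giving a total of 2; std::max keeps a degenerate all-zero constant at 1.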
84 | |||||||||
85 | int AArch64TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, | ||||||||
86 | const APInt &Imm, Type *Ty, | ||||||||
87 | TTI::TargetCostKind CostKind, | ||||||||
88 | Instruction *Inst) { | ||||||||
89 | assert(Ty->isIntegerTy()); | ||||||||
90 | |||||||||
91 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | ||||||||
92 | // There is no cost model for constants with a bit size of 0. Return TCC_Free | ||||||||
93 | // here, so that constant hoisting will ignore this constant. | ||||||||
94 | if (BitSize == 0) | ||||||||
95 | return TTI::TCC_Free; | ||||||||
96 | |||||||||
97 | unsigned ImmIdx = ~0U; | ||||||||
98 | switch (Opcode) { | ||||||||
99 | default: | ||||||||
100 | return TTI::TCC_Free; | ||||||||
101 | case Instruction::GetElementPtr: | ||||||||
102 | // Always hoist the base address of a GetElementPtr. | ||||||||
103 | if (Idx == 0) | ||||||||
104 | return 2 * TTI::TCC_Basic; | ||||||||
105 | return TTI::TCC_Free; | ||||||||
106 | case Instruction::Store: | ||||||||
107 | ImmIdx = 0; | ||||||||
108 | break; | ||||||||
109 | case Instruction::Add: | ||||||||
110 | case Instruction::Sub: | ||||||||
111 | case Instruction::Mul: | ||||||||
112 | case Instruction::UDiv: | ||||||||
113 | case Instruction::SDiv: | ||||||||
114 | case Instruction::URem: | ||||||||
115 | case Instruction::SRem: | ||||||||
116 | case Instruction::And: | ||||||||
117 | case Instruction::Or: | ||||||||
118 | case Instruction::Xor: | ||||||||
119 | case Instruction::ICmp: | ||||||||
120 | ImmIdx = 1; | ||||||||
121 | break; | ||||||||
122 | // Always return TCC_Free for the shift value of a shift instruction. | ||||||||
123 | case Instruction::Shl: | ||||||||
124 | case Instruction::LShr: | ||||||||
125 | case Instruction::AShr: | ||||||||
126 | if (Idx == 1) | ||||||||
127 | return TTI::TCC_Free; | ||||||||
128 | break; | ||||||||
129 | case Instruction::Trunc: | ||||||||
130 | case Instruction::ZExt: | ||||||||
131 | case Instruction::SExt: | ||||||||
132 | case Instruction::IntToPtr: | ||||||||
133 | case Instruction::PtrToInt: | ||||||||
134 | case Instruction::BitCast: | ||||||||
135 | case Instruction::PHI: | ||||||||
136 | case Instruction::Call: | ||||||||
137 | case Instruction::Select: | ||||||||
138 | case Instruction::Ret: | ||||||||
139 | case Instruction::Load: | ||||||||
140 | break; | ||||||||
141 | } | ||||||||
142 | |||||||||
143 | if (Idx == ImmIdx) { | ||||||||
144 | int NumConstants = (BitSize + 63) / 64; | ||||||||
145 | int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); | ||||||||
146 | return (Cost <= NumConstants * TTI::TCC_Basic) | ||||||||
147 | ? static_cast<int>(TTI::TCC_Free) | ||||||||
148 | : Cost; | ||||||||
149 | } | ||||||||
150 | return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); | ||||||||
151 | } | ||||||||
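// Editorial note (illustrative example, not part of the original source): for
//   %a = add i64 %x, 42        ; Idx == 1, the immediate encodes in the add
// the constant is reported as TCC_Free, so constant hoisting leaves it in
// place, whereas an immediate that needs several MOVZ/MOVK instructions keeps
// its full materialization cost and becomes a hoisting candidate.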
152 | |||||||||
153 | int AArch64TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, | ||||||||
154 | const APInt &Imm, Type *Ty, | ||||||||
155 | TTI::TargetCostKind CostKind) { | ||||||||
156 | assert(Ty->isIntegerTy()); | ||||||||
157 | |||||||||
158 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); | ||||||||
159 | // There is no cost model for constants with a bit size of 0. Return TCC_Free | ||||||||
160 | // here, so that constant hoisting will ignore this constant. | ||||||||
161 | if (BitSize == 0) | ||||||||
162 | return TTI::TCC_Free; | ||||||||
163 | |||||||||
164 | // Most (all?) AArch64 intrinsics do not support folding immediates into the | ||||||||
165 | // selected instruction, so we compute the materialization cost for the | ||||||||
166 | // immediate directly. | ||||||||
167 | if (IID >= Intrinsic::aarch64_addg && IID <= Intrinsic::aarch64_udiv) | ||||||||
168 | return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); | ||||||||
169 | |||||||||
170 | switch (IID) { | ||||||||
171 | default: | ||||||||
172 | return TTI::TCC_Free; | ||||||||
173 | case Intrinsic::sadd_with_overflow: | ||||||||
174 | case Intrinsic::uadd_with_overflow: | ||||||||
175 | case Intrinsic::ssub_with_overflow: | ||||||||
176 | case Intrinsic::usub_with_overflow: | ||||||||
177 | case Intrinsic::smul_with_overflow: | ||||||||
178 | case Intrinsic::umul_with_overflow: | ||||||||
179 | if (Idx == 1) { | ||||||||
180 | int NumConstants = (BitSize + 63) / 64; | ||||||||
181 | int Cost = AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); | ||||||||
182 | return (Cost <= NumConstants * TTI::TCC_Basic) | ||||||||
183 | ? static_cast<int>(TTI::TCC_Free) | ||||||||
184 | : Cost; | ||||||||
185 | } | ||||||||
186 | break; | ||||||||
187 | case Intrinsic::experimental_stackmap: | ||||||||
188 | if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | ||||||||
189 | return TTI::TCC_Free; | ||||||||
190 | break; | ||||||||
191 | case Intrinsic::experimental_patchpoint_void: | ||||||||
192 | case Intrinsic::experimental_patchpoint_i64: | ||||||||
193 | if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | ||||||||
194 | return TTI::TCC_Free; | ||||||||
195 | break; | ||||||||
196 | case Intrinsic::experimental_gc_statepoint: | ||||||||
197 | if ((Idx < 5) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) | ||||||||
198 | return TTI::TCC_Free; | ||||||||
199 | break; | ||||||||
200 | } | ||||||||
201 | return AArch64TTIImpl::getIntImmCost(Imm, Ty, CostKind); | ||||||||
202 | } | ||||||||
203 | |||||||||
204 | TargetTransformInfo::PopcntSupportKind | ||||||||
205 | AArch64TTIImpl::getPopcntSupport(unsigned TyWidth) { | ||||||||
206 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); | ||||||||
207 | if (TyWidth == 32 || TyWidth == 64) | ||||||||
208 | return TTI::PSK_FastHardware; | ||||||||
209 | // TODO: AArch64TargetLowering::LowerCTPOP() supports 128bit popcount. | ||||||||
210 | return TTI::PSK_Software; | ||||||||
211 | } | ||||||||
212 | |||||||||
213 | bool AArch64TTIImpl::isWideningInstruction(Type *DstTy, unsigned Opcode, | ||||||||
214 | ArrayRef<const Value *> Args) { | ||||||||
215 | |||||||||
216 | // A helper that returns a vector type from the given type. The number of | ||||||||
217 | // elements in type Ty determines the vector width. | ||||||||
218 | auto toVectorTy = [&](Type *ArgTy) { | ||||||||
219 | return FixedVectorType::get(ArgTy->getScalarType(), | ||||||||
220 | cast<FixedVectorType>(DstTy)->getNumElements()); | ||||||||
221 | }; | ||||||||
222 | |||||||||
223 | // Exit early if DstTy is not a vector type whose elements are at least | ||||||||
224 | // 16-bits wide. | ||||||||
225 | if (!DstTy->isVectorTy() || DstTy->getScalarSizeInBits() < 16) | ||||||||
226 | return false; | ||||||||
227 | |||||||||
228 | // Determine if the operation has a widening variant. We consider both the | ||||||||
229 | // "long" (e.g., usubl) and "wide" (e.g., usubw) versions of the | ||||||||
230 | // instructions. | ||||||||
231 | // | ||||||||
232 | // TODO: Add additional widening operations (e.g., mul, shl, etc.) once we | ||||||||
233 | // verify that their extending operands are eliminated during code | ||||||||
234 | // generation. | ||||||||
235 | switch (Opcode) { | ||||||||
236 | case Instruction::Add: // UADDL(2), SADDL(2), UADDW(2), SADDW(2). | ||||||||
237 | case Instruction::Sub: // USUBL(2), SSUBL(2), USUBW(2), SSUBW(2). | ||||||||
238 | break; | ||||||||
239 | default: | ||||||||
240 | return false; | ||||||||
241 | } | ||||||||
242 | |||||||||
243 | // To be a widening instruction (either the "wide" or "long" versions), the | ||||||||
244 | // second operand must be a sign- or zero extend having a single user. We | ||||||||
245 | // only consider extends having a single user because they may otherwise not | ||||||||
246 | // be eliminated. | ||||||||
247 | if (Args.size() != 2 || | ||||||||
248 | (!isa<SExtInst>(Args[1]) && !isa<ZExtInst>(Args[1])) || | ||||||||
249 | !Args[1]->hasOneUse()) | ||||||||
250 | return false; | ||||||||
251 | auto *Extend = cast<CastInst>(Args[1]); | ||||||||
252 | |||||||||
253 | // Legalize the destination type and ensure it can be used in a widening | ||||||||
254 | // operation. | ||||||||
255 | auto DstTyL = TLI->getTypeLegalizationCost(DL, DstTy); | ||||||||
256 | unsigned DstElTySize = DstTyL.second.getScalarSizeInBits(); | ||||||||
257 | if (!DstTyL.second.isVector() || DstElTySize != DstTy->getScalarSizeInBits()) | ||||||||
258 | return false; | ||||||||
259 | |||||||||
260 | // Legalize the source type and ensure it can be used in a widening | ||||||||
261 | // operation. | ||||||||
262 | auto *SrcTy = toVectorTy(Extend->getSrcTy()); | ||||||||
263 | auto SrcTyL = TLI->getTypeLegalizationCost(DL, SrcTy); | ||||||||
264 | unsigned SrcElTySize = SrcTyL.second.getScalarSizeInBits(); | ||||||||
265 | if (!SrcTyL.second.isVector() || SrcElTySize != SrcTy->getScalarSizeInBits()) | ||||||||
266 | return false; | ||||||||
267 | |||||||||
268 | // Get the total number of vector elements in the legalized types. | ||||||||
269 | unsigned NumDstEls = DstTyL.first * DstTyL.second.getVectorNumElements(); | ||||||||
270 | unsigned NumSrcEls = SrcTyL.first * SrcTyL.second.getVectorNumElements(); | ||||||||
271 | |||||||||
272 | // Return true if the legalized types have the same number of vector elements | ||||||||
273 | // and the destination element type size is twice that of the source type. | ||||||||
274 | return NumDstEls == NumSrcEls && 2 * SrcElTySize == DstElTySize; | ||||||||
275 | } | ||||||||
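// Editorial note (illustrative IR example, not part of the original source):
//   %e = sext <4 x i16> %b to <4 x i32>   ; single use
//   %r = add <4 x i32> %a, %e
// Here the legalized source and destination types are both 4-element vectors
// and the destination element size (32) is twice the source (16), so the add
// can be selected as SADDW and the extend folds away.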
276 | |||||||||
277 | int AArch64TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||||||||
278 | TTI::CastContextHint CCH, | ||||||||
279 | TTI::TargetCostKind CostKind, | ||||||||
280 | const Instruction *I) { | ||||||||
281 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
282 | assert(ISD && "Invalid opcode"); | ||||||||
283 | |||||||||
284 | // If the cast is observable, and it is used by a widening instruction (e.g., | ||||||||
285 | // uaddl, saddw, etc.), it may be free. | ||||||||
286 | if (I && I->hasOneUse()) { | ||||||||
287 | auto *SingleUser = cast<Instruction>(*I->user_begin()); | ||||||||
288 | SmallVector<const Value *, 4> Operands(SingleUser->operand_values()); | ||||||||
289 | if (isWideningInstruction(Dst, SingleUser->getOpcode(), Operands)) { | ||||||||
290 | // If the cast is the second operand, it is free. We will generate either | ||||||||
291 | // a "wide" or "long" version of the widening instruction. | ||||||||
292 | if (I == SingleUser->getOperand(1)) | ||||||||
293 | return 0; | ||||||||
294 | // If the cast is not the second operand, it will be free if it looks the | ||||||||
295 | // same as the second operand. In this case, we will generate a "long" | ||||||||
296 | // version of the widening instruction. | ||||||||
297 | if (auto *Cast = dyn_cast<CastInst>(SingleUser->getOperand(1))) | ||||||||
298 | if (I->getOpcode() == unsigned(Cast->getOpcode()) && | ||||||||
299 | cast<CastInst>(I)->getSrcTy() == Cast->getSrcTy()) | ||||||||
300 | return 0; | ||||||||
301 | } | ||||||||
302 | } | ||||||||
303 | |||||||||
304 | // TODO: Allow non-throughput costs that aren't binary. | ||||||||
305 | auto AdjustCost = [&CostKind](int Cost) { | ||||||||
306 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
307 | return Cost == 0 ? 0 : 1; | ||||||||
308 | return Cost; | ||||||||
309 | }; | ||||||||
310 | |||||||||
311 | EVT SrcTy = TLI->getValueType(DL, Src); | ||||||||
312 | EVT DstTy = TLI->getValueType(DL, Dst); | ||||||||
313 | |||||||||
314 | if (!SrcTy.isSimple() || !DstTy.isSimple()) | ||||||||
315 | return AdjustCost( | ||||||||
316 | BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); | ||||||||
317 | |||||||||
318 | static const TypeConversionCostTblEntry | ||||||||
319 | ConversionTbl[] = { | ||||||||
320 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, | ||||||||
321 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, | ||||||||
322 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, | ||||||||
323 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, | ||||||||
324 | |||||||||
325 | // The number of shll instructions for the extension. | ||||||||
326 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, | ||||||||
327 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, | ||||||||
328 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, | ||||||||
329 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, | ||||||||
330 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, | ||||||||
331 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, | ||||||||
332 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, | ||||||||
333 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, | ||||||||
334 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, | ||||||||
335 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, | ||||||||
336 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, | ||||||||
337 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, | ||||||||
338 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, | ||||||||
339 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, | ||||||||
340 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, | ||||||||
341 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, | ||||||||
342 | |||||||||
343 | // LowerVectorINT_TO_FP: | ||||||||
344 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, | ||||||||
345 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, | ||||||||
346 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, | ||||||||
347 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, | ||||||||
348 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, | ||||||||
349 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, | ||||||||
350 | |||||||||
351 | // Complex: to v2f32 | ||||||||
352 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, | ||||||||
353 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, | ||||||||
354 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, | ||||||||
355 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, | ||||||||
356 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 3 }, | ||||||||
357 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 2 }, | ||||||||
358 | |||||||||
359 | // Complex: to v4f32 | ||||||||
360 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 4 }, | ||||||||
361 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, | ||||||||
362 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, | ||||||||
363 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, | ||||||||
364 | |||||||||
365 | // Complex: to v8f32 | ||||||||
366 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, | ||||||||
367 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, | ||||||||
368 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 10 }, | ||||||||
369 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, | ||||||||
370 | |||||||||
371 | // Complex: to v16f32 | ||||||||
372 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, | ||||||||
373 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 21 }, | ||||||||
374 | |||||||||
375 | // Complex: to v2f64 | ||||||||
376 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, | ||||||||
377 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, | ||||||||
378 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, | ||||||||
379 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, | ||||||||
380 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 4 }, | ||||||||
381 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, | ||||||||
382 | |||||||||
383 | |||||||||
384 | // LowerVectorFP_TO_INT | ||||||||
385 | { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f32, 1 }, | ||||||||
386 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, | ||||||||
387 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, | ||||||||
388 | { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f32, 1 }, | ||||||||
389 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, | ||||||||
390 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, | ||||||||
391 | |||||||||
392 | // Complex, from v2f32: legal type is v2i32 (no cost) or v2i64 (1 ext). | ||||||||
393 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f32, 2 }, | ||||||||
394 | { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f32, 1 }, | ||||||||
395 | { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f32, 1 }, | ||||||||
396 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f32, 2 }, | ||||||||
397 | { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f32, 1 }, | ||||||||
398 | { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f32, 1 }, | ||||||||
399 | |||||||||
400 | // Complex, from v4f32: legal type is v4i16, 1 narrowing => ~2 | ||||||||
401 | { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, | ||||||||
402 | { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 2 }, | ||||||||
403 | { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, | ||||||||
404 | { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 2 }, | ||||||||
405 | |||||||||
406 | // Complex, from v2f64: legal type is v2i32, 1 narrowing => ~2. | ||||||||
407 | { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, | ||||||||
408 | { ISD::FP_TO_SINT, MVT::v2i16, MVT::v2f64, 2 }, | ||||||||
409 | { ISD::FP_TO_SINT, MVT::v2i8, MVT::v2f64, 2 }, | ||||||||
410 | { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, | ||||||||
411 | { ISD::FP_TO_UINT, MVT::v2i16, MVT::v2f64, 2 }, | ||||||||
412 | { ISD::FP_TO_UINT, MVT::v2i8, MVT::v2f64, 2 }, | ||||||||
413 | }; | ||||||||
414 | |||||||||
415 | if (const auto *Entry = ConvertCostTableLookup(ConversionTbl, ISD, | ||||||||
416 | DstTy.getSimpleVT(), | ||||||||
417 | SrcTy.getSimpleVT())) | ||||||||
418 | return AdjustCost(Entry->Cost); | ||||||||
419 | |||||||||
420 | return AdjustCost( | ||||||||
421 | BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); | ||||||||
422 | } | ||||||||
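// Editorial note (illustrative example, not part of the original source): for
//   %z = sext <8 x i8> %v to <8 x i32>
// the table above yields {SIGN_EXTEND, v8i32, v8i8, 3}, i.e. roughly three
// sshll/sshll2 instructions; conversions not listed fall back to the base
// implementation's estimate.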
423 | |||||||||
424 | int AArch64TTIImpl::getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||||||||
425 | VectorType *VecTy, | ||||||||
426 | unsigned Index) { | ||||||||
427 | |||||||||
428 | // Make sure we were given a valid extend opcode. | ||||||||
429 | assert((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) && | ||||||||
430 | "Invalid opcode"); | ||||||||
431 | |||||||||
432 | // We are extending an element we extract from a vector, so the source type | ||||||||
433 | // of the extend is the element type of the vector. | ||||||||
434 | auto *Src = VecTy->getElementType(); | ||||||||
435 | |||||||||
436 | // Sign- and zero-extends are for integer types only. | ||||||||
437 | assert(isa<IntegerType>(Dst) && isa<IntegerType>(Src) && "Invalid type"); | ||||||||
438 | |||||||||
439 | // Get the cost for the extract. We compute the cost (if any) for the extend | ||||||||
440 | // below. | ||||||||
441 | auto Cost = getVectorInstrCost(Instruction::ExtractElement, VecTy, Index); | ||||||||
442 | |||||||||
443 | // Legalize the types. | ||||||||
444 | auto VecLT = TLI->getTypeLegalizationCost(DL, VecTy); | ||||||||
445 | auto DstVT = TLI->getValueType(DL, Dst); | ||||||||
446 | auto SrcVT = TLI->getValueType(DL, Src); | ||||||||
447 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||||||||
448 | |||||||||
449 | // If the resulting type is still a vector and the destination type is legal, | ||||||||
450 | // we may get the extension for free. If not, get the default cost for the | ||||||||
451 | // extend. | ||||||||
452 | if (!VecLT.second.isVector() || !TLI->isTypeLegal(DstVT)) | ||||||||
453 | return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, | ||||||||
454 | CostKind); | ||||||||
455 | |||||||||
456 | // The destination type should be larger than the element type. If not, get | ||||||||
457 | // the default cost for the extend. | ||||||||
458 | if (DstVT.getSizeInBits() < SrcVT.getSizeInBits()) | ||||||||
459 | return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, | ||||||||
460 | CostKind); | ||||||||
461 | |||||||||
462 | switch (Opcode) { | ||||||||
463 | default: | ||||||||
464 | llvm_unreachable("Opcode should be either SExt or ZExt")::llvm::llvm_unreachable_internal("Opcode should be either SExt or ZExt" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp" , 464); | ||||||||
465 | |||||||||
466 | // For sign-extends, we only need a smov, which performs the extension | ||||||||
467 | // automatically. | ||||||||
468 | case Instruction::SExt: | ||||||||
469 | return Cost; | ||||||||
470 | |||||||||
471 | // For zero-extends, the extend is performed automatically by a umov unless | ||||||||
472 | // the destination type is i64 and the element type is i8 or i16. | ||||||||
473 | case Instruction::ZExt: | ||||||||
474 | if (DstVT.getSizeInBits() != 64u || SrcVT.getSizeInBits() == 32u) | ||||||||
475 | return Cost; | ||||||||
476 | } | ||||||||
477 | |||||||||
478 | // If we are unable to perform the extend for free, get the default cost. | ||||||||
479 | return Cost + getCastInstrCost(Opcode, Dst, Src, TTI::CastContextHint::None, | ||||||||
480 | CostKind); | ||||||||
481 | } | ||||||||
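// Editorial note (illustrative example, not part of the original source):
//   %x = extractelement <8 x i16> %v, i32 1
//   %y = sext i16 %x to i32
// maps to a single SMOV, which sign-extends as part of the extract, so only
// the extract cost is charged. A zext of an i8/i16 element all the way to i64
// is the case that still needs a separate extend.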
482 | |||||||||
483 | unsigned AArch64TTIImpl::getCFInstrCost(unsigned Opcode, | ||||||||
484 | TTI::TargetCostKind CostKind) { | ||||||||
485 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
486 | return Opcode == Instruction::PHI ? 0 : 1; | ||||||||
487 | assert(CostKind == TTI::TCK_RecipThroughput && "unexpected CostKind"); | ||||||||
488 | // Branches are assumed to be predicted. | ||||||||
489 | return 0; | ||||||||
490 | } | ||||||||
491 | |||||||||
492 | int AArch64TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, | ||||||||
493 | unsigned Index) { | ||||||||
494 | assert(Val->isVectorTy() && "This must be a vector type"); | ||||||||
495 | |||||||||
496 | if (Index != -1U) { | ||||||||
497 | // Legalize the type. | ||||||||
498 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); | ||||||||
499 | |||||||||
500 | // This type is legalized to a scalar type. | ||||||||
501 | if (!LT.second.isVector()) | ||||||||
502 | return 0; | ||||||||
503 | |||||||||
504 | // The type may be split. Normalize the index to the new type. | ||||||||
505 | unsigned Width = LT.second.getVectorNumElements(); | ||||||||
506 | Index = Index % Width; | ||||||||
507 | |||||||||
508 | // The element at index zero is already inside the vector. | ||||||||
509 | if (Index == 0) | ||||||||
510 | return 0; | ||||||||
511 | } | ||||||||
512 | |||||||||
513 | // All other insert/extracts cost this much. | ||||||||
514 | return ST->getVectorInsertExtractBaseCost(); | ||||||||
515 | } | ||||||||
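// Editorial note (illustrative example, not part of the original source):
// extracting lane 0 of a <4 x i32> is modeled as free (the value already sits
// in the low part of the register), while any other lane is charged the
// subtarget's insert/extract base cost.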
516 | |||||||||
517 | int AArch64TTIImpl::getArithmeticInstrCost( | ||||||||
518 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, | ||||||||
519 | TTI::OperandValueKind Opd1Info, | ||||||||
520 | TTI::OperandValueKind Opd2Info, TTI::OperandValueProperties Opd1PropInfo, | ||||||||
521 | TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, | ||||||||
522 | const Instruction *CxtI) { | ||||||||
523 | // TODO: Handle more cost kinds. | ||||||||
524 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
525 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, | ||||||||
526 | Opd2Info, Opd1PropInfo, | ||||||||
527 | Opd2PropInfo, Args, CxtI); | ||||||||
528 | |||||||||
529 | // Legalize the type. | ||||||||
530 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||
531 | |||||||||
532 | // If the instruction is a widening instruction (e.g., uaddl, saddw, etc.), | ||||||||
533 | // add in the widening overhead specified by the sub-target. Since the | ||||||||
534 | // extends feeding widening instructions are performed automatically, they | ||||||||
535 | // aren't present in the generated code and have a zero cost. By adding a | ||||||||
536 | // widening overhead here, we attach the total cost of the combined operation | ||||||||
537 | // to the widening instruction. | ||||||||
538 | int Cost = 0; | ||||||||
539 | if (isWideningInstruction(Ty, Opcode, Args)) | ||||||||
540 | Cost += ST->getWideningBaseCost(); | ||||||||
541 | |||||||||
542 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
543 | |||||||||
544 | switch (ISD) { | ||||||||
545 | default: | ||||||||
546 | return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, | ||||||||
547 | Opd2Info, | ||||||||
548 | Opd1PropInfo, Opd2PropInfo); | ||||||||
549 | case ISD::SDIV: | ||||||||
550 | if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue && | ||||||||
551 | Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) { | ||||||||
552 | // On AArch64, scalar signed division by a power-of-two constant is | ||||||||
553 | // normally expanded to the sequence ADD + CMP + SELECT + SRA. | ||||||||
554 | // The OperandValue properties may not be the same as those of the | ||||||||
555 | // previous operation; conservatively assume OP_None. | ||||||||
556 | Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, | ||||||||
557 | Opd1Info, Opd2Info, | ||||||||
558 | TargetTransformInfo::OP_None, | ||||||||
559 | TargetTransformInfo::OP_None); | ||||||||
560 | Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, | ||||||||
561 | Opd1Info, Opd2Info, | ||||||||
562 | TargetTransformInfo::OP_None, | ||||||||
563 | TargetTransformInfo::OP_None); | ||||||||
564 | Cost += getArithmeticInstrCost(Instruction::Select, Ty, CostKind, | ||||||||
565 | Opd1Info, Opd2Info, | ||||||||
566 | TargetTransformInfo::OP_None, | ||||||||
567 | TargetTransformInfo::OP_None); | ||||||||
568 | Cost += getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, | ||||||||
569 | Opd1Info, Opd2Info, | ||||||||
570 | TargetTransformInfo::OP_None, | ||||||||
571 | TargetTransformInfo::OP_None); | ||||||||
572 | return Cost; | ||||||||
573 | } | ||||||||
574 | LLVM_FALLTHROUGH; | ||||||||
575 | case ISD::UDIV: | ||||||||
576 | if (Opd2Info == TargetTransformInfo::OK_UniformConstantValue) { | ||||||||
577 | auto VT = TLI->getValueType(DL, Ty); | ||||||||
578 | if (TLI->isOperationLegalOrCustom(ISD::MULHU, VT)) { | ||||||||
579 | // Vector signed division by a constant is expanded to the | ||||||||
580 | // sequence MULHS + ADD/SUB + SRA + SRL + ADD, and unsigned division | ||||||||
581 | // to MULHS + SUB + SRL + ADD + SRL. | ||||||||
582 | int MulCost = getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, | ||||||||
583 | Opd1Info, Opd2Info, | ||||||||
584 | TargetTransformInfo::OP_None, | ||||||||
585 | TargetTransformInfo::OP_None); | ||||||||
586 | int AddCost = getArithmeticInstrCost(Instruction::Add, Ty, CostKind, | ||||||||
587 | Opd1Info, Opd2Info, | ||||||||
588 | TargetTransformInfo::OP_None, | ||||||||
589 | TargetTransformInfo::OP_None); | ||||||||
590 | int ShrCost = getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, | ||||||||
591 | Opd1Info, Opd2Info, | ||||||||
592 | TargetTransformInfo::OP_None, | ||||||||
593 | TargetTransformInfo::OP_None); | ||||||||
594 | return MulCost * 2 + AddCost * 2 + ShrCost * 2 + 1; | ||||||||
595 | } | ||||||||
596 | } | ||||||||
597 | |||||||||
598 | Cost += BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, | ||||||||
599 | Opd2Info, | ||||||||
600 | Opd1PropInfo, Opd2PropInfo); | ||||||||
601 | if (Ty->isVectorTy()) { | ||||||||
602 | // On AArch64, vector divisions are not supported natively and are | ||||||||
603 | // expanded into scalar divisions of each pair of elements. | ||||||||
604 | Cost += getArithmeticInstrCost(Instruction::ExtractElement, Ty, CostKind, | ||||||||
605 | Opd1Info, Opd2Info, Opd1PropInfo, | ||||||||
606 | Opd2PropInfo); | ||||||||
607 | Cost += getArithmeticInstrCost(Instruction::InsertElement, Ty, CostKind, | ||||||||
608 | Opd1Info, Opd2Info, Opd1PropInfo, | ||||||||
609 | Opd2PropInfo); | ||||||||
610 | // TODO: if one of the arguments is scalar, then it's not necessary to | ||||||||
611 | // double the cost of handling the vector elements. | ||||||||
612 | Cost += Cost; | ||||||||
613 | } | ||||||||
614 | return Cost; | ||||||||
615 | |||||||||
616 | case ISD::ADD: | ||||||||
617 | case ISD::MUL: | ||||||||
618 | case ISD::XOR: | ||||||||
619 | case ISD::OR: | ||||||||
620 | case ISD::AND: | ||||||||
621 | // These nodes are marked as 'custom' for combining purposes only. | ||||||||
622 | // We know that they are legal. See LowerAdd in ISelLowering. | ||||||||
623 | return (Cost + 1) * LT.first; | ||||||||
624 | |||||||||
625 | case ISD::FADD: | ||||||||
626 | // These nodes are marked as 'custom' just to lower them to SVE. | ||||||||
627 | // We know said lowering will incur no additional cost. | ||||||||
628 | if (isa<FixedVectorType>(Ty) && !Ty->getScalarType()->isFP128Ty()) | ||||||||
629 | return (Cost + 2) * LT.first; | ||||||||
630 | |||||||||
631 | return Cost + BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, | ||||||||
632 | Opd2Info, | ||||||||
633 | Opd1PropInfo, Opd2PropInfo); | ||||||||
634 | } | ||||||||
635 | } | ||||||||
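// Editorial note (illustrative example, not part of the original source): for
// a scalar 'sdiv i32 %x, 8' with a uniform power-of-two divisor, the returned
// cost is the sum of the Add, Sub, Select and AShr costs above, mirroring the
// ADD + CMP + SELECT + SRA expansion; a vector division that cannot use the
// MULHS/MULHU shortcut is charged the base cost plus the per-element
// extract/insert overhead added above.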
636 | |||||||||
637 | int AArch64TTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE, | ||||||||
638 | const SCEV *Ptr) { | ||||||||
639 | // Address computations in vectorized code with non-consecutive addresses will | ||||||||
640 | // likely result in more instructions compared to scalar code where the | ||||||||
641 | // computation can more often be merged into the index mode. The resulting | ||||||||
642 | // extra micro-ops can significantly decrease throughput. | ||||||||
643 | unsigned NumVectorInstToHideOverhead = 10; | ||||||||
644 | int MaxMergeDistance = 64; | ||||||||
645 | |||||||||
646 | if (Ty->isVectorTy() && SE && | ||||||||
647 | !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) | ||||||||
648 | return NumVectorInstToHideOverhead; | ||||||||
649 | |||||||||
650 | // In many cases the address computation is not merged into the instruction | ||||||||
651 | // addressing mode. | ||||||||
652 | return 1; | ||||||||
653 | } | ||||||||
654 | |||||||||
655 | int AArch64TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, | ||||||||
656 | Type *CondTy, | ||||||||
657 | TTI::TargetCostKind CostKind, | ||||||||
658 | const Instruction *I) { | ||||||||
659 | // TODO: Handle other cost kinds. | ||||||||
660 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
661 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); | ||||||||
662 | |||||||||
663 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
664 | // We don't lower some vector selects well that are wider than the register | ||||||||
665 | // width. | ||||||||
666 | if (ValTy->isVectorTy() && ISD == ISD::SELECT) { | ||||||||
667 | // We would need this many instructions to hide the scalarization happening. | ||||||||
668 | const int AmortizationCost = 20; | ||||||||
669 | static const TypeConversionCostTblEntry | ||||||||
670 | VectorSelectTbl[] = { | ||||||||
671 | { ISD::SELECT, MVT::v16i1, MVT::v16i16, 16 }, | ||||||||
672 | { ISD::SELECT, MVT::v8i1, MVT::v8i32, 8 }, | ||||||||
673 | { ISD::SELECT, MVT::v16i1, MVT::v16i32, 16 }, | ||||||||
674 | { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4 * AmortizationCost }, | ||||||||
675 | { ISD::SELECT, MVT::v8i1, MVT::v8i64, 8 * AmortizationCost }, | ||||||||
676 | { ISD::SELECT, MVT::v16i1, MVT::v16i64, 16 * AmortizationCost } | ||||||||
677 | }; | ||||||||
678 | |||||||||
679 | EVT SelCondTy = TLI->getValueType(DL, CondTy); | ||||||||
680 | EVT SelValTy = TLI->getValueType(DL, ValTy); | ||||||||
681 | if (SelCondTy.isSimple() && SelValTy.isSimple()) { | ||||||||
682 | if (const auto *Entry = ConvertCostTableLookup(VectorSelectTbl, ISD, | ||||||||
683 | SelCondTy.getSimpleVT(), | ||||||||
684 | SelValTy.getSimpleVT())) | ||||||||
685 | return Entry->Cost; | ||||||||
686 | } | ||||||||
687 | } | ||||||||
688 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); | ||||||||
689 | } | ||||||||
690 | |||||||||
691 | AArch64TTIImpl::TTI::MemCmpExpansionOptions | ||||||||
692 | AArch64TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { | ||||||||
693 | TTI::MemCmpExpansionOptions Options; | ||||||||
694 | if (ST->requiresStrictAlign()) { | ||||||||
695 | // TODO: Add cost modeling for strict align. Misaligned loads expand to | ||||||||
696 | // a bunch of instructions when strict align is enabled. | ||||||||
697 | return Options; | ||||||||
698 | } | ||||||||
699 | Options.AllowOverlappingLoads = true; | ||||||||
700 | Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); | ||||||||
701 | Options.NumLoadsPerBlock = Options.MaxNumLoads; | ||||||||
702 | // TODO: Though vector loads usually perform well on AArch64, in some targets | ||||||||
703 | // they may wake up the FP unit, which raises the power consumption. Perhaps | ||||||||
704 | // they could be used with no holds barred (-O3). | ||||||||
705 | Options.LoadSizes = {8, 4, 2, 1}; | ||||||||
706 | return Options; | ||||||||
707 | } | ||||||||
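// Editorial note (illustrative example, not part of the original source): with
// LoadSizes = {8, 4, 2, 1} and overlapping loads allowed, a 15-byte memcmp can
// be expanded into two 8-byte loads per buffer (at offsets 0 and 7) plus two
// compares instead of a libcall; on strict-alignment subtargets the expansion
// is effectively disabled, since an empty Options set is returned.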
708 | |||||||||
709 | int AArch64TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Ty, | ||||||||
710 | MaybeAlign Alignment, unsigned AddressSpace, | ||||||||
711 | TTI::TargetCostKind CostKind, | ||||||||
712 | const Instruction *I) { | ||||||||
713 | // TODO: Handle other cost kinds. | ||||||||
714 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
715 | return 1; | ||||||||
716 | |||||||||
717 | // Type legalization can't handle structs | ||||||||
718 | if (TLI->getValueType(DL, Ty, true) == MVT::Other) | ||||||||
719 | return BaseT::getMemoryOpCost(Opcode, Ty, Alignment, AddressSpace, | ||||||||
720 | CostKind); | ||||||||
721 | |||||||||
722 | auto LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||
723 | |||||||||
724 | if (ST->isMisaligned128StoreSlow() && Opcode == Instruction::Store && | ||||||||
725 | LT.second.is128BitVector() && (!Alignment || *Alignment < Align(16))) { | ||||||||
726 | // Unaligned stores are extremely inefficient. We don't split all | ||||||||
727 | // unaligned 128-bit stores because of the negative impact that has been | ||||||||
728 | // shown in practice on inlined block copy code. | ||||||||
729 | // We make such stores expensive so that we will only vectorize if there | ||||||||
730 | // are 6 other instructions getting vectorized. | ||||||||
731 | const int AmortizationCost = 6; | ||||||||
732 | |||||||||
733 | return LT.first * 2 * AmortizationCost; | ||||||||
734 | } | ||||||||
735 | |||||||||
736 | if (Ty->isVectorTy() && | ||||||||
737 | cast<VectorType>(Ty)->getElementType()->isIntegerTy(8)) { | ||||||||
738 | unsigned ProfitableNumElements; | ||||||||
739 | if (Opcode == Instruction::Store) | ||||||||
740 | // We use a custom trunc store lowering so v.4b should be profitable. | ||||||||
741 | ProfitableNumElements = 4; | ||||||||
742 | else | ||||||||
744 | // We scalarize the loads because there is no v.4b register and we | ||||||||
744 | // have to promote the elements to v.2. | ||||||||
745 | ProfitableNumElements = 8; | ||||||||
746 | |||||||||
747 | if (cast<FixedVectorType>(Ty)->getNumElements() < ProfitableNumElements) { | ||||||||
748 | unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements(); | ||||||||
749 | unsigned NumVectorizableInstsToAmortize = NumVecElts * 2; | ||||||||
750 | // We generate 2 instructions per vector element. | ||||||||
751 | return NumVectorizableInstsToAmortize * NumVecElts * 2; | ||||||||
752 | } | ||||||||
753 | } | ||||||||
754 | |||||||||
755 | return LT.first; | ||||||||
756 | } | ||||||||
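// Editorial note (worked example, not part of the original source): on a
// subtarget where misaligned 128-bit stores are slow, a store of <4 x i32>
// with 4-byte alignment legalizes to one 128-bit store (LT.first == 1), so the
// returned cost is 1 * 2 * 6 = 12, deliberately discouraging vectorization
// unless enough surrounding work is vectorized along with it.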
757 | |||||||||
758 | int AArch64TTIImpl::getInterleavedMemoryOpCost( | ||||||||
759 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||||||||
760 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||||||||
761 | bool UseMaskForCond, bool UseMaskForGaps) { | ||||||||
762 | assert(Factor >= 2 && "Invalid interleave factor"); | ||||||||
763 | auto *VecVTy = cast<FixedVectorType>(VecTy); | ||||||||
764 | |||||||||
765 | if (!UseMaskForCond && !UseMaskForGaps && | ||||||||
766 | Factor <= TLI->getMaxSupportedInterleaveFactor()) { | ||||||||
767 | unsigned NumElts = VecVTy->getNumElements(); | ||||||||
768 | auto *SubVecTy = | ||||||||
769 | FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor); | ||||||||
770 | |||||||||
771 | // ldN/stN only support legal vector types of size 64 or 128 in bits. | ||||||||
772 | // Accesses having vector types that are a multiple of 128 bits can be | ||||||||
773 | // matched to more than one ldN/stN instruction. | ||||||||
774 | if (NumElts % Factor == 0 && | ||||||||
775 | TLI->isLegalInterleavedAccessType(SubVecTy, DL)) | ||||||||
776 | return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL); | ||||||||
777 | } | ||||||||
778 | |||||||||
779 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, | ||||||||
780 | Alignment, AddressSpace, CostKind, | ||||||||
781 | UseMaskForCond, UseMaskForGaps); | ||||||||
782 | } | ||||||||
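// Editorial note (illustrative example, not part of the original source): a
// factor-2 interleaved load of <8 x i32> splits into a <4 x i32> sub-vector,
// which is a legal 128-bit type, so the cost is Factor (2) times the number of
// ld2 instructions needed; masked or gap-containing groups fall back to the
// base scalarized estimate.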
783 | |||||||||
784 | int AArch64TTIImpl::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { | ||||||||
785 | int Cost = 0; | ||||||||
786 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; | ||||||||
787 | for (auto *I : Tys) { | ||||||||
788 | if (!I->isVectorTy()) | ||||||||
789 | continue; | ||||||||
790 | if (I->getScalarSizeInBits() * cast<FixedVectorType>(I)->getNumElements() == | ||||||||
791 | 128) | ||||||||
792 | Cost += getMemoryOpCost(Instruction::Store, I, Align(128), 0, CostKind) + | ||||||||
793 | getMemoryOpCost(Instruction::Load, I, Align(128), 0, CostKind); | ||||||||
794 | } | ||||||||
795 | return Cost; | ||||||||
796 | } | ||||||||
797 | |||||||||
798 | unsigned AArch64TTIImpl::getMaxInterleaveFactor(unsigned VF) { | ||||||||
799 | return ST->getMaxInterleaveFactor(); | ||||||||
800 | } | ||||||||
801 | |||||||||
802 | // For Falkor, we want to avoid having too many strided loads in a loop since | ||||||||
803 | // that can exhaust the HW prefetcher resources. We adjust the unroller | ||||||||
804 | // MaxCount preference below to attempt to ensure unrolling doesn't create too | ||||||||
805 | // many strided loads. | ||||||||
806 | static void | ||||||||
807 | getFalkorUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||
808 | TargetTransformInfo::UnrollingPreferences &UP) { | ||||||||
809 | enum { MaxStridedLoads = 7 }; | ||||||||
810 | auto countStridedLoads = [](Loop *L, ScalarEvolution &SE) { | ||||||||
811 | int StridedLoads = 0; | ||||||||
812 | // FIXME? We could make this more precise by looking at the CFG and | ||||||||
813 | // e.g. not counting loads in each side of an if-then-else diamond. | ||||||||
814 | for (const auto BB : L->blocks()) { | ||||||||
815 | for (auto &I : *BB) { | ||||||||
816 | LoadInst *LMemI = dyn_cast<LoadInst>(&I); | ||||||||
817 | if (!LMemI) | ||||||||
818 | continue; | ||||||||
819 | |||||||||
820 | Value *PtrValue = LMemI->getPointerOperand(); | ||||||||
821 | if (L->isLoopInvariant(PtrValue)) | ||||||||
822 | continue; | ||||||||
823 | |||||||||
824 | const SCEV *LSCEV = SE.getSCEV(PtrValue); | ||||||||
825 | const SCEVAddRecExpr *LSCEVAddRec = dyn_cast<SCEVAddRecExpr>(LSCEV); | ||||||||
826 | if (!LSCEVAddRec || !LSCEVAddRec->isAffine()) | ||||||||
827 | continue; | ||||||||
828 | |||||||||
829 | // FIXME? We could take pairing of unrolled load copies into account | ||||||||
830 | // by looking at the AddRec, but we would probably have to limit this | ||||||||
831 | // to loops with no stores or other memory optimization barriers. | ||||||||
832 | ++StridedLoads; | ||||||||
833 | // We've seen enough strided loads that seeing more won't make a | ||||||||
834 | // difference. | ||||||||
835 | if (StridedLoads > MaxStridedLoads / 2) | ||||||||
836 | return StridedLoads; | ||||||||
837 | } | ||||||||
838 | } | ||||||||
839 | return StridedLoads; | ||||||||
840 | }; | ||||||||
841 | |||||||||
842 | int StridedLoads = countStridedLoads(L, SE); | ||||||||
843 | LLVM_DEBUG(dbgs() << "falkor-hwpf: detected " << StridedLoads | ||||||||
844 | << " strided loads\n"); | ||||||||
845 | // Pick the largest power of 2 unroll count that won't result in too many | ||||||||
846 | // strided loads. | ||||||||
847 | if (StridedLoads) { | ||||||||
848 | UP.MaxCount = 1 << Log2_32(MaxStridedLoads / StridedLoads); | ||||||||
849 | LLVM_DEBUG(dbgs() << "falkor-hwpf: setting unroll MaxCount to " | ||||||||
850 | << UP.MaxCount << '\n'); | ||||||||
851 | } | ||||||||
852 | } | ||||||||
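// Editorial note (worked example, not part of the original source): with
// MaxStridedLoads == 7, a loop body containing 2 strided loads gets
// UP.MaxCount = 1 << Log2_32(7 / 2) = 2, so unrolling by 2 keeps the total
// number of strided loads within what the hardware prefetcher can track.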
853 | |||||||||
854 | void AArch64TTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||
855 | TTI::UnrollingPreferences &UP) { | ||||||||
856 | // Enable partial unrolling and runtime unrolling. | ||||||||
857 | BaseT::getUnrollingPreferences(L, SE, UP); | ||||||||
858 | |||||||||
859 | // An inner loop is more likely to be hot, and the runtime check can be | ||||||||
860 | // hoisted out by the LICM pass, so the overhead is lower; use a larger | ||||||||
861 | // threshold to unroll more loops. | ||||||||
862 | if (L->getLoopDepth() > 1) | ||||||||
863 | UP.PartialThreshold *= 2; | ||||||||
864 | |||||||||
865 | // Disable partial & runtime unrolling on -Os. | ||||||||
866 | UP.PartialOptSizeThreshold = 0; | ||||||||
867 | |||||||||
868 | if (ST->getProcFamily() == AArch64Subtarget::Falkor && | ||||||||
869 | EnableFalkorHWPFUnrollFix) | ||||||||
870 | getFalkorUnrollingPreferences(L, SE, UP); | ||||||||
871 | } | ||||||||
872 | |||||||||
873 | void AArch64TTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||
874 | TTI::PeelingPreferences &PP) { | ||||||||
875 | BaseT::getPeelingPreferences(L, SE, PP); | ||||||||
876 | } | ||||||||
877 | |||||||||
878 | Value *AArch64TTIImpl::getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, | ||||||||
879 | Type *ExpectedType) { | ||||||||
880 | switch (Inst->getIntrinsicID()) { | ||||||||
881 | default: | ||||||||
882 | return nullptr; | ||||||||
883 | case Intrinsic::aarch64_neon_st2: | ||||||||
884 | case Intrinsic::aarch64_neon_st3: | ||||||||
885 | case Intrinsic::aarch64_neon_st4: { | ||||||||
886 | // Create a struct type | ||||||||
887 | StructType *ST = dyn_cast<StructType>(ExpectedType); | ||||||||
888 | if (!ST) | ||||||||
889 | return nullptr; | ||||||||
890 | unsigned NumElts = Inst->getNumArgOperands() - 1; | ||||||||
891 | if (ST->getNumElements() != NumElts) | ||||||||
892 | return nullptr; | ||||||||
893 | for (unsigned i = 0, e = NumElts; i != e; ++i) { | ||||||||
894 | if (Inst->getArgOperand(i)->getType() != ST->getElementType(i)) | ||||||||
895 | return nullptr; | ||||||||
896 | } | ||||||||
897 | Value *Res = UndefValue::get(ExpectedType); | ||||||||
898 | IRBuilder<> Builder(Inst); | ||||||||
899 | for (unsigned i = 0, e = NumElts; i != e; ++i) { | ||||||||
900 | Value *L = Inst->getArgOperand(i); | ||||||||
901 | Res = Builder.CreateInsertValue(Res, L, i); | ||||||||
902 | } | ||||||||
903 | return Res; | ||||||||
904 | } | ||||||||
905 | case Intrinsic::aarch64_neon_ld2: | ||||||||
906 | case Intrinsic::aarch64_neon_ld3: | ||||||||
907 | case Intrinsic::aarch64_neon_ld4: | ||||||||
908 | if (Inst->getType() == ExpectedType) | ||||||||
909 | return Inst; | ||||||||
910 | return nullptr; | ||||||||
911 | } | ||||||||
912 | } | ||||||||
913 | |||||||||
914 | bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, | ||||||||
915 | MemIntrinsicInfo &Info) { | ||||||||
916 | switch (Inst->getIntrinsicID()) { | ||||||||
917 | default: | ||||||||
918 | break; | ||||||||
919 | case Intrinsic::aarch64_neon_ld2: | ||||||||
920 | case Intrinsic::aarch64_neon_ld3: | ||||||||
921 | case Intrinsic::aarch64_neon_ld4: | ||||||||
922 | Info.ReadMem = true; | ||||||||
923 | Info.WriteMem = false; | ||||||||
924 | Info.PtrVal = Inst->getArgOperand(0); | ||||||||
925 | break; | ||||||||
926 | case Intrinsic::aarch64_neon_st2: | ||||||||
927 | case Intrinsic::aarch64_neon_st3: | ||||||||
928 | case Intrinsic::aarch64_neon_st4: | ||||||||
929 | Info.ReadMem = false; | ||||||||
930 | Info.WriteMem = true; | ||||||||
931 | Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); | ||||||||
932 | break; | ||||||||
933 | } | ||||||||
934 | |||||||||
935 | switch (Inst->getIntrinsicID()) { | ||||||||
936 | default: | ||||||||
937 | return false; | ||||||||
938 | case Intrinsic::aarch64_neon_ld2: | ||||||||
939 | case Intrinsic::aarch64_neon_st2: | ||||||||
940 | Info.MatchingId = VECTOR_LDST_TWO_ELEMENTS; | ||||||||
941 | break; | ||||||||
942 | case Intrinsic::aarch64_neon_ld3: | ||||||||
943 | case Intrinsic::aarch64_neon_st3: | ||||||||
944 | Info.MatchingId = VECTOR_LDST_THREE_ELEMENTS; | ||||||||
945 | break; | ||||||||
946 | case Intrinsic::aarch64_neon_ld4: | ||||||||
947 | case Intrinsic::aarch64_neon_st4: | ||||||||
948 | Info.MatchingId = VECTOR_LDST_FOUR_ELEMENTS; | ||||||||
949 | break; | ||||||||
950 | } | ||||||||
951 | return true; | ||||||||
952 | } | ||||||||
953 | |||||||||
954 | /// See if \p I should be considered for address type promotion. We check if \p | ||||||||
955 | /// I is a sext with the right type, used in memory accesses. If it is used in a | ||||||||
956 | /// "complex" getelementptr, we allow it to be promoted without finding other | ||||||||
957 | /// sext instructions that sign extended the same initial value. A getelementptr | ||||||||
958 | /// is considered as "complex" if it has more than 2 operands. | ||||||||
959 | bool AArch64TTIImpl::shouldConsiderAddressTypePromotion( | ||||||||
960 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) { | ||||||||
961 | bool Considerable = false; | ||||||||
962 | AllowPromotionWithoutCommonHeader = false; | ||||||||
963 | if (!isa<SExtInst>(&I)) | ||||||||
964 | return false; | ||||||||
965 | Type *ConsideredSExtType = | ||||||||
966 | Type::getInt64Ty(I.getParent()->getParent()->getContext()); | ||||||||
967 | if (I.getType() != ConsideredSExtType) | ||||||||
968 | return false; | ||||||||
969 | // See if the sext is the one with the right type and used in at least one | ||||||||
970 | // GetElementPtrInst. | ||||||||
971 | for (const User *U : I.users()) { | ||||||||
972 | if (const GetElementPtrInst *GEPInst = dyn_cast<GetElementPtrInst>(U)) { | ||||||||
973 | Considerable = true; | ||||||||
974 | // A getelementptr is considered as "complex" if it has more than 2 | ||||||||
975 | // operands. We will promote a SExt used in such complex GEP as we | ||||||||
976 | // expect some computation to be merged if they are done on 64 bits. | ||||||||
977 | if (GEPInst->getNumOperands() > 2) { | ||||||||
978 | AllowPromotionWithoutCommonHeader = true; | ||||||||
979 | break; | ||||||||
980 | } | ||||||||
981 | } | ||||||||
982 | } | ||||||||
983 | return Considerable; | ||||||||
984 | } | ||||||||
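// Editorial note (illustrative IR example, not part of the original source):
//   %idx = sext i32 %i to i64
//   %p   = getelementptr inbounds [64 x i32], [64 x i32]* %base, i64 0, i64 %idx
// The sext produces i64 and feeds a GEP with more than two operands, so the
// sext is considered for promotion and AllowPromotionWithoutCommonHeader is
// set.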
985 | |||||||||
986 | bool AArch64TTIImpl::useReductionIntrinsic(unsigned Opcode, Type *Ty, | ||||||||
987 | TTI::ReductionFlags Flags) const { | ||||||||
988 | auto *VTy = cast<VectorType>(Ty); | ||||||||
989 | unsigned ScalarBits = Ty->getScalarSizeInBits(); | ||||||||
990 | switch (Opcode) { | ||||||||
991 | case Instruction::FAdd: | ||||||||
992 | case Instruction::FMul: | ||||||||
993 | case Instruction::And: | ||||||||
994 | case Instruction::Or: | ||||||||
995 | case Instruction::Xor: | ||||||||
996 | case Instruction::Mul: | ||||||||
997 | return false; | ||||||||
998 | case Instruction::Add: | ||||||||
999 | return ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128; | ||||||||
1000 | case Instruction::ICmp: | ||||||||
1001 | return (ScalarBits < 64) && | ||||||||
1002 | (ScalarBits * cast<FixedVectorType>(VTy)->getNumElements() >= 128); | ||||||||
1003 | case Instruction::FCmp: | ||||||||
1004 | return Flags.NoNaN; | ||||||||
1005 | default: | ||||||||
1006 | llvm_unreachable("Unhandled reduction opcode")::llvm::llvm_unreachable_internal("Unhandled reduction opcode" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp" , 1006); | ||||||||
1007 | } | ||||||||
1008 | return false; | ||||||||
1009 | } | ||||||||
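// Editorial note (illustrative example, not part of the original source): an
// add reduction over <4 x i32> covers 32 * 4 = 128 bits, so the intrinsic form
// is preferred (it can be lowered to ADDV), while FAdd/FMul and narrower
// integer cases keep using the shuffle-based reduction.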
1010 | |||||||||
1011 | int AArch64TTIImpl::getArithmeticReductionCost(unsigned Opcode, | ||||||||
1012 | VectorType *ValTy, | ||||||||
1013 | bool IsPairwiseForm, | ||||||||
1014 | TTI::TargetCostKind CostKind) { | ||||||||
1015 | |||||||||
1016 | if (IsPairwiseForm) | ||||||||
1017 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, | ||||||||
1018 | CostKind); | ||||||||
1019 | |||||||||
1020 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | ||||||||
1021 | MVT MTy = LT.second; | ||||||||
1022 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
1023 | assert(ISD && "Invalid opcode"); | ||||||||
1024 | |||||||||
1025 | // Horizontal adds can use the 'addv' instruction. We model the cost of these | ||||||||
1026 | // instructions as normal vector adds. This is the only arithmetic vector | ||||||||
1027 | // reduction operation for which we have an instruction. | ||||||||
1028 | static const CostTblEntry CostTblNoPairwise[]{ | ||||||||
1029 | {ISD::ADD, MVT::v8i8, 1}, | ||||||||
1030 | {ISD::ADD, MVT::v16i8, 1}, | ||||||||
1031 | {ISD::ADD, MVT::v4i16, 1}, | ||||||||
1032 | {ISD::ADD, MVT::v8i16, 1}, | ||||||||
1033 | {ISD::ADD, MVT::v4i32, 1}, | ||||||||
1034 | }; | ||||||||
1035 | |||||||||
1036 | if (const auto *Entry = CostTableLookup(CostTblNoPairwise, ISD, MTy)) | ||||||||
1037 | return LT.first * Entry->Cost; | ||||||||
1038 | |||||||||
1039 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwiseForm, | ||||||||
1040 | CostKind); | ||||||||
1041 | } | ||||||||
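// Worked example (a sketch of how the table above combines with type
// legalization, assuming the usual AArch64 legalization rules): an i8 add
// reduction over a v16i8 hits the {ISD::ADD, MVT::v16i8, 1} entry directly,
// so the modelled cost is 1 (a single addv). A v32i8 input is first split by
// legalization into two v16i8 halves, so LT.first == 2 and the modelled cost
// becomes 2 * 1 = 2.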
1042 | |||||||||
1043 | int AArch64TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, | ||||||||
1044 | int Index, VectorType *SubTp) { | ||||||||
1045 | if (Kind == TTI::SK_Broadcast || Kind == TTI::SK_Transpose || | ||||||||
1046 | Kind == TTI::SK_Select || Kind == TTI::SK_PermuteSingleSrc) { | ||||||||
1047 | static const CostTblEntry ShuffleTbl[] = { | ||||||||
1048 | // Broadcast shuffle kinds can be performed with 'dup'. | ||||||||
1049 | { TTI::SK_Broadcast, MVT::v8i8, 1 }, | ||||||||
1050 | { TTI::SK_Broadcast, MVT::v16i8, 1 }, | ||||||||
1051 | { TTI::SK_Broadcast, MVT::v4i16, 1 }, | ||||||||
1052 | { TTI::SK_Broadcast, MVT::v8i16, 1 }, | ||||||||
1053 | { TTI::SK_Broadcast, MVT::v2i32, 1 }, | ||||||||
1054 | { TTI::SK_Broadcast, MVT::v4i32, 1 }, | ||||||||
1055 | { TTI::SK_Broadcast, MVT::v2i64, 1 }, | ||||||||
1056 | { TTI::SK_Broadcast, MVT::v2f32, 1 }, | ||||||||
1057 | { TTI::SK_Broadcast, MVT::v4f32, 1 }, | ||||||||
1058 | { TTI::SK_Broadcast, MVT::v2f64, 1 }, | ||||||||
1059 | // Transpose shuffle kinds can be performed with 'trn1/trn2' and | ||||||||
1060 | // 'zip1/zip2' instructions. | ||||||||
1061 | { TTI::SK_Transpose, MVT::v8i8, 1 }, | ||||||||
1062 | { TTI::SK_Transpose, MVT::v16i8, 1 }, | ||||||||
1063 | { TTI::SK_Transpose, MVT::v4i16, 1 }, | ||||||||
1064 | { TTI::SK_Transpose, MVT::v8i16, 1 }, | ||||||||
1065 | { TTI::SK_Transpose, MVT::v2i32, 1 }, | ||||||||
1066 | { TTI::SK_Transpose, MVT::v4i32, 1 }, | ||||||||
1067 | { TTI::SK_Transpose, MVT::v2i64, 1 }, | ||||||||
1068 | { TTI::SK_Transpose, MVT::v2f32, 1 }, | ||||||||
1069 | { TTI::SK_Transpose, MVT::v4f32, 1 }, | ||||||||
1070 | { TTI::SK_Transpose, MVT::v2f64, 1 }, | ||||||||
1071 | // Select shuffle kinds. | ||||||||
1072 | // TODO: handle vXi8/vXi16. | ||||||||
1073 | { TTI::SK_Select, MVT::v2i32, 1 }, // mov. | ||||||||
1074 | { TTI::SK_Select, MVT::v4i32, 2 }, // rev+trn (or similar). | ||||||||
1075 | { TTI::SK_Select, MVT::v2i64, 1 }, // mov. | ||||||||
1076 | { TTI::SK_Select, MVT::v2f32, 1 }, // mov. | ||||||||
1077 | { TTI::SK_Select, MVT::v4f32, 2 }, // rev+trn (or similar). | ||||||||
1078 | { TTI::SK_Select, MVT::v2f64, 1 }, // mov. | ||||||||
1079 | // PermuteSingleSrc shuffle kinds. | ||||||||
1080 | // TODO: handle vXi8/vXi16. | ||||||||
1081 | { TTI::SK_PermuteSingleSrc, MVT::v2i32, 1 }, // mov. | ||||||||
1082 | { TTI::SK_PermuteSingleSrc, MVT::v4i32, 3 }, // perfectshuffle worst case. | ||||||||
1083 | { TTI::SK_PermuteSingleSrc, MVT::v2i64, 1 }, // mov. | ||||||||
1084 | { TTI::SK_PermuteSingleSrc, MVT::v2f32, 1 }, // mov. | ||||||||
1085 | { TTI::SK_PermuteSingleSrc, MVT::v4f32, 3 }, // perfectshuffle worst case. | ||||||||
1086 | { TTI::SK_PermuteSingleSrc, MVT::v2f64, 1 }, // mov. | ||||||||
1087 | }; | ||||||||
1088 | std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); | ||||||||
1089 | if (const auto *Entry = CostTableLookup(ShuffleTbl, Kind, LT.second)) | ||||||||
1090 | return LT.first * Entry->Cost; | ||||||||
1091 | } | ||||||||
1092 | |||||||||
1093 | return BaseT::getShuffleCost(Kind, Tp, Index, SubTp); | ||||||||
1094 | } |
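// Worked example (a sketch, assuming the usual NEON legalization): a
// broadcast (splat) shuffle of a v4f32 matches the {TTI::SK_Broadcast,
// MVT::v4f32, 1} entry above and costs 1, i.e. a single dup. A v8f32
// broadcast is legalized into two v4f32 halves, so LT.first == 2 and the
// modelled cost is 2. Kinds or types with no table entry fall back to the
// scalarization-based estimate in BaseT::getShuffleCost.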
1 | //===- llvm/Type.h - Classes for handling data types ------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the declaration of the Type class. For more "Type" |
10 | // stuff, look in DerivedTypes.h. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_IR_TYPE_H |
15 | #define LLVM_IR_TYPE_H |
16 | |
17 | #include "llvm/ADT/APFloat.h" |
18 | #include "llvm/ADT/ArrayRef.h" |
19 | #include "llvm/ADT/SmallPtrSet.h" |
20 | #include "llvm/Support/CBindingWrapping.h" |
21 | #include "llvm/Support/Casting.h" |
22 | #include "llvm/Support/Compiler.h" |
23 | #include "llvm/Support/ErrorHandling.h" |
24 | #include "llvm/Support/TypeSize.h" |
25 | #include <cassert> |
26 | #include <cstdint> |
27 | #include <iterator> |
28 | |
29 | namespace llvm { |
30 | |
31 | template<class GraphType> struct GraphTraits; |
32 | class IntegerType; |
33 | class LLVMContext; |
34 | class PointerType; |
35 | class raw_ostream; |
36 | class StringRef; |
37 | |
38 | /// The instances of the Type class are immutable: once they are created, |
39 | /// they are never changed. Also note that only one instance of a particular |
40 | /// type is ever created. Thus seeing if two types are equal is a matter of |
41 | /// doing a trivial pointer comparison. To enforce that no two equal instances |
42 | /// are created, Type instances can only be created via static factory methods |
43 | /// in class Type and in derived classes. Once allocated, Types are never |
44 | /// free'd. |
45 | /// |
46 | class Type { |
47 | public: |
48 | //===--------------------------------------------------------------------===// |
49 | /// Definitions of all of the base types for the Type system. Based on this |
50 | /// value, you can cast to a class defined in DerivedTypes.h. |
51 | /// Note: If you add an element to this, you need to add an element to the |
52 | /// Type::getPrimitiveType function, or else things will break! |
53 | /// Also update LLVMTypeKind and LLVMGetTypeKind () in the C binding. |
54 | /// |
55 | enum TypeID { |
56 | // PrimitiveTypes |
57 | HalfTyID = 0, ///< 16-bit floating point type |
58 | BFloatTyID, ///< 16-bit floating point type (7-bit significand) |
59 | FloatTyID, ///< 32-bit floating point type |
60 | DoubleTyID, ///< 64-bit floating point type |
61 | X86_FP80TyID, ///< 80-bit floating point type (X87) |
62 | FP128TyID, ///< 128-bit floating point type (112-bit significand) |
63 | PPC_FP128TyID, ///< 128-bit floating point type (two 64-bits, PowerPC) |
64 | VoidTyID, ///< type with no size |
65 | LabelTyID, ///< Labels |
66 | MetadataTyID, ///< Metadata |
67 | X86_MMXTyID, ///< MMX vectors (64 bits, X86 specific) |
68 | TokenTyID, ///< Tokens |
69 | |
70 | // Derived types... see DerivedTypes.h file. |
71 | IntegerTyID, ///< Arbitrary bit width integers |
72 | FunctionTyID, ///< Functions |
73 | PointerTyID, ///< Pointers |
74 | StructTyID, ///< Structures |
75 | ArrayTyID, ///< Arrays |
76 | FixedVectorTyID, ///< Fixed width SIMD vector type |
77 | ScalableVectorTyID ///< Scalable SIMD vector type |
78 | }; |
79 | |
80 | private: |
81 | /// This refers to the LLVMContext in which this type was uniqued. |
82 | LLVMContext &Context; |
83 | |
84 | TypeID ID : 8; // The current base type of this type. |
85 | unsigned SubclassData : 24; // Space for subclasses to store data. |
86 | // Note that this should be synchronized with |
87 | // MAX_INT_BITS value in IntegerType class. |
88 | |
89 | protected: |
90 | friend class LLVMContextImpl; |
91 | |
92 | explicit Type(LLVMContext &C, TypeID tid) |
93 | : Context(C), ID(tid), SubclassData(0) {} |
94 | ~Type() = default; |
95 | |
96 | unsigned getSubclassData() const { return SubclassData; } |
97 | |
98 | void setSubclassData(unsigned val) { |
99 | SubclassData = val; |
100 | // Ensure we don't have any accidental truncation. |
101 | assert(getSubclassData() == val && "Subclass data too large for field");
102 | } |
103 | |
104 | /// Keeps track of how many Type*'s there are in the ContainedTys list. |
105 | unsigned NumContainedTys = 0; |
106 | |
107 | /// A pointer to the array of Types contained by this Type. For example, this |
108 | /// includes the arguments of a function type, the elements of a structure, |
109 | /// the pointee of a pointer, the element type of an array, etc. This pointer |
110 | /// may be 0 for types that don't contain other types (Integer, Double, |
111 | /// Float). |
112 | Type * const *ContainedTys = nullptr; |
113 | |
114 | public: |
115 | /// Print the current type. |
116 | /// Omit the type details if \p NoDetails == true. |
117 | /// E.g., let %st = type { i32, i16 } |
118 | /// When \p NoDetails is true, we only print %st. |
119 | /// Put differently, \p NoDetails prints the type as if |
120 | /// inlined with the operands when printing an instruction. |
121 | void print(raw_ostream &O, bool IsForDebug = false, |
122 | bool NoDetails = false) const; |
123 | |
124 | void dump() const; |
125 | |
126 | /// Return the LLVMContext in which this type was uniqued. |
127 | LLVMContext &getContext() const { return Context; } |
128 | |
129 | //===--------------------------------------------------------------------===// |
130 | // Accessors for working with types. |
131 | // |
132 | |
133 | /// Return the type id for the type. This will return one of the TypeID enum |
134 | /// elements defined above. |
135 | TypeID getTypeID() const { return ID; } |
136 | |
137 | /// Return true if this is 'void'. |
138 | bool isVoidTy() const { return getTypeID() == VoidTyID; } |
139 | |
140 | /// Return true if this is 'half', a 16-bit IEEE fp type. |
141 | bool isHalfTy() const { return getTypeID() == HalfTyID; } |
142 | |
143 | /// Return true if this is 'bfloat', a 16-bit bfloat type. |
144 | bool isBFloatTy() const { return getTypeID() == BFloatTyID; } |
145 | |
146 | /// Return true if this is 'float', a 32-bit IEEE fp type. |
147 | bool isFloatTy() const { return getTypeID() == FloatTyID; } |
148 | |
149 | /// Return true if this is 'double', a 64-bit IEEE fp type. |
150 | bool isDoubleTy() const { return getTypeID() == DoubleTyID; } |
151 | |
152 | /// Return true if this is x86 long double. |
153 | bool isX86_FP80Ty() const { return getTypeID() == X86_FP80TyID; } |
154 | |
155 | /// Return true if this is 'fp128'. |
156 | bool isFP128Ty() const { return getTypeID() == FP128TyID; } |
157 | |
158 | /// Return true if this is powerpc long double. |
159 | bool isPPC_FP128Ty() const { return getTypeID() == PPC_FP128TyID; } |
160 | |
161 | /// Return true if this is one of the seven floating-point types
162 | bool isFloatingPointTy() const { |
163 | return getTypeID() == HalfTyID || getTypeID() == BFloatTyID || |
164 | getTypeID() == FloatTyID || getTypeID() == DoubleTyID || |
165 | getTypeID() == X86_FP80TyID || getTypeID() == FP128TyID || |
166 | getTypeID() == PPC_FP128TyID; |
167 | } |
168 | |
169 | const fltSemantics &getFltSemantics() const { |
170 | switch (getTypeID()) { |
171 | case HalfTyID: return APFloat::IEEEhalf(); |
172 | case BFloatTyID: return APFloat::BFloat(); |
173 | case FloatTyID: return APFloat::IEEEsingle(); |
174 | case DoubleTyID: return APFloat::IEEEdouble(); |
175 | case X86_FP80TyID: return APFloat::x87DoubleExtended(); |
176 | case FP128TyID: return APFloat::IEEEquad(); |
177 | case PPC_FP128TyID: return APFloat::PPCDoubleDouble(); |
178 | default: llvm_unreachable("Invalid floating type")::llvm::llvm_unreachable_internal("Invalid floating type", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/IR/Type.h" , 178); |
179 | } |
180 | } |
181 | |
182 | /// Return true if this is X86 MMX. |
183 | bool isX86_MMXTy() const { return getTypeID() == X86_MMXTyID; } |
184 | |
185 | /// Return true if this is a FP type or a vector of FP. |
186 | bool isFPOrFPVectorTy() const { return getScalarType()->isFloatingPointTy(); } |
187 | |
188 | /// Return true if this is 'label'. |
189 | bool isLabelTy() const { return getTypeID() == LabelTyID; } |
190 | |
191 | /// Return true if this is 'metadata'. |
192 | bool isMetadataTy() const { return getTypeID() == MetadataTyID; } |
193 | |
194 | /// Return true if this is 'token'. |
195 | bool isTokenTy() const { return getTypeID() == TokenTyID; } |
196 | |
197 | /// True if this is an instance of IntegerType. |
198 | bool isIntegerTy() const { return getTypeID() == IntegerTyID; } |
199 | |
200 | /// Return true if this is an IntegerType of the given width. |
201 | bool isIntegerTy(unsigned Bitwidth) const; |
202 | |
203 | /// Return true if this is an integer type or a vector of integer types. |
204 | bool isIntOrIntVectorTy() const { return getScalarType()->isIntegerTy(); } |
205 | |
206 | /// Return true if this is an integer type or a vector of integer types of |
207 | /// the given width. |
208 | bool isIntOrIntVectorTy(unsigned BitWidth) const { |
209 | return getScalarType()->isIntegerTy(BitWidth); |
210 | } |
211 | |
212 | /// Return true if this is an integer type or a pointer type. |
213 | bool isIntOrPtrTy() const { return isIntegerTy() || isPointerTy(); } |
214 | |
215 | /// True if this is an instance of FunctionType. |
216 | bool isFunctionTy() const { return getTypeID() == FunctionTyID; } |
217 | |
218 | /// True if this is an instance of StructType. |
219 | bool isStructTy() const { return getTypeID() == StructTyID; } |
220 | |
221 | /// True if this is an instance of ArrayType. |
222 | bool isArrayTy() const { return getTypeID() == ArrayTyID; } |
223 | |
224 | /// True if this is an instance of PointerType. |
225 | bool isPointerTy() const { return getTypeID() == PointerTyID; } |
226 | |
227 | /// Return true if this is a pointer type or a vector of pointer types. |
228 | bool isPtrOrPtrVectorTy() const { return getScalarType()->isPointerTy(); } |
229 | |
230 | /// True if this is an instance of VectorType. |
231 | inline bool isVectorTy() const { |
232 | return getTypeID() == ScalableVectorTyID || getTypeID() == FixedVectorTyID; |
233 | } |
234 | |
235 | /// Return true if this type could be converted with a lossless BitCast to |
236 | /// type 'Ty'. For example, i8* to i32*. BitCasts are valid for types of the |
237 | /// same size only where no re-interpretation of the bits is done. |
238 | /// Determine if this type could be losslessly bitcast to Ty |
239 | bool canLosslesslyBitCastTo(Type *Ty) const; |
240 | |
241 | /// Return true if this type is empty, that is, it has no elements or all of |
242 | /// its elements are empty. |
243 | bool isEmptyTy() const; |
244 | |
245 | /// Return true if the type is "first class", meaning it is a valid type for a |
246 | /// Value. |
247 | bool isFirstClassType() const { |
248 | return getTypeID() != FunctionTyID && getTypeID() != VoidTyID; |
249 | } |
250 | |
251 | /// Return true if the type is a valid type for a register in codegen. This |
252 | /// includes all first-class types except struct and array types. |
253 | bool isSingleValueType() const { |
254 | return isFloatingPointTy() || isX86_MMXTy() || isIntegerTy() || |
255 | isPointerTy() || isVectorTy(); |
256 | } |
257 | |
258 | /// Return true if the type is an aggregate type. This means it is valid as |
259 | /// the first operand of an insertvalue or extractvalue instruction. This |
260 | /// includes struct and array types, but does not include vector types. |
261 | bool isAggregateType() const { |
262 | return getTypeID() == StructTyID || getTypeID() == ArrayTyID; |
263 | } |
264 | |
265 | /// Return true if it makes sense to take the size of this type. To get the |
266 | /// actual size for a particular target, it is reasonable to use the |
267 | /// DataLayout subsystem to do this. |
268 | bool isSized(SmallPtrSetImpl<Type*> *Visited = nullptr) const { |
269 | // If it's a primitive, it is always sized. |
270 | if (getTypeID() == IntegerTyID || isFloatingPointTy() || |
271 | getTypeID() == PointerTyID || |
272 | getTypeID() == X86_MMXTyID) |
273 | return true; |
274 | // If it is not something that can have a size (e.g. a function or label), |
275 | // it doesn't have a size. |
276 | if (getTypeID() != StructTyID && getTypeID() != ArrayTyID && !isVectorTy()) |
277 | return false; |
278 | // Otherwise we have to try harder to decide. |
279 | return isSizedDerivedType(Visited); |
280 | } |
281 | |
282 | /// Return the basic size of this type if it is a primitive type. These are |
283 | /// fixed by LLVM and are not target-dependent. |
284 | /// This will return zero if the type does not have a size or is not a |
285 | /// primitive type. |
286 | /// |
287 | /// If this is a scalable vector type, the scalable property will be set and |
288 | /// the runtime size will be a positive integer multiple of the base size. |
289 | /// |
290 | /// Note that this may not reflect the size of memory allocated for an |
291 | /// instance of the type or the number of bytes that are written when an |
292 | /// instance of the type is stored to memory. The DataLayout class provides |
293 | /// additional query functions to provide this information. |
294 | /// |
295 | TypeSize getPrimitiveSizeInBits() const LLVM_READONLY;
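// For illustration (behaviour sketched from the description above, not part
// of the interface):
//   Type::getDoubleTy(C)->getPrimitiveSizeInBits()  is TypeSize::Fixed(64)
//   FixedVectorType::get(Type::getInt32Ty(C), 4)
//       ->getPrimitiveSizeInBits()                  is TypeSize::Fixed(128)
//   Pointer and struct types report a zero size here; use DataLayout for
//   their in-memory sizes.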
296 | |
297 | /// If this is a vector type, return the getPrimitiveSizeInBits value for the |
298 | /// element type. Otherwise return the getPrimitiveSizeInBits value for this |
299 | /// type. |
300 | unsigned getScalarSizeInBits() const LLVM_READONLY;
301 | |
302 | /// Return the width of the mantissa of this type. This is only valid on |
303 | /// floating-point types. If the FP type does not have a stable mantissa (e.g. |
304 | /// ppc long double), this method returns -1. |
305 | int getFPMantissaWidth() const; |
306 | |
307 | /// If this is a vector type, return the element type, otherwise return |
308 | /// 'this'. |
309 | inline Type *getScalarType() const { |
310 | if (isVectorTy()) |
311 | return getContainedType(0); |
312 | return const_cast<Type *>(this); |
313 | } |
314 | |
315 | //===--------------------------------------------------------------------===// |
316 | // Type Iteration support. |
317 | // |
318 | using subtype_iterator = Type * const *; |
319 | |
320 | subtype_iterator subtype_begin() const { return ContainedTys; } |
321 | subtype_iterator subtype_end() const { return &ContainedTys[NumContainedTys];} |
322 | ArrayRef<Type*> subtypes() const { |
323 | return makeArrayRef(subtype_begin(), subtype_end()); |
324 | } |
325 | |
326 | using subtype_reverse_iterator = std::reverse_iterator<subtype_iterator>; |
327 | |
328 | subtype_reverse_iterator subtype_rbegin() const { |
329 | return subtype_reverse_iterator(subtype_end()); |
330 | } |
331 | subtype_reverse_iterator subtype_rend() const { |
332 | return subtype_reverse_iterator(subtype_begin()); |
333 | } |
334 | |
335 | /// This method is used to implement the type iterator (defined at the end of |
336 | /// the file). For derived types, this returns the types 'contained' in the |
337 | /// derived type. |
338 | Type *getContainedType(unsigned i) const { |
339 | assert(i < NumContainedTys && "Index out of range!");
340 | return ContainedTys[i]; |
341 | } |
342 | |
343 | /// Return the number of types in the derived type. |
344 | unsigned getNumContainedTypes() const { return NumContainedTys; } |
345 | |
346 | //===--------------------------------------------------------------------===// |
347 | // Helper methods corresponding to subclass methods. This forces a cast to |
348 | // the specified subclass and calls its accessor. "getArrayNumElements" (for |
349 | // example) is shorthand for cast<ArrayType>(Ty)->getNumElements(). This is |
350 | // only intended to cover the core methods that are frequently used, helper |
351 | // methods should not be added here. |
352 | |
353 | inline unsigned getIntegerBitWidth() const; |
354 | |
355 | inline Type *getFunctionParamType(unsigned i) const; |
356 | inline unsigned getFunctionNumParams() const; |
357 | inline bool isFunctionVarArg() const; |
358 | |
359 | inline StringRef getStructName() const; |
360 | inline unsigned getStructNumElements() const; |
361 | inline Type *getStructElementType(unsigned N) const; |
362 | |
363 | inline uint64_t getArrayNumElements() const; |
364 | |
365 | Type *getArrayElementType() const { |
366 | assert(getTypeID() == ArrayTyID);
367 | return ContainedTys[0]; |
368 | } |
369 | |
370 | Type *getPointerElementType() const { |
371 | assert(getTypeID() == PointerTyID);
372 | return ContainedTys[0]; |
373 | } |
374 | |
375 | /// Given an integer or vector type, change the lane bitwidth to NewBitwidth, |
376 | /// whilst keeping the old number of lanes. |
377 | inline Type *getWithNewBitWidth(unsigned NewBitWidth) const; |
378 | |
379 | /// Given scalar/vector integer type, returns a type with elements twice as |
380 | /// wide as in the original type. For vectors, preserves element count. |
381 | inline Type *getExtendedType() const; |
382 | |
383 | /// Get the address space of this pointer or pointer vector type. |
384 | inline unsigned getPointerAddressSpace() const; |
385 | |
386 | //===--------------------------------------------------------------------===// |
387 | // Static members exported by the Type class itself. Useful for getting |
388 | // instances of Type. |
389 | // |
390 | |
391 | /// Return a type based on an identifier. |
392 | static Type *getPrimitiveType(LLVMContext &C, TypeID IDNumber); |
393 | |
394 | //===--------------------------------------------------------------------===// |
395 | // These are the builtin types that are always available. |
396 | // |
397 | static Type *getVoidTy(LLVMContext &C); |
398 | static Type *getLabelTy(LLVMContext &C); |
399 | static Type *getHalfTy(LLVMContext &C); |
400 | static Type *getBFloatTy(LLVMContext &C); |
401 | static Type *getFloatTy(LLVMContext &C); |
402 | static Type *getDoubleTy(LLVMContext &C); |
403 | static Type *getMetadataTy(LLVMContext &C); |
404 | static Type *getX86_FP80Ty(LLVMContext &C); |
405 | static Type *getFP128Ty(LLVMContext &C); |
406 | static Type *getPPC_FP128Ty(LLVMContext &C); |
407 | static Type *getX86_MMXTy(LLVMContext &C); |
408 | static Type *getTokenTy(LLVMContext &C); |
409 | static IntegerType *getIntNTy(LLVMContext &C, unsigned N); |
410 | static IntegerType *getInt1Ty(LLVMContext &C); |
411 | static IntegerType *getInt8Ty(LLVMContext &C); |
412 | static IntegerType *getInt16Ty(LLVMContext &C); |
413 | static IntegerType *getInt32Ty(LLVMContext &C); |
414 | static IntegerType *getInt64Ty(LLVMContext &C); |
415 | static IntegerType *getInt128Ty(LLVMContext &C); |
416 | template <typename ScalarTy> static Type *getScalarTy(LLVMContext &C) { |
417 | int noOfBits = sizeof(ScalarTy) * CHAR_BIT;
418 | if (std::is_integral<ScalarTy>::value) { |
419 | return (Type*) Type::getIntNTy(C, noOfBits); |
420 | } else if (std::is_floating_point<ScalarTy>::value) { |
421 | switch (noOfBits) { |
422 | case 32: |
423 | return Type::getFloatTy(C); |
424 | case 64: |
425 | return Type::getDoubleTy(C); |
426 | } |
427 | } |
428 | llvm_unreachable("Unsupported type in Type::getScalarTy")::llvm::llvm_unreachable_internal("Unsupported type in Type::getScalarTy" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/IR/Type.h" , 428); |
429 | } |
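// Usage sketch (hypothetical call sites, relying only on the template above):
//   Type *F32 = Type::getScalarTy<float>(Ctx);    // the 'float' type
//   Type *I64 = Type::getScalarTy<uint64_t>(Ctx); // the i64 type
// Scalar types whose width is not handled above fall through to the
// llvm_unreachable.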
430 | |
431 | //===--------------------------------------------------------------------===// |
432 | // Convenience methods for getting pointer types with one of the above builtin |
433 | // types as pointee. |
434 | // |
435 | static PointerType *getHalfPtrTy(LLVMContext &C, unsigned AS = 0); |
436 | static PointerType *getBFloatPtrTy(LLVMContext &C, unsigned AS = 0); |
437 | static PointerType *getFloatPtrTy(LLVMContext &C, unsigned AS = 0); |
438 | static PointerType *getDoublePtrTy(LLVMContext &C, unsigned AS = 0); |
439 | static PointerType *getX86_FP80PtrTy(LLVMContext &C, unsigned AS = 0); |
440 | static PointerType *getFP128PtrTy(LLVMContext &C, unsigned AS = 0); |
441 | static PointerType *getPPC_FP128PtrTy(LLVMContext &C, unsigned AS = 0); |
442 | static PointerType *getX86_MMXPtrTy(LLVMContext &C, unsigned AS = 0); |
443 | static PointerType *getIntNPtrTy(LLVMContext &C, unsigned N, unsigned AS = 0); |
444 | static PointerType *getInt1PtrTy(LLVMContext &C, unsigned AS = 0); |
445 | static PointerType *getInt8PtrTy(LLVMContext &C, unsigned AS = 0); |
446 | static PointerType *getInt16PtrTy(LLVMContext &C, unsigned AS = 0); |
447 | static PointerType *getInt32PtrTy(LLVMContext &C, unsigned AS = 0); |
448 | static PointerType *getInt64PtrTy(LLVMContext &C, unsigned AS = 0); |
449 | |
450 | /// Return a pointer to the current type. This is equivalent to |
451 | /// PointerType::get(Foo, AddrSpace). |
452 | PointerType *getPointerTo(unsigned AddrSpace = 0) const; |
453 | |
454 | private: |
455 | /// Derived types like structures and arrays are sized iff all of the members |
456 | /// of the type are sized as well. Since asking for their size is relatively |
457 | /// uncommon, move this operation out-of-line. |
458 | bool isSizedDerivedType(SmallPtrSetImpl<Type*> *Visited = nullptr) const; |
459 | }; |
460 | |
461 | // Printing of types. |
462 | inline raw_ostream &operator<<(raw_ostream &OS, const Type &T) { |
463 | T.print(OS); |
464 | return OS; |
465 | } |
466 | |
467 | // allow isa<PointerType>(x) to work without DerivedTypes.h included. |
468 | template <> struct isa_impl<PointerType, Type> { |
469 | static inline bool doit(const Type &Ty) { |
470 | return Ty.getTypeID() == Type::PointerTyID; |
471 | } |
472 | }; |
473 | |
474 | // Create wrappers for C Binding types (see CBindingWrapping.h). |
475 | DEFINE_ISA_CONVERSION_FUNCTIONS(Type, LLVMTypeRef)
476 | |
477 | /* Specialized opaque type conversions. |
478 | */ |
479 | inline Type **unwrap(LLVMTypeRef* Tys) { |
480 | return reinterpret_cast<Type**>(Tys); |
481 | } |
482 | |
483 | inline LLVMTypeRef *wrap(Type **Tys) { |
484 | return reinterpret_cast<LLVMTypeRef*>(const_cast<Type**>(Tys)); |
485 | } |
486 | |
487 | } // end namespace llvm |
488 | |
489 | #endif // LLVM_IR_TYPE_H |
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | // | ||||||||
9 | /// \file | ||||||||
10 | /// This file provides a helper that implements much of the TTI interface in | ||||||||
11 | /// terms of the target-independent code generator and TargetLowering | ||||||||
12 | /// interfaces. | ||||||||
13 | // | ||||||||
14 | //===----------------------------------------------------------------------===// | ||||||||
15 | |||||||||
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H | ||||||||
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H | ||||||||
18 | |||||||||
19 | #include "llvm/ADT/APInt.h" | ||||||||
20 | #include "llvm/ADT/ArrayRef.h" | ||||||||
21 | #include "llvm/ADT/BitVector.h" | ||||||||
22 | #include "llvm/ADT/SmallPtrSet.h" | ||||||||
23 | #include "llvm/ADT/SmallVector.h" | ||||||||
24 | #include "llvm/Analysis/LoopInfo.h" | ||||||||
25 | #include "llvm/Analysis/TargetTransformInfo.h" | ||||||||
26 | #include "llvm/Analysis/TargetTransformInfoImpl.h" | ||||||||
27 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||||||
28 | #include "llvm/CodeGen/TargetLowering.h" | ||||||||
29 | #include "llvm/CodeGen/TargetSubtargetInfo.h" | ||||||||
30 | #include "llvm/CodeGen/ValueTypes.h" | ||||||||
31 | #include "llvm/IR/BasicBlock.h" | ||||||||
32 | #include "llvm/IR/Constant.h" | ||||||||
33 | #include "llvm/IR/Constants.h" | ||||||||
34 | #include "llvm/IR/DataLayout.h" | ||||||||
35 | #include "llvm/IR/DerivedTypes.h" | ||||||||
36 | #include "llvm/IR/InstrTypes.h" | ||||||||
37 | #include "llvm/IR/Instruction.h" | ||||||||
38 | #include "llvm/IR/Instructions.h" | ||||||||
39 | #include "llvm/IR/Intrinsics.h" | ||||||||
40 | #include "llvm/IR/Operator.h" | ||||||||
41 | #include "llvm/IR/Type.h" | ||||||||
42 | #include "llvm/IR/Value.h" | ||||||||
43 | #include "llvm/Support/Casting.h" | ||||||||
44 | #include "llvm/Support/CommandLine.h" | ||||||||
45 | #include "llvm/Support/ErrorHandling.h" | ||||||||
46 | #include "llvm/Support/MachineValueType.h" | ||||||||
47 | #include "llvm/Support/MathExtras.h" | ||||||||
48 | #include <algorithm> | ||||||||
49 | #include <cassert> | ||||||||
50 | #include <cstdint> | ||||||||
51 | #include <limits> | ||||||||
52 | #include <utility> | ||||||||
53 | |||||||||
54 | namespace llvm { | ||||||||
55 | |||||||||
56 | class Function; | ||||||||
57 | class GlobalValue; | ||||||||
58 | class LLVMContext; | ||||||||
59 | class ScalarEvolution; | ||||||||
60 | class SCEV; | ||||||||
61 | class TargetMachine; | ||||||||
62 | |||||||||
63 | extern cl::opt<unsigned> PartialUnrollingThreshold; | ||||||||
64 | |||||||||
65 | /// Base class which can be used to help build a TTI implementation. | ||||||||
66 | /// | ||||||||
67 | /// This class provides as much implementation of the TTI interface as is | ||||||||
68 | /// possible using the target independent parts of the code generator. | ||||||||
69 | /// | ||||||||
70 | /// In order to subclass it, your class must implement a getST() method to | ||||||||
71 | /// return the subtarget, and a getTLI() method to return the target lowering. | ||||||||
72 | /// We need these methods implemented in the derived class so that this class | ||||||||
73 | /// doesn't have to duplicate storage for them. | ||||||||
74 | template <typename T> | ||||||||
75 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { | ||||||||
76 | private: | ||||||||
77 | using BaseT = TargetTransformInfoImplCRTPBase<T>; | ||||||||
78 | using TTI = TargetTransformInfo; | ||||||||
79 | |||||||||
80 | /// Helper function to access this as a T. | ||||||||
81 | T *thisT() { return static_cast<T *>(this); } | ||||||||
82 | |||||||||
83 | /// Estimate a cost of Broadcast as an extract and sequence of insert | ||||||||
84 | /// operations. | ||||||||
85 | unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) { | ||||||||
86 | unsigned Cost = 0; | ||||||||
87 | // Broadcast cost is equal to the cost of extracting the zero'th element | ||||||||
88 | // plus the cost of inserting it into every element of the result vector. | ||||||||
89 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); | ||||||||
90 | |||||||||
91 | for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { | ||||||||
92 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); | ||||||||
93 | } | ||||||||
94 | return Cost; | ||||||||
95 | } | ||||||||
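// Worked example (a sketch, assuming unit insert/extract costs from the
// target): broadcasting a v4f32 is modelled as one extract of element 0 plus
// four inserts, i.e. a scalarization overhead of 1 + 4 = 5.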
96 | |||||||||
97 | /// Estimate a cost of shuffle as a sequence of extract and insert | ||||||||
98 | /// operations. | ||||||||
99 | unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) { | ||||||||
100 | unsigned Cost = 0; | ||||||||
101 | // Shuffle cost is equal to the cost of extracting an element from its source
102 | // vector plus the cost of inserting it into the result vector.
103 | |||||||||
104 | // e.g. a <4 x float> shuffle with mask <0,5,2,7> needs to extract from
105 | // index 0 of the first vector, index 1 of the second vector, index 2 of the
106 | // first vector and finally index 3 of the second vector, and insert them at
107 | // indices <0,1,2,3> of the result vector.
108 | for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { | ||||||||
109 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); | ||||||||
110 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i); | ||||||||
111 | } | ||||||||
112 | return Cost; | ||||||||
113 | } | ||||||||
114 | |||||||||
115 | /// Estimate a cost of subvector extraction as a sequence of extract and | ||||||||
116 | /// insert operations. | ||||||||
117 | unsigned getExtractSubvectorOverhead(FixedVectorType *VTy, int Index, | ||||||||
118 | FixedVectorType *SubVTy) { | ||||||||
119 | assert(VTy && SubVTy &&
120 | "Can only extract subvectors from vectors");
121 | int NumSubElts = SubVTy->getNumElements(); | ||||||||
122 | assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
123 | "SK_ExtractSubvector index out of range");
124 | |||||||||
125 | unsigned Cost = 0; | ||||||||
126 | // Subvector extraction cost is equal to the cost of extracting each element
127 | // from the source vector type plus the cost of inserting it into the result
128 | // subvector type.
129 | for (int i = 0; i != NumSubElts; ++i) { | ||||||||
130 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, | ||||||||
131 | i + Index); | ||||||||
132 | Cost += | ||||||||
133 | thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i); | ||||||||
134 | } | ||||||||
135 | return Cost; | ||||||||
136 | } | ||||||||
137 | |||||||||
138 | /// Estimate a cost of subvector insertion as a sequence of extract and | ||||||||
139 | /// insert operations. | ||||||||
140 | unsigned getInsertSubvectorOverhead(FixedVectorType *VTy, int Index, | ||||||||
141 | FixedVectorType *SubVTy) { | ||||||||
142 | assert(VTy && SubVTy &&
143 | "Can only insert subvectors into vectors");
144 | int NumSubElts = SubVTy->getNumElements(); | ||||||||
145 | assert((Index + NumSubElts) <= (int)VTy->getNumElements() &&
146 | "SK_InsertSubvector index out of range");
147 | |||||||||
148 | unsigned Cost = 0; | ||||||||
149 | // Subvector insertion cost is equal to the cost of extracting each element
150 | // from the subvector type plus the cost of inserting it into the result
151 | // vector type.
152 | for (int i = 0; i != NumSubElts; ++i) { | ||||||||
153 | Cost += | ||||||||
154 | thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i); | ||||||||
155 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, | ||||||||
156 | i + Index); | ||||||||
157 | } | ||||||||
158 | return Cost; | ||||||||
159 | } | ||||||||
160 | |||||||||
161 | /// Local query method delegates up to T which *must* implement this! | ||||||||
162 | const TargetSubtargetInfo *getST() const { | ||||||||
163 | return static_cast<const T *>(this)->getST(); | ||||||||
164 | } | ||||||||
165 | |||||||||
166 | /// Local query method delegates up to T which *must* implement this! | ||||||||
167 | const TargetLoweringBase *getTLI() const { | ||||||||
168 | return static_cast<const T *>(this)->getTLI(); | ||||||||
169 | } | ||||||||
170 | |||||||||
171 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { | ||||||||
172 | switch (M) { | ||||||||
173 | case TTI::MIM_Unindexed: | ||||||||
174 | return ISD::UNINDEXED; | ||||||||
175 | case TTI::MIM_PreInc: | ||||||||
176 | return ISD::PRE_INC; | ||||||||
177 | case TTI::MIM_PreDec: | ||||||||
178 | return ISD::PRE_DEC; | ||||||||
179 | case TTI::MIM_PostInc: | ||||||||
180 | return ISD::POST_INC; | ||||||||
181 | case TTI::MIM_PostDec: | ||||||||
182 | return ISD::POST_DEC; | ||||||||
183 | } | ||||||||
184 | llvm_unreachable("Unexpected MemIndexedMode")::llvm::llvm_unreachable_internal("Unexpected MemIndexedMode" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 184); | ||||||||
185 | } | ||||||||
186 | |||||||||
187 | protected: | ||||||||
188 | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) | ||||||||
189 | : BaseT(DL) {} | ||||||||
190 | virtual ~BasicTTIImplBase() = default; | ||||||||
191 | |||||||||
192 | using TargetTransformInfoImplBase::DL; | ||||||||
193 | |||||||||
194 | public: | ||||||||
195 | /// \name Scalar TTI Implementations | ||||||||
196 | /// @{ | ||||||||
197 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, | ||||||||
198 | unsigned AddressSpace, unsigned Alignment, | ||||||||
199 | bool *Fast) const { | ||||||||
200 | EVT E = EVT::getIntegerVT(Context, BitWidth); | ||||||||
201 | return getTLI()->allowsMisalignedMemoryAccesses( | ||||||||
202 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); | ||||||||
203 | } | ||||||||
204 | |||||||||
205 | bool hasBranchDivergence() { return false; } | ||||||||
206 | |||||||||
207 | bool useGPUDivergenceAnalysis() { return false; } | ||||||||
208 | |||||||||
209 | bool isSourceOfDivergence(const Value *V) { return false; } | ||||||||
210 | |||||||||
211 | bool isAlwaysUniform(const Value *V) { return false; } | ||||||||
212 | |||||||||
213 | unsigned getFlatAddressSpace() { | ||||||||
214 | // Return an invalid address space. | ||||||||
215 | return -1; | ||||||||
216 | } | ||||||||
217 | |||||||||
218 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, | ||||||||
219 | Intrinsic::ID IID) const { | ||||||||
220 | return false; | ||||||||
221 | } | ||||||||
222 | |||||||||
223 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { | ||||||||
224 | return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS); | ||||||||
225 | } | ||||||||
226 | |||||||||
227 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, | ||||||||
228 | Value *NewV) const { | ||||||||
229 | return nullptr; | ||||||||
230 | } | ||||||||
231 | |||||||||
232 | bool isLegalAddImmediate(int64_t imm) { | ||||||||
233 | return getTLI()->isLegalAddImmediate(imm); | ||||||||
234 | } | ||||||||
235 | |||||||||
236 | bool isLegalICmpImmediate(int64_t imm) { | ||||||||
237 | return getTLI()->isLegalICmpImmediate(imm); | ||||||||
238 | } | ||||||||
239 | |||||||||
240 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||
241 | bool HasBaseReg, int64_t Scale, | ||||||||
242 | unsigned AddrSpace, Instruction *I = nullptr) { | ||||||||
243 | TargetLoweringBase::AddrMode AM; | ||||||||
244 | AM.BaseGV = BaseGV; | ||||||||
245 | AM.BaseOffs = BaseOffset; | ||||||||
246 | AM.HasBaseReg = HasBaseReg; | ||||||||
247 | AM.Scale = Scale; | ||||||||
248 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); | ||||||||
249 | } | ||||||||
250 | |||||||||
251 | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, | ||||||||
252 | const DataLayout &DL) const { | ||||||||
253 | EVT VT = getTLI()->getValueType(DL, Ty); | ||||||||
254 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); | ||||||||
255 | } | ||||||||
256 | |||||||||
257 | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, | ||||||||
258 | const DataLayout &DL) const { | ||||||||
259 | EVT VT = getTLI()->getValueType(DL, Ty); | ||||||||
260 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); | ||||||||
261 | } | ||||||||
262 | |||||||||
263 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { | ||||||||
264 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); | ||||||||
265 | } | ||||||||
266 | |||||||||
267 | bool isProfitableLSRChainElement(Instruction *I) { | ||||||||
268 | return TargetTransformInfoImplBase::isProfitableLSRChainElement(I); | ||||||||
269 | } | ||||||||
270 | |||||||||
271 | int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, | ||||||||
272 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { | ||||||||
273 | TargetLoweringBase::AddrMode AM; | ||||||||
274 | AM.BaseGV = BaseGV; | ||||||||
275 | AM.BaseOffs = BaseOffset; | ||||||||
276 | AM.HasBaseReg = HasBaseReg; | ||||||||
277 | AM.Scale = Scale; | ||||||||
278 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); | ||||||||
279 | } | ||||||||
280 | |||||||||
281 | bool isTruncateFree(Type *Ty1, Type *Ty2) { | ||||||||
282 | return getTLI()->isTruncateFree(Ty1, Ty2); | ||||||||
283 | } | ||||||||
284 | |||||||||
285 | bool isProfitableToHoist(Instruction *I) { | ||||||||
286 | return getTLI()->isProfitableToHoist(I); | ||||||||
287 | } | ||||||||
288 | |||||||||
289 | bool useAA() const { return getST()->useAA(); } | ||||||||
290 | |||||||||
291 | bool isTypeLegal(Type *Ty) { | ||||||||
292 | EVT VT = getTLI()->getValueType(DL, Ty); | ||||||||
293 | return getTLI()->isTypeLegal(VT); | ||||||||
294 | } | ||||||||
295 | |||||||||
296 | int getGEPCost(Type *PointeeType, const Value *Ptr, | ||||||||
297 | ArrayRef<const Value *> Operands) { | ||||||||
298 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); | ||||||||
299 | } | ||||||||
300 | |||||||||
301 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, | ||||||||
302 | unsigned &JumpTableSize, | ||||||||
303 | ProfileSummaryInfo *PSI, | ||||||||
304 | BlockFrequencyInfo *BFI) { | ||||||||
305 | /// Try to find the estimated number of clusters. Note that the number of | ||||||||
306 | /// clusters identified in this function could be different from the actual | ||||||||
307 | /// numbers found in lowering. This function ignores switches that are
308 | /// lowered with a mix of jump table / bit test / BTree. This function was
309 | /// initially intended to be used when estimating the cost of a switch in
310 | /// the inline cost heuristic, but it is a generic cost model to be used in other
311 | /// places (e.g., in loop unrolling). | ||||||||
312 | unsigned N = SI.getNumCases(); | ||||||||
313 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
314 | const DataLayout &DL = this->getDataLayout(); | ||||||||
315 | |||||||||
316 | JumpTableSize = 0; | ||||||||
317 | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); | ||||||||
318 | |||||||||
319 | // Early exit if neither a jump table nor a bit test is possible.
320 | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) | ||||||||
321 | return N; | ||||||||
322 | |||||||||
323 | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); | ||||||||
324 | APInt MinCaseVal = MaxCaseVal; | ||||||||
325 | for (auto CI : SI.cases()) { | ||||||||
326 | const APInt &CaseVal = CI.getCaseValue()->getValue(); | ||||||||
327 | if (CaseVal.sgt(MaxCaseVal)) | ||||||||
328 | MaxCaseVal = CaseVal; | ||||||||
329 | if (CaseVal.slt(MinCaseVal)) | ||||||||
330 | MinCaseVal = CaseVal; | ||||||||
331 | } | ||||||||
332 | |||||||||
333 | // Check if suitable for a bit test | ||||||||
334 | if (N <= DL.getIndexSizeInBits(0u)) { | ||||||||
335 | SmallPtrSet<const BasicBlock *, 4> Dests; | ||||||||
336 | for (auto I : SI.cases()) | ||||||||
337 | Dests.insert(I.getCaseSuccessor()); | ||||||||
338 | |||||||||
339 | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, | ||||||||
340 | DL)) | ||||||||
341 | return 1; | ||||||||
342 | } | ||||||||
343 | |||||||||
344 | // Check if suitable for a jump table. | ||||||||
345 | if (IsJTAllowed) { | ||||||||
346 | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) | ||||||||
347 | return N; | ||||||||
348 | uint64_t Range = | ||||||||
349 | (MaxCaseVal - MinCaseVal) | ||||||||
350 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; | ||||||||
351 | // Check whether a range of clusters is dense enough for a jump table | ||||||||
352 | if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) { | ||||||||
353 | JumpTableSize = Range; | ||||||||
354 | return 1; | ||||||||
355 | } | ||||||||
356 | } | ||||||||
357 | return N; | ||||||||
358 | } | ||||||||
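// Worked example (a sketch, assuming a target where jump tables are allowed
// and the default minimum jump table size of 4): a switch with the eight
// dense case values 0..7 and distinct successor blocks gives Range == 8,
// isSuitableForJumpTable succeeds, so the function reports a single cluster
// and sets JumpTableSize to 8. Eight widely scattered case values that suit
// neither a bit test nor a jump table are reported as eight clusters instead.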
359 | |||||||||
360 | bool shouldBuildLookupTables() { | ||||||||
361 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
362 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | ||||||||
363 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | ||||||||
364 | } | ||||||||
365 | |||||||||
366 | bool haveFastSqrt(Type *Ty) { | ||||||||
367 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
368 | EVT VT = TLI->getValueType(DL, Ty); | ||||||||
369 | return TLI->isTypeLegal(VT) && | ||||||||
370 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); | ||||||||
371 | } | ||||||||
372 | |||||||||
373 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { | ||||||||
374 | return true; | ||||||||
375 | } | ||||||||
376 | |||||||||
377 | unsigned getFPOpCost(Type *Ty) { | ||||||||
378 | // Check whether FADD is available, as a proxy for floating-point in | ||||||||
379 | // general. | ||||||||
380 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
381 | EVT VT = TLI->getValueType(DL, Ty); | ||||||||
382 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) | ||||||||
383 | return TargetTransformInfo::TCC_Basic; | ||||||||
384 | return TargetTransformInfo::TCC_Expensive; | ||||||||
385 | } | ||||||||
386 | |||||||||
387 | unsigned getInliningThresholdMultiplier() { return 1; } | ||||||||
388 | |||||||||
389 | int getInlinerVectorBonusPercent() { return 150; } | ||||||||
390 | |||||||||
391 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||
392 | TTI::UnrollingPreferences &UP) { | ||||||||
393 | // This unrolling functionality is target independent, but to provide some | ||||||||
394 | // motivation for its intended use, for x86: | ||||||||
395 | |||||||||
396 | // According to the Intel 64 and IA-32 Architectures Optimization Reference | ||||||||
397 | // Manual, Intel Core models and later have a loop stream detector (and | ||||||||
398 | // associated uop queue) that can benefit from partial unrolling. | ||||||||
399 | // The relevant requirements are: | ||||||||
400 | // - The loop must have no more than 4 (8 for Nehalem and later) branches | ||||||||
401 | // taken, and none of them may be calls. | ||||||||
402 | // - The loop can have no more than 18 (28 for Nehalem and later) uops. | ||||||||
403 | |||||||||
404 | // According to the Software Optimization Guide for AMD Family 15h | ||||||||
405 | // Processors, models 30h-4fh (Steamroller and later) have a loop predictor | ||||||||
406 | // and loop buffer which can benefit from partial unrolling. | ||||||||
407 | // The relevant requirements are: | ||||||||
408 | // - The loop must have fewer than 16 branches | ||||||||
409 | // - The loop must have less than 40 uops in all executed loop branches | ||||||||
410 | |||||||||
411 | // The number of taken branches in a loop is hard to estimate here, and | ||||||||
412 | // benchmarking has revealed that it is better not to be conservative when | ||||||||
413 | // estimating the branch count. As a result, we'll ignore the branch limits | ||||||||
414 | // until someone finds a case where it matters in practice. | ||||||||
415 | |||||||||
416 | unsigned MaxOps; | ||||||||
417 | const TargetSubtargetInfo *ST = getST(); | ||||||||
418 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) | ||||||||
419 | MaxOps = PartialUnrollingThreshold; | ||||||||
420 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) | ||||||||
421 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; | ||||||||
422 | else | ||||||||
423 | return; | ||||||||
424 | |||||||||
425 | // Scan the loop: don't unroll loops with calls. | ||||||||
426 | for (BasicBlock *BB : L->blocks()) { | ||||||||
427 | for (Instruction &I : *BB) { | ||||||||
428 | if (isa<CallInst>(I) || isa<InvokeInst>(I)) { | ||||||||
429 | if (const Function *F = cast<CallBase>(I).getCalledFunction()) { | ||||||||
430 | if (!thisT()->isLoweredToCall(F)) | ||||||||
431 | continue; | ||||||||
432 | } | ||||||||
433 | |||||||||
434 | return; | ||||||||
435 | } | ||||||||
436 | } | ||||||||
437 | } | ||||||||
438 | |||||||||
439 | // Enable runtime and partial unrolling up to the specified size. | ||||||||
440 | // Enable using trip count upper bound to unroll loops. | ||||||||
441 | UP.Partial = UP.Runtime = UP.UpperBound = true; | ||||||||
442 | UP.PartialThreshold = MaxOps; | ||||||||
443 | |||||||||
444 | // Avoid unrolling when optimizing for size. | ||||||||
445 | UP.OptSizeThreshold = 0; | ||||||||
446 | UP.PartialOptSizeThreshold = 0; | ||||||||
447 | |||||||||
448 | // Set the number of instructions optimized away when the "back edge"
449 | // becomes a "fall through" to its default value of 2.
450 | UP.BEInsns = 2; | ||||||||
451 | } | ||||||||
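// Worked example (a sketch, assuming a subtarget whose scheduling model
// reports LoopMicroOpBufferSize == 28 and no PartialUnrollingThreshold
// override): MaxOps becomes 28, so a call-free loop body of about 7
// instructions could be partially or runtime unrolled roughly 4x before
// exceeding UP.PartialThreshold, while any loop containing a real call is
// left alone.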
452 | |||||||||
453 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, | ||||||||
454 | TTI::PeelingPreferences &PP) { | ||||||||
455 | PP.PeelCount = 0; | ||||||||
456 | PP.AllowPeeling = true; | ||||||||
457 | PP.AllowLoopNestsPeeling = false; | ||||||||
458 | PP.PeelProfiledIterations = true; | ||||||||
459 | } | ||||||||
460 | |||||||||
461 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, | ||||||||
462 | AssumptionCache &AC, | ||||||||
463 | TargetLibraryInfo *LibInfo, | ||||||||
464 | HardwareLoopInfo &HWLoopInfo) { | ||||||||
465 | return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); | ||||||||
466 | } | ||||||||
467 | |||||||||
468 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, | ||||||||
469 | AssumptionCache &AC, TargetLibraryInfo *TLI, | ||||||||
470 | DominatorTree *DT, | ||||||||
471 | const LoopAccessInfo *LAI) { | ||||||||
472 | return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); | ||||||||
473 | } | ||||||||
474 | |||||||||
475 | bool emitGetActiveLaneMask() { | ||||||||
476 | return BaseT::emitGetActiveLaneMask(); | ||||||||
477 | } | ||||||||
478 | |||||||||
479 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, | ||||||||
480 | IntrinsicInst &II) { | ||||||||
481 | return BaseT::instCombineIntrinsic(IC, II); | ||||||||
482 | } | ||||||||
483 | |||||||||
484 | Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, | ||||||||
485 | IntrinsicInst &II, | ||||||||
486 | APInt DemandedMask, | ||||||||
487 | KnownBits &Known, | ||||||||
488 | bool &KnownBitsComputed) { | ||||||||
489 | return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, | ||||||||
490 | KnownBitsComputed); | ||||||||
491 | } | ||||||||
492 | |||||||||
493 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( | ||||||||
494 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, | ||||||||
495 | APInt &UndefElts2, APInt &UndefElts3, | ||||||||
496 | std::function<void(Instruction *, unsigned, APInt, APInt &)> | ||||||||
497 | SimplifyAndSetOp) { | ||||||||
498 | return BaseT::simplifyDemandedVectorEltsIntrinsic( | ||||||||
499 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, | ||||||||
500 | SimplifyAndSetOp); | ||||||||
501 | } | ||||||||
502 | |||||||||
503 | int getInstructionLatency(const Instruction *I) { | ||||||||
504 | if (isa<LoadInst>(I)) | ||||||||
505 | return getST()->getSchedModel().DefaultLoadLatency; | ||||||||
506 | |||||||||
507 | return BaseT::getInstructionLatency(I); | ||||||||
508 | } | ||||||||
509 | |||||||||
510 | virtual Optional<unsigned> | ||||||||
511 | getCacheSize(TargetTransformInfo::CacheLevel Level) const { | ||||||||
512 | return Optional<unsigned>( | ||||||||
513 | getST()->getCacheSize(static_cast<unsigned>(Level))); | ||||||||
514 | } | ||||||||
515 | |||||||||
516 | virtual Optional<unsigned> | ||||||||
517 | getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { | ||||||||
518 | Optional<unsigned> TargetResult = | ||||||||
519 | getST()->getCacheAssociativity(static_cast<unsigned>(Level)); | ||||||||
520 | |||||||||
521 | if (TargetResult) | ||||||||
522 | return TargetResult; | ||||||||
523 | |||||||||
524 | return BaseT::getCacheAssociativity(Level); | ||||||||
525 | } | ||||||||
526 | |||||||||
527 | virtual unsigned getCacheLineSize() const { | ||||||||
528 | return getST()->getCacheLineSize(); | ||||||||
529 | } | ||||||||
530 | |||||||||
531 | virtual unsigned getPrefetchDistance() const { | ||||||||
532 | return getST()->getPrefetchDistance(); | ||||||||
533 | } | ||||||||
534 | |||||||||
535 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, | ||||||||
536 | unsigned NumStridedMemAccesses, | ||||||||
537 | unsigned NumPrefetches, | ||||||||
538 | bool HasCall) const { | ||||||||
539 | return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, | ||||||||
540 | NumPrefetches, HasCall); | ||||||||
541 | } | ||||||||
542 | |||||||||
543 | virtual unsigned getMaxPrefetchIterationsAhead() const { | ||||||||
544 | return getST()->getMaxPrefetchIterationsAhead(); | ||||||||
545 | } | ||||||||
546 | |||||||||
547 | virtual bool enableWritePrefetching() const { | ||||||||
548 | return getST()->enableWritePrefetching(); | ||||||||
549 | } | ||||||||
550 | |||||||||
551 | /// @} | ||||||||
552 | |||||||||
553 | /// \name Vector TTI Implementations | ||||||||
554 | /// @{ | ||||||||
555 | |||||||||
556 | unsigned getRegisterBitWidth(bool Vector) const { return 32; } | ||||||||
557 | |||||||||
558 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract | ||||||||
559 | /// are set if the demanded result elements need to be inserted and/or | ||||||||
560 | /// extracted from vectors. | ||||||||
561 | unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts, | ||||||||
562 | bool Insert, bool Extract) { | ||||||||
563 | /// FIXME: a bitfield is not a reasonable abstraction for talking about | ||||||||
564 | /// which elements are needed from a scalable vector | ||||||||
565 | auto *Ty = cast<FixedVectorType>(InTy); | ||||||||
566 | |||||||||
567 | assert(DemandedElts.getBitWidth() == Ty->getNumElements() && | ||||||||
568 | "Vector size mismatch"); | ||||||||
569 | |||||||||
570 | unsigned Cost = 0; | ||||||||
571 | |||||||||
572 | for (int i = 0, e = Ty->getNumElements(); i < e; ++i) { | ||||||||
573 | if (!DemandedElts[i]) | ||||||||
574 | continue; | ||||||||
575 | if (Insert) | ||||||||
576 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i); | ||||||||
577 | if (Extract) | ||||||||
578 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i); | ||||||||
579 | } | ||||||||
580 | |||||||||
581 | return Cost; | ||||||||
582 | } | ||||||||
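The loop above simply charges one insert and/or one extract per demanded lane. A minimal standalone sketch of that counting (plain C++, with assumed unit costs in place of the per-element getVectorInstrCost queries):

#include <bitset>
#include <cstdio>

// Assumed unit costs; the real code asks the target for per-element costs.
unsigned scalarizationOverhead(std::bitset<64> DemandedElts, unsigned NumElts,
                               bool Insert, bool Extract) {
  unsigned Cost = 0;
  for (unsigned i = 0; i < NumElts; ++i) {
    if (!DemandedElts[i])
      continue;                // undemanded lanes are free
    if (Insert)  Cost += 1;    // insertelement into lane i
    if (Extract) Cost += 1;    // extractelement from lane i
  }
  return Cost;
}

int main() {
  // 4-wide vector, lanes 0 and 2 demanded, inserts only -> cost 2.
  std::printf("%u\n", scalarizationOverhead(0b0101, 4, true, false));
}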
583 | |||||||||
584 | /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. | ||||||||
585 | unsigned getScalarizationOverhead(VectorType *InTy, bool Insert, | ||||||||
586 | bool Extract) { | ||||||||
587 | auto *Ty = cast<FixedVectorType>(InTy); | ||||||||
588 | |||||||||
589 | APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements()); | ||||||||
590 | return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); | ||||||||
591 | } | ||||||||
592 | |||||||||
593 | /// Estimate the overhead of scalarizing an instruction's unique | ||||||||
594 | /// non-constant operands. The types of the arguments are ordinarily | ||||||||
595 | /// scalar, in which case the costs are multiplied by VF. | ||||||||
596 | unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, | ||||||||
597 | unsigned VF) { | ||||||||
598 | unsigned Cost = 0; | ||||||||
599 | SmallPtrSet<const Value*, 4> UniqueOperands; | ||||||||
600 | for (const Value *A : Args) { | ||||||||
601 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { | ||||||||
602 | auto *VecTy = dyn_cast<VectorType>(A->getType()); | ||||||||
603 | if (VecTy) { | ||||||||
604 | // If A is a vector operand, VF should be 1 or correspond to A. | ||||||||
605 | assert((VF == 1 || | ||||||||
606 | VF == cast<FixedVectorType>(VecTy)->getNumElements()) && | ||||||||
607 | "Vector argument does not match VF"); | ||||||||
608 | } | ||||||||
609 | else | ||||||||
610 | VecTy = FixedVectorType::get(A->getType(), VF); | ||||||||
611 | |||||||||
612 | Cost += getScalarizationOverhead(VecTy, false, true); | ||||||||
613 | } | ||||||||
614 | } | ||||||||
615 | |||||||||
616 | return Cost; | ||||||||
617 | } | ||||||||
618 | |||||||||
619 | unsigned getScalarizationOverhead(VectorType *InTy, | ||||||||
620 | ArrayRef<const Value *> Args) { | ||||||||
621 | auto *Ty = cast<FixedVectorType>(InTy); | ||||||||
622 | |||||||||
623 | unsigned Cost = 0; | ||||||||
624 | |||||||||
625 | Cost += getScalarizationOverhead(Ty, true, false); | ||||||||
626 | if (!Args.empty()) | ||||||||
627 | Cost += getOperandsScalarizationOverhead(Args, Ty->getNumElements()); | ||||||||
628 | else | ||||||||
629 | // When no information on arguments is provided, we add the cost | ||||||||
630 | // associated with one argument as a heuristic. | ||||||||
631 | Cost += getScalarizationOverhead(Ty, false, true); | ||||||||
632 | |||||||||
633 | return Cost; | ||||||||
634 | } | ||||||||
635 | |||||||||
636 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } | ||||||||
637 | |||||||||
638 | unsigned getArithmeticInstrCost( | ||||||||
639 | unsigned Opcode, Type *Ty, | ||||||||
640 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, | ||||||||
641 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, | ||||||||
642 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, | ||||||||
643 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, | ||||||||
644 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, | ||||||||
645 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), | ||||||||
646 | const Instruction *CxtI = nullptr) { | ||||||||
647 | // Check if any of the operands are vector operands. | ||||||||
648 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
649 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
650 | assert(ISD && "Invalid opcode"); | ||||||||
651 | |||||||||
652 | // TODO: Handle more cost kinds. | ||||||||
653 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
654 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, | ||||||||
655 | Opd1Info, Opd2Info, | ||||||||
656 | Opd1PropInfo, Opd2PropInfo, | ||||||||
657 | Args, CxtI); | ||||||||
658 | |||||||||
659 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); | ||||||||
660 | |||||||||
661 | bool IsFloat = Ty->isFPOrFPVectorTy(); | ||||||||
662 | // Assume that floating point arithmetic operations cost twice as much as | ||||||||
663 | // integer operations. | ||||||||
664 | unsigned OpCost = (IsFloat ? 2 : 1); | ||||||||
665 | |||||||||
666 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | ||||||||
667 | // The operation is legal. Assume it costs 1. | ||||||||
668 | // TODO: Once we have extract/insert subvector cost we need to use them. | ||||||||
669 | return LT.first * OpCost; | ||||||||
670 | } | ||||||||
671 | |||||||||
672 | if (!TLI->isOperationExpand(ISD, LT.second)) { | ||||||||
673 | // If the operation is custom lowered, then assume that the code is twice | ||||||||
674 | // as expensive. | ||||||||
675 | return LT.first * 2 * OpCost; | ||||||||
676 | } | ||||||||
677 | |||||||||
678 | // Else, assume that we need to scalarize this op. | ||||||||
679 | // TODO: If one of the types get legalized by splitting, handle this | ||||||||
680 | // similarly to what getCastInstrCost() does. | ||||||||
681 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { | ||||||||
682 | unsigned Num = cast<FixedVectorType>(VTy)->getNumElements(); | ||||||||
683 | unsigned Cost = thisT()->getArithmeticInstrCost( | ||||||||
684 | Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, | ||||||||
685 | Opd1PropInfo, Opd2PropInfo, Args, CxtI); | ||||||||
686 | // Return the cost of multiple scalar invocations plus the cost of | ||||||||
687 | // inserting and extracting the values. | ||||||||
688 | return getScalarizationOverhead(VTy, Args) + Num * Cost; | ||||||||
689 | } | ||||||||
690 | |||||||||
691 | // We don't know anything about this scalar instruction. | ||||||||
692 | return OpCost; | ||||||||
693 | } | ||||||||
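The decision above has three tiers: legal or promoted operations cost LT.first * OpCost, custom-lowered operations twice that, and operations that must expand are priced as one scalar op per element plus the scalarization overhead. A standalone sketch of that arithmetic (the numbers are illustrative assumptions, not the LLVM API):

#include <cstdio>

enum class Action { Legal, Custom, Expand };

unsigned arithmeticCost(Action A, unsigned NumLegalParts, bool IsFloat,
                        unsigned NumElts, unsigned ScalarizationOverhead) {
  unsigned OpCost = IsFloat ? 2 : 1;               // FP assumed twice as expensive
  if (A == Action::Legal)
    return NumLegalParts * OpCost;                 // one op per legalized part
  if (A == Action::Custom)
    return NumLegalParts * 2 * OpCost;             // custom lowering assumed 2x
  return ScalarizationOverhead + NumElts * OpCost; // scalarized expansion
}

int main() {
  // 4-wide expanded integer op: 8 insert/extract overhead + 4 scalar ops = 12.
  std::printf("%u\n", arithmeticCost(Action::Expand, 1, false, 4, 8));
}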
694 | |||||||||
695 | unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index, | ||||||||
696 | VectorType *SubTp) { | ||||||||
697 | |||||||||
698 | switch (Kind) { | ||||||||
699 | case TTI::SK_Broadcast: | ||||||||
700 | return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp)); | ||||||||
701 | case TTI::SK_Select: | ||||||||
702 | case TTI::SK_Reverse: | ||||||||
703 | case TTI::SK_Transpose: | ||||||||
704 | case TTI::SK_PermuteSingleSrc: | ||||||||
705 | case TTI::SK_PermuteTwoSrc: | ||||||||
706 | return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); | ||||||||
707 | case TTI::SK_ExtractSubvector: | ||||||||
708 | return getExtractSubvectorOverhead(cast<FixedVectorType>(Tp), Index, | ||||||||
709 | cast<FixedVectorType>(SubTp)); | ||||||||
710 | case TTI::SK_InsertSubvector: | ||||||||
711 | return getInsertSubvectorOverhead(cast<FixedVectorType>(Tp), Index, | ||||||||
712 | cast<FixedVectorType>(SubTp)); | ||||||||
713 | } | ||||||||
714 | llvm_unreachable("Unknown TTI::ShuffleKind")::llvm::llvm_unreachable_internal("Unknown TTI::ShuffleKind", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 714); | ||||||||
715 | } | ||||||||
716 | |||||||||
717 | unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, | ||||||||
718 | TTI::CastContextHint CCH, | ||||||||
719 | TTI::TargetCostKind CostKind, | ||||||||
720 | const Instruction *I = nullptr) { | ||||||||
721 | if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0) | ||||||||
722 | return 0; | ||||||||
723 | |||||||||
724 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
725 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
726 | assert(ISD && "Invalid opcode"); | ||||||||
727 | std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src); | ||||||||
728 | std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst); | ||||||||
729 | |||||||||
730 | TypeSize SrcSize = SrcLT.second.getSizeInBits(); | ||||||||
731 | TypeSize DstSize = DstLT.second.getSizeInBits(); | ||||||||
732 | bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy(); | ||||||||
733 | bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy(); | ||||||||
734 | |||||||||
735 | switch (Opcode) { | ||||||||
736 | default: | ||||||||
737 | break; | ||||||||
738 | case Instruction::Trunc: | ||||||||
739 | // Check for NOOP conversions. | ||||||||
740 | if (TLI->isTruncateFree(SrcLT.second, DstLT.second)) | ||||||||
741 | return 0; | ||||||||
742 | LLVM_FALLTHROUGH; | ||||||||
743 | case Instruction::BitCast: | ||||||||
744 | // Bitcasts between types that are legalized to the same type are free, and | ||||||||
745 | // we assume an int to/from ptr cast of the same size is also free. | ||||||||
746 | if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst && | ||||||||
747 | SrcSize == DstSize) | ||||||||
748 | return 0; | ||||||||
749 | break; | ||||||||
750 | case Instruction::FPExt: | ||||||||
751 | if (I && getTLI()->isExtFree(I)) | ||||||||
752 | return 0; | ||||||||
753 | break; | ||||||||
754 | case Instruction::ZExt: | ||||||||
755 | if (TLI->isZExtFree(SrcLT.second, DstLT.second)) | ||||||||
756 | return 0; | ||||||||
757 | LLVM_FALLTHROUGH; | ||||||||
758 | case Instruction::SExt: | ||||||||
759 | if (I && getTLI()->isExtFree(I)) | ||||||||
760 | return 0; | ||||||||
761 | |||||||||
762 | // If this is a zext/sext of a load, return 0 if the corresponding | ||||||||
763 | // extending load exists on target. | ||||||||
764 | if (CCH == TTI::CastContextHint::Normal) { | ||||||||
765 | EVT ExtVT = EVT::getEVT(Dst); | ||||||||
766 | EVT LoadVT = EVT::getEVT(Src); | ||||||||
767 | unsigned LType = | ||||||||
768 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); | ||||||||
769 | if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) | ||||||||
770 | return 0; | ||||||||
771 | } | ||||||||
772 | break; | ||||||||
773 | case Instruction::AddrSpaceCast: | ||||||||
774 | if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), | ||||||||
775 | Dst->getPointerAddressSpace())) | ||||||||
776 | return 0; | ||||||||
777 | break; | ||||||||
778 | } | ||||||||
779 | |||||||||
780 | auto *SrcVTy = dyn_cast<VectorType>(Src); | ||||||||
781 | auto *DstVTy = dyn_cast<VectorType>(Dst); | ||||||||
782 | |||||||||
783 | // If the cast is marked as legal (or promote) then assume low cost. | ||||||||
784 | if (SrcLT.first == DstLT.first && | ||||||||
785 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) | ||||||||
786 | return SrcLT.first; | ||||||||
787 | |||||||||
788 | // Handle scalar conversions. | ||||||||
789 | if (!SrcVTy && !DstVTy) { | ||||||||
790 | // Just check the op cost. If the operation is legal then assume it costs | ||||||||
791 | // 1. | ||||||||
792 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | ||||||||
793 | return 1; | ||||||||
794 | |||||||||
795 | // Assume that illegal scalar instructions are expensive. | ||||||||
796 | return 4; | ||||||||
797 | } | ||||||||
798 | |||||||||
799 | // Check vector-to-vector casts. | ||||||||
800 | if (DstVTy && SrcVTy) { | ||||||||
801 | // If the cast is between same-sized registers, then the check is simple. | ||||||||
802 | if (SrcLT.first == DstLT.first && SrcSize == DstSize) { | ||||||||
803 | |||||||||
804 | // Assume that Zext is done using AND. | ||||||||
805 | if (Opcode == Instruction::ZExt) | ||||||||
806 | return SrcLT.first; | ||||||||
807 | |||||||||
808 | // Assume that sext is done using SHL and SRA. | ||||||||
809 | if (Opcode == Instruction::SExt) | ||||||||
810 | return SrcLT.first * 2; | ||||||||
811 | |||||||||
812 | // Just check the op cost. If the operation is legal then | ||||||||
813 | // assume it costs 1 and multiply by the | ||||||||
814 | // type-legalization overhead. | ||||||||
815 | if (!TLI->isOperationExpand(ISD, DstLT.second)) | ||||||||
816 | return SrcLT.first * 1; | ||||||||
817 | } | ||||||||
818 | |||||||||
819 | // If we are legalizing by splitting, query the concrete TTI for the cost | ||||||||
820 | // of casting the original vector twice. We also need to factor in the | ||||||||
821 | // cost of the split itself. Count that as 1, to be consistent with | ||||||||
822 | // TLI->getTypeLegalizationCost(). | ||||||||
823 | bool SplitSrc = | ||||||||
824 | TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == | ||||||||
825 | TargetLowering::TypeSplitVector; | ||||||||
826 | bool SplitDst = | ||||||||
827 | TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == | ||||||||
828 | TargetLowering::TypeSplitVector; | ||||||||
829 | if ((SplitSrc || SplitDst) && | ||||||||
830 | cast<FixedVectorType>(SrcVTy)->getNumElements() > 1 && | ||||||||
831 | cast<FixedVectorType>(DstVTy)->getNumElements() > 1) { | ||||||||
832 | Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy); | ||||||||
833 | Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy); | ||||||||
834 | T *TTI = static_cast<T *>(this); | ||||||||
835 | // If both types need to be split then the split is free. | ||||||||
836 | unsigned SplitCost = | ||||||||
837 | (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; | ||||||||
838 | return SplitCost + | ||||||||
839 | (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH, | ||||||||
840 | CostKind, I)); | ||||||||
841 | } | ||||||||
842 | |||||||||
843 | // In other cases where the source or destination are illegal, assume | ||||||||
844 | // the operation will get scalarized. | ||||||||
845 | unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements(); | ||||||||
846 | unsigned Cost = thisT()->getCastInstrCost( | ||||||||
847 | Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I); | ||||||||
848 | |||||||||
849 | // Return the cost of multiple scalar invocations plus the cost of | ||||||||
850 | // inserting and extracting the values. | ||||||||
851 | return getScalarizationOverhead(DstVTy, true, true) + Num * Cost; | ||||||||
852 | } | ||||||||
853 | |||||||||
854 | // We already handled vector-to-vector and scalar-to-scalar conversions. | ||||||||
855 | // This is where we handle bitcasts between vectors and scalars. We | ||||||||
856 | // need to assume that the conversion is scalarized in one way or | ||||||||
857 | // another. | ||||||||
858 | if (Opcode == Instruction::BitCast) { | ||||||||
859 | // Illegal bitcasts are done by storing and loading from a stack slot. | ||||||||
860 | return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) + | ||||||||
861 | (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0); | ||||||||
862 | } | ||||||||
863 | |||||||||
864 | llvm_unreachable("Unhandled cast")::llvm::llvm_unreachable_internal("Unhandled cast", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/BasicTTIImpl.h" , 864); | ||||||||
865 | } | ||||||||
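When a vector cast must be legalized by splitting, the code above charges twice the half-width cast plus one split (the split is free when both sides need it). A self-contained sketch of the resulting recursion, under the assumption of a unit cost per split and per legal-width cast:

#include <cstdio>

// Assumed costs: 1 per legal-width cast, 1 per split that isn't free.
unsigned splitCastCost(unsigned NumElts, unsigned LegalNumElts) {
  if (NumElts <= LegalNumElts)
    return 1;                              // legal-width cast
  const unsigned SplitCost = 1;            // assumed split cost
  return SplitCost + 2 * splitCastCost(NumElts / 2, LegalNumElts);
}

int main() {
  std::printf("%u\n", splitCastCost(16, 4)); // 1 + 2*(1 + 2*1) = 7
}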
866 | |||||||||
867 | unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, | ||||||||
868 | VectorType *VecTy, unsigned Index) { | ||||||||
869 | return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy, | ||||||||
870 | Index) + | ||||||||
871 | thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(), | ||||||||
872 | TTI::CastContextHint::None, TTI::TCK_RecipThroughput); | ||||||||
873 | } | ||||||||
874 | |||||||||
875 | unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) { | ||||||||
876 | return BaseT::getCFInstrCost(Opcode, CostKind); | ||||||||
877 | } | ||||||||
878 | |||||||||
879 | unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, | ||||||||
880 | TTI::TargetCostKind CostKind, | ||||||||
881 | const Instruction *I = nullptr) { | ||||||||
882 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
883 | int ISD = TLI->InstructionOpcodeToISD(Opcode); | ||||||||
884 | assert(ISD && "Invalid opcode"); | ||||||||
885 | |||||||||
886 | // TODO: Handle other cost kinds. | ||||||||
887 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
888 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, CostKind, I); | ||||||||
889 | |||||||||
890 | // Selects on vectors are actually vector selects. | ||||||||
891 | if (ISD == ISD::SELECT) { | ||||||||
892 | assert(CondTy && "CondTy must exist"); | ||||||||
893 | if (CondTy->isVectorTy()) | ||||||||
894 | ISD = ISD::VSELECT; | ||||||||
895 | } | ||||||||
896 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); | ||||||||
897 | |||||||||
898 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && | ||||||||
899 | !TLI->isOperationExpand(ISD, LT.second)) { | ||||||||
900 | // The operation is legal. Assume it costs 1. Multiply | ||||||||
901 | // by the type-legalization overhead. | ||||||||
902 | return LT.first * 1; | ||||||||
903 | } | ||||||||
904 | |||||||||
905 | // Otherwise, assume that the cast is scalarized. | ||||||||
906 | // TODO: If one of the types get legalized by splitting, handle this | ||||||||
907 | // similarly to what getCastInstrCost() does. | ||||||||
908 | if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) { | ||||||||
909 | unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements(); | ||||||||
910 | if (CondTy) | ||||||||
911 | CondTy = CondTy->getScalarType(); | ||||||||
912 | unsigned Cost = thisT()->getCmpSelInstrCost( | ||||||||
913 | Opcode, ValVTy->getScalarType(), CondTy, CostKind, I); | ||||||||
914 | |||||||||
915 | // Return the cost of multiple scalar invocations plus the cost of | ||||||||
916 | // inserting and extracting the values. | ||||||||
917 | return getScalarizationOverhead(ValVTy, true, false) + Num * Cost; | ||||||||
918 | } | ||||||||
919 | |||||||||
920 | // Unknown scalar opcode. | ||||||||
921 | return 1; | ||||||||
922 | } | ||||||||
923 | |||||||||
924 | unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { | ||||||||
925 | std::pair<unsigned, MVT> LT = | ||||||||
926 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); | ||||||||
927 | |||||||||
928 | return LT.first; | ||||||||
929 | } | ||||||||
930 | |||||||||
931 | unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment, | ||||||||
932 | unsigned AddressSpace, | ||||||||
933 | TTI::TargetCostKind CostKind, | ||||||||
934 | const Instruction *I = nullptr) { | ||||||||
935 | assert(!Src->isVoidTy() && "Invalid type"); | ||||||||
936 | // Assume types, such as structs, are expensive. | ||||||||
937 | if (getTLI()->getValueType(DL, Src, true) == MVT::Other) | ||||||||
938 | return 4; | ||||||||
939 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src); | ||||||||
940 | |||||||||
941 | // Assuming that all loads of legal types cost 1. | ||||||||
942 | unsigned Cost = LT.first; | ||||||||
943 | if (CostKind != TTI::TCK_RecipThroughput) | ||||||||
944 | return Cost; | ||||||||
945 | |||||||||
946 | if (Src->isVectorTy() && | ||||||||
947 | Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { | ||||||||
948 | // This is a vector load that legalizes to a larger type than the vector | ||||||||
949 | // itself. Unless the corresponding extending load or truncating store is | ||||||||
950 | // legal, this will scalarize. | ||||||||
951 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; | ||||||||
952 | EVT MemVT = getTLI()->getValueType(DL, Src); | ||||||||
953 | if (Opcode == Instruction::Store) | ||||||||
954 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); | ||||||||
955 | else | ||||||||
956 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); | ||||||||
957 | |||||||||
958 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { | ||||||||
959 | // This is a vector load/store for some illegal type that is scalarized. | ||||||||
960 | // We must account for the cost of building or decomposing the vector. | ||||||||
961 | Cost += getScalarizationOverhead(cast<VectorType>(Src), | ||||||||
962 | Opcode != Instruction::Store, | ||||||||
963 | Opcode == Instruction::Store); | ||||||||
964 | } | ||||||||
965 | } | ||||||||
966 | |||||||||
967 | return Cost; | ||||||||
968 | } | ||||||||
969 | |||||||||
970 | unsigned getInterleavedMemoryOpCost( | ||||||||
971 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, | ||||||||
972 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, | ||||||||
973 | bool UseMaskForCond = false, bool UseMaskForGaps = false) { | ||||||||
974 | auto *VT = cast<FixedVectorType>(VecTy); | ||||||||
975 | |||||||||
976 | unsigned NumElts = VT->getNumElements(); | ||||||||
977 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); | ||||||||
978 | |||||||||
979 | unsigned NumSubElts = NumElts / Factor; | ||||||||
980 | auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts); | ||||||||
981 | |||||||||
982 | // Firstly, the cost of load/store operation. | ||||||||
983 | unsigned Cost; | ||||||||
984 | if (UseMaskForCond || UseMaskForGaps) | ||||||||
985 | Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment, | ||||||||
986 | AddressSpace, CostKind); | ||||||||
987 | else | ||||||||
988 | Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, | ||||||||
989 | CostKind); | ||||||||
990 | |||||||||
991 | // Legalize the vector type, and get the legalized and unlegalized type | ||||||||
992 | // sizes. | ||||||||
993 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; | ||||||||
994 | unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy); | ||||||||
995 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); | ||||||||
996 | |||||||||
997 | // Return the ceiling of dividing A by B. | ||||||||
998 | auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; }; | ||||||||
999 | |||||||||
1000 | // Scale the cost of the memory operation by the fraction of legalized | ||||||||
1001 | // instructions that will actually be used. We shouldn't account for the | ||||||||
1002 | // cost of dead instructions since they will be removed. | ||||||||
1003 | // | ||||||||
1004 | // E.g., An interleaved load of factor 8: | ||||||||
1005 | // %vec = load <16 x i64>, <16 x i64>* %ptr | ||||||||
1006 | // %v0 = shufflevector %vec, undef, <0, 8> | ||||||||
1007 | // | ||||||||
1008 | // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be | ||||||||
1009 | // used (those corresponding to elements [0:1] and [8:9] of the unlegalized | ||||||||
1010 | // type). The other loads are unused. | ||||||||
1011 | // | ||||||||
1012 | // We only scale the cost of loads since interleaved store groups aren't | ||||||||
1013 | // allowed to have gaps. | ||||||||
1014 | if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) { | ||||||||
1015 | // The number of loads of a legal type it will take to represent a load | ||||||||
1016 | // of the unlegalized vector type. | ||||||||
1017 | unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize); | ||||||||
1018 | |||||||||
1019 | // The number of elements of the unlegalized type that correspond to a | ||||||||
1020 | // single legal instruction. | ||||||||
1021 | unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts); | ||||||||
1022 | |||||||||
1023 | // Determine which legal instructions will be used. | ||||||||
1024 | BitVector UsedInsts(NumLegalInsts, false); | ||||||||
1025 | for (unsigned Index : Indices) | ||||||||
1026 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) | ||||||||
1027 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); | ||||||||
1028 | |||||||||
1029 | // Scale the cost of the load by the fraction of legal instructions that | ||||||||
1030 | // will be used. | ||||||||
1031 | Cost *= UsedInsts.count() / NumLegalInsts; | ||||||||
1032 | } | ||||||||
1033 | |||||||||
1034 | // Then add the cost of the interleave operation. | ||||||||
1035 | if (Opcode == Instruction::Load) { | ||||||||
1036 | // The interleave cost is similar to extracting the sub-vectors' elements | ||||||||
1037 | // from the wide vector and inserting them into the sub-vectors. | ||||||||
1038 | // | ||||||||
1039 | // E.g. An interleaved load of factor 2 (with one member of index 0): | ||||||||
1040 | // %vec = load <8 x i32>, <8 x i32>* %ptr | ||||||||
1041 | // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0 | ||||||||
1042 | // The cost is estimated as extract elements at 0, 2, 4, 6 from the | ||||||||
1043 | // <8 x i32> vector and insert them into a <4 x i32> vector. | ||||||||
1044 | |||||||||
1045 | assert(Indices.size() <= Factor && | ||||||||
1046 | "Interleaved memory op has too many members"); | ||||||||
1047 | |||||||||
1048 | for (unsigned Index : Indices) { | ||||||||
1049 | assert(Index < Factor && "Invalid index for interleaved memory op"); | ||||||||
1050 | |||||||||
1051 | // Extract elements from loaded vector for each sub vector. | ||||||||
1052 | for (unsigned i = 0; i < NumSubElts; i++) | ||||||||
1053 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT, | ||||||||
1054 | Index + i * Factor); | ||||||||
1055 | } | ||||||||
1056 | |||||||||
1057 | unsigned InsSubCost = 0; | ||||||||
1058 | for (unsigned i = 0; i < NumSubElts; i++) | ||||||||
1059 | InsSubCost += | ||||||||
1060 | thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i); | ||||||||
1061 | |||||||||
1062 | Cost += Indices.size() * InsSubCost; | ||||||||
1063 | } else { | ||||||||
1064 | // The interleave cost is extracting all elements from the sub-vectors | ||||||||
1065 | // and inserting them into the wide vector. | ||||||||
1066 | // | ||||||||
1067 | // E.g. An interleaved store of factor 2: | ||||||||
1068 | // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> | ||||||||
1069 | // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr | ||||||||
1070 | // The cost is estimated as extract all elements from both <4 x i32> | ||||||||
1071 | // vectors and insert into the <8 x i32> vector. | ||||||||
1072 | |||||||||
1073 | unsigned ExtSubCost = 0; | ||||||||
1074 | for (unsigned i = 0; i < NumSubElts; i++) | ||||||||
1075 | ExtSubCost += | ||||||||
1076 | thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); | ||||||||
1077 | Cost += ExtSubCost * Factor; | ||||||||
1078 | |||||||||
1079 | for (unsigned i = 0; i < NumElts; i++) | ||||||||
1080 | Cost += static_cast<T *>(this) | ||||||||
1081 | ->getVectorInstrCost(Instruction::InsertElement, VT, i); | ||||||||
1082 | } | ||||||||
1083 | |||||||||
1084 | if (!UseMaskForCond) | ||||||||
1085 | return Cost; | ||||||||
1086 | |||||||||
1087 | Type *I8Type = Type::getInt8Ty(VT->getContext()); | ||||||||
1088 | auto *MaskVT = FixedVectorType::get(I8Type, NumElts); | ||||||||
1089 | SubVT = FixedVectorType::get(I8Type, NumSubElts); | ||||||||
1090 | |||||||||
1091 | // The Mask shuffling cost is extracting all the elements of the Mask | ||||||||
1092 | // and inserting each of them Factor times into the wide vector: | ||||||||
1093 | // | ||||||||
1094 | // E.g. an interleaved group with factor 3: | ||||||||
1095 | // %mask = icmp ult <8 x i32> %vec1, %vec2 | ||||||||
1096 | // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef, | ||||||||
1097 | // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7> | ||||||||
1098 | // The cost is estimated as extract all mask elements from the <8xi1> mask | ||||||||
1099 | // vector and insert them factor times into the <24xi1> shuffled mask | ||||||||
1100 | // vector. | ||||||||
1101 | for (unsigned i = 0; i < NumSubElts; i++) | ||||||||
1102 | Cost += | ||||||||
1103 | thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); | ||||||||
1104 | |||||||||
1105 | for (unsigned i = 0; i < NumElts; i++) | ||||||||
1106 | Cost += | ||||||||
1107 | thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i); | ||||||||
1108 | |||||||||
1109 | // The Gaps mask is invariant and created outside the loop, therefore the | ||||||||
1110 | // cost of creating it is not accounted for here. However if we have both | ||||||||
1111 | // a MaskForGaps and some other mask that guards the execution of the | ||||||||
1112 | // memory access, we need to account for the cost of And-ing the two masks | ||||||||
1113 | // inside the loop. | ||||||||
1114 | if (UseMaskForGaps) | ||||||||
1115 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT, | ||||||||
1116 | CostKind); | ||||||||
1117 | |||||||||
1118 | return Cost; | ||||||||
1119 | } | ||||||||
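For the load-scaling step above, here is a worked, standalone example with assumed sizes: a factor-8 interleaved load of <16 x i64> that legalizes to eight v2i64 loads, with a single member at index 0, uses only elements 0 and 8, i.e. legal loads 0 and 4, so 2 of the 8 legal loads survive dead-code elimination.

#include <cstdio>
#include <set>

int main() {
  unsigned NumElts = 16, Factor = 8;
  unsigned NumSubElts = NumElts / Factor;                 // 2
  unsigned NumLegalInsts = 8;
  unsigned NumEltsPerLegalInst = NumElts / NumLegalInsts; // 2
  unsigned Indices[] = {0};                               // one member used
  std::set<unsigned> Used;
  for (unsigned Index : Indices)
    for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
      Used.insert((Index + Elt * Factor) / NumEltsPerLegalInst);
  std::printf("used %zu of %u legal loads\n", Used.size(), NumLegalInsts);
}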
1120 | |||||||||
1121 | /// Get intrinsic cost based on arguments. | ||||||||
1122 | unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||||||||
1123 | TTI::TargetCostKind CostKind) { | ||||||||
1124 | Intrinsic::ID IID = ICA.getID(); | ||||||||
1125 | |||||||||
1126 | // Special case some scalar intrinsics. | ||||||||
1127 | if (CostKind != TTI::TCK_RecipThroughput) { | ||||||||
1128 | switch (IID) { | ||||||||
1129 | default: | ||||||||
1130 | break; | ||||||||
1131 | case Intrinsic::cttz: | ||||||||
1132 | if (getTLI()->isCheapToSpeculateCttz()) | ||||||||
1133 | return TargetTransformInfo::TCC_Basic; | ||||||||
1134 | break; | ||||||||
1135 | case Intrinsic::ctlz: | ||||||||
1136 | if (getTLI()->isCheapToSpeculateCtlz()) | ||||||||
1137 | return TargetTransformInfo::TCC_Basic; | ||||||||
1138 | break; | ||||||||
1139 | case Intrinsic::memcpy: | ||||||||
1140 | return thisT()->getMemcpyCost(ICA.getInst()); | ||||||||
1141 | // TODO: other libc intrinsics. | ||||||||
1142 | } | ||||||||
1143 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); | ||||||||
1144 | } | ||||||||
1145 | |||||||||
1146 | if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0) | ||||||||
1147 | return 0; | ||||||||
1148 | |||||||||
1149 | // TODO: Combine these two logic paths. | ||||||||
1150 | if (ICA.isTypeBasedOnly()) | ||||||||
1151 | return getTypeBasedIntrinsicInstrCost(ICA, CostKind); | ||||||||
1152 | |||||||||
1153 | Type *RetTy = ICA.getReturnType(); | ||||||||
1154 | unsigned VF = ICA.getVectorFactor(); | ||||||||
1155 | unsigned RetVF = | ||||||||
1156 | (RetTy->isVectorTy() ? cast<FixedVectorType>(RetTy)->getNumElements() | ||||||||
1157 | : 1); | ||||||||
1158 | assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type"); | ||||||||
1159 | const IntrinsicInst *I = ICA.getInst(); | ||||||||
1160 | const SmallVectorImpl<const Value *> &Args = ICA.getArgs(); | ||||||||
1161 | FastMathFlags FMF = ICA.getFlags(); | ||||||||
1162 | |||||||||
1163 | switch (IID) { | ||||||||
1164 | default: { | ||||||||
1165 | // Assume that we need to scalarize this intrinsic. | ||||||||
1166 | SmallVector<Type *, 4> Types; | ||||||||
1167 | for (const Value *Op : Args) { | ||||||||
1168 | Type *OpTy = Op->getType(); | ||||||||
1169 | assert(VF == 1 || !OpTy->isVectorTy()); | ||||||||
1170 | Types.push_back(VF == 1 ? OpTy : FixedVectorType::get(OpTy, VF)); | ||||||||
1171 | } | ||||||||
1172 | |||||||||
1173 | if (VF > 1 && !RetTy->isVoidTy()) | ||||||||
1174 | RetTy = FixedVectorType::get(RetTy, VF); | ||||||||
1175 | |||||||||
1176 | // Compute the scalarization overhead based on Args for a vector | ||||||||
1177 | // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while | ||||||||
1178 | // CostModel will pass a vector RetTy and VF is 1. | ||||||||
1179 | unsigned ScalarizationCost = std::numeric_limits<unsigned>::max(); | ||||||||
1180 | if (RetVF > 1 || VF > 1) { | ||||||||
1181 | ScalarizationCost = 0; | ||||||||
1182 | if (!RetTy->isVoidTy()) | ||||||||
1183 | ScalarizationCost += | ||||||||
1184 | getScalarizationOverhead(cast<VectorType>(RetTy), true, false); | ||||||||
1185 | ScalarizationCost += getOperandsScalarizationOverhead(Args, VF); | ||||||||
1186 | } | ||||||||
1187 | |||||||||
1188 | IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, | ||||||||
1189 | ScalarizationCost, I); | ||||||||
1190 | return thisT()->getIntrinsicInstrCost(Attrs, CostKind); | ||||||||
1191 | } | ||||||||
1192 | case Intrinsic::masked_scatter: { | ||||||||
1193 | assert(VF == 1 && "Can't vectorize types here."); | ||||||||
1194 | const Value *Mask = Args[3]; | ||||||||
1195 | bool VarMask = !isa<Constant>(Mask); | ||||||||
1196 | Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue(); | ||||||||
1197 | return thisT()->getGatherScatterOpCost(Instruction::Store, | ||||||||
1198 | Args[0]->getType(), Args[1], | ||||||||
1199 | VarMask, Alignment, CostKind, I); | ||||||||
1200 | } | ||||||||
1201 | case Intrinsic::masked_gather: { | ||||||||
1202 | assert(VF == 1 && "Can't vectorize types here."); | ||||||||
1203 | const Value *Mask = Args[2]; | ||||||||
1204 | bool VarMask = !isa<Constant>(Mask); | ||||||||
1205 | Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue(); | ||||||||
1206 | return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], | ||||||||
1207 | VarMask, Alignment, CostKind, I); | ||||||||
1208 | } | ||||||||
1209 | case Intrinsic::experimental_vector_reduce_add: | ||||||||
1210 | case Intrinsic::experimental_vector_reduce_mul: | ||||||||
1211 | case Intrinsic::experimental_vector_reduce_and: | ||||||||
1212 | case Intrinsic::experimental_vector_reduce_or: | ||||||||
1213 | case Intrinsic::experimental_vector_reduce_xor: | ||||||||
1214 | case Intrinsic::experimental_vector_reduce_v2_fadd: | ||||||||
1215 | case Intrinsic::experimental_vector_reduce_v2_fmul: | ||||||||
1216 | case Intrinsic::experimental_vector_reduce_smax: | ||||||||
1217 | case Intrinsic::experimental_vector_reduce_smin: | ||||||||
1218 | case Intrinsic::experimental_vector_reduce_fmax: | ||||||||
1219 | case Intrinsic::experimental_vector_reduce_fmin: | ||||||||
1220 | case Intrinsic::experimental_vector_reduce_umax: | ||||||||
1221 | case Intrinsic::experimental_vector_reduce_umin: { | ||||||||
1222 | IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I); | ||||||||
1223 | return getIntrinsicInstrCost(Attrs, CostKind); | ||||||||
1224 | } | ||||||||
1225 | case Intrinsic::fshl: | ||||||||
1226 | case Intrinsic::fshr: { | ||||||||
1227 | const Value *X = Args[0]; | ||||||||
1228 | const Value *Y = Args[1]; | ||||||||
1229 | const Value *Z = Args[2]; | ||||||||
1230 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; | ||||||||
1231 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); | ||||||||
1232 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); | ||||||||
1233 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); | ||||||||
1234 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; | ||||||||
1235 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 | ||||||||
1236 | : TTI::OP_None; | ||||||||
1237 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) | ||||||||
1238 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) | ||||||||
1239 | unsigned Cost = 0; | ||||||||
1240 | Cost += | ||||||||
1241 | thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind); | ||||||||
1242 | Cost += | ||||||||
1243 | thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind); | ||||||||
1244 | Cost += thisT()->getArithmeticInstrCost( | ||||||||
1245 | BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX); | ||||||||
1246 | Cost += thisT()->getArithmeticInstrCost( | ||||||||
1247 | BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY); | ||||||||
1248 | // Non-constant shift amounts require a modulo operation. | ||||||||
1249 | if (OpKindZ != TTI::OK_UniformConstantValue && | ||||||||
1250 | OpKindZ != TTI::OK_NonUniformConstantValue) | ||||||||
1251 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, | ||||||||
1252 | CostKind, OpKindZ, OpKindBW, | ||||||||
1253 | OpPropsZ, OpPropsBW); | ||||||||
1254 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. | ||||||||
1255 | if (X != Y) { | ||||||||
1256 | Type *CondTy = RetTy->getWithNewBitWidth(1); | ||||||||
1257 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, | ||||||||
1258 | CostKind); | ||||||||
1259 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | ||||||||
1260 | CondTy, CostKind); | ||||||||
1261 | } | ||||||||
1262 | return Cost; | ||||||||
1263 | } | ||||||||
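A plain-C++ reference for the expansion whose pieces are summed above (or, sub, shl, lshr, urem, plus the icmp/select shift-by-zero guard for non-rotates); this only illustrates the formula, it is not LLVM's lowering code.

#include <cstdint>
#include <cstdio>

uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t Shamt = Z % BW;                    // urem
  if (Shamt == 0)                             // icmp + select guard
    return X;
  return (X << Shamt) | (Y >> (BW - Shamt));  // shl, lshr, sub, or
}

int main() {
  std::printf("0x%08x\n", fshl32(0x12345678u, 0x9abcdef0u, 8)); // 0x3456789a
}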
1264 | } | ||||||||
1265 | } | ||||||||
1266 | |||||||||
1267 | /// Get intrinsic cost based on argument types. | ||||||||
1268 | /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the | ||||||||
1269 | /// cost of scalarizing the arguments and the return value will be computed | ||||||||
1270 | /// based on types. | ||||||||
1271 | unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, | ||||||||
1272 | TTI::TargetCostKind CostKind) { | ||||||||
1273 | Intrinsic::ID IID = ICA.getID(); | ||||||||
1274 | Type *RetTy = ICA.getReturnType(); | ||||||||
1275 | const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes(); | ||||||||
1276 | FastMathFlags FMF = ICA.getFlags(); | ||||||||
1277 | unsigned ScalarizationCostPassed = ICA.getScalarizationCost(); | ||||||||
1278 | bool SkipScalarizationCost = ICA.skipScalarizationCost(); | ||||||||
1279 | |||||||||
1280 | auto *VecOpTy = Tys.empty() ? nullptr : dyn_cast<VectorType>(Tys[0]); | ||||||||
1281 | |||||||||
1282 | SmallVector<unsigned, 2> ISDs; | ||||||||
1283 | unsigned SingleCallCost = 10; // Library call cost. Make it expensive. | ||||||||
1284 | switch (IID) { | ||||||||
1285 | default: { | ||||||||
1286 | // Assume that we need to scalarize this intrinsic. | ||||||||
1287 | unsigned ScalarizationCost = ScalarizationCostPassed; | ||||||||
1288 | unsigned ScalarCalls = 1; | ||||||||
1289 | Type *ScalarRetTy = RetTy; | ||||||||
1290 | if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) { | ||||||||
1291 | if (!SkipScalarizationCost) | ||||||||
1292 | ScalarizationCost = getScalarizationOverhead(RetVTy, true, false); | ||||||||
1293 | ScalarCalls = std::max(ScalarCalls, | ||||||||
1294 | cast<FixedVectorType>(RetVTy)->getNumElements()); | ||||||||
1295 | ScalarRetTy = RetTy->getScalarType(); | ||||||||
1296 | } | ||||||||
1297 | SmallVector<Type *, 4> ScalarTys; | ||||||||
1298 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | ||||||||
1299 | Type *Ty = Tys[i]; | ||||||||
1300 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { | ||||||||
1301 | if (!SkipScalarizationCost) | ||||||||
1302 | ScalarizationCost += getScalarizationOverhead(VTy, false, true); | ||||||||
1303 | ScalarCalls = std::max(ScalarCalls, | ||||||||
1304 | cast<FixedVectorType>(VTy)->getNumElements()); | ||||||||
1305 | Ty = Ty->getScalarType(); | ||||||||
1306 | } | ||||||||
1307 | ScalarTys.push_back(Ty); | ||||||||
1308 | } | ||||||||
1309 | if (ScalarCalls == 1) | ||||||||
1310 | return 1; // Return cost of a scalar intrinsic. Assume it to be cheap. | ||||||||
1311 | |||||||||
1312 | IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF); | ||||||||
1313 | unsigned ScalarCost = | ||||||||
1314 | thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind); | ||||||||
1315 | |||||||||
1316 | return ScalarCalls * ScalarCost + ScalarizationCost; | ||||||||
1317 | } | ||||||||
1318 | // Look for intrinsics that can be lowered directly or turned into a scalar | ||||||||
1319 | // intrinsic call. | ||||||||
1320 | case Intrinsic::sqrt: | ||||||||
1321 | ISDs.push_back(ISD::FSQRT); | ||||||||
1322 | break; | ||||||||
1323 | case Intrinsic::sin: | ||||||||
1324 | ISDs.push_back(ISD::FSIN); | ||||||||
1325 | break; | ||||||||
1326 | case Intrinsic::cos: | ||||||||
1327 | ISDs.push_back(ISD::FCOS); | ||||||||
1328 | break; | ||||||||
1329 | case Intrinsic::exp: | ||||||||
1330 | ISDs.push_back(ISD::FEXP); | ||||||||
1331 | break; | ||||||||
1332 | case Intrinsic::exp2: | ||||||||
1333 | ISDs.push_back(ISD::FEXP2); | ||||||||
1334 | break; | ||||||||
1335 | case Intrinsic::log: | ||||||||
1336 | ISDs.push_back(ISD::FLOG); | ||||||||
1337 | break; | ||||||||
1338 | case Intrinsic::log10: | ||||||||
1339 | ISDs.push_back(ISD::FLOG10); | ||||||||
1340 | break; | ||||||||
1341 | case Intrinsic::log2: | ||||||||
1342 | ISDs.push_back(ISD::FLOG2); | ||||||||
1343 | break; | ||||||||
1344 | case Intrinsic::fabs: | ||||||||
1345 | ISDs.push_back(ISD::FABS); | ||||||||
1346 | break; | ||||||||
1347 | case Intrinsic::canonicalize: | ||||||||
1348 | ISDs.push_back(ISD::FCANONICALIZE); | ||||||||
1349 | break; | ||||||||
1350 | case Intrinsic::minnum: | ||||||||
1351 | ISDs.push_back(ISD::FMINNUM); | ||||||||
1352 | break; | ||||||||
1353 | case Intrinsic::maxnum: | ||||||||
1354 | ISDs.push_back(ISD::FMAXNUM); | ||||||||
1355 | break; | ||||||||
1356 | case Intrinsic::copysign: | ||||||||
1357 | ISDs.push_back(ISD::FCOPYSIGN); | ||||||||
1358 | break; | ||||||||
1359 | case Intrinsic::floor: | ||||||||
1360 | ISDs.push_back(ISD::FFLOOR); | ||||||||
1361 | break; | ||||||||
1362 | case Intrinsic::ceil: | ||||||||
1363 | ISDs.push_back(ISD::FCEIL); | ||||||||
1364 | break; | ||||||||
1365 | case Intrinsic::trunc: | ||||||||
1366 | ISDs.push_back(ISD::FTRUNC); | ||||||||
1367 | break; | ||||||||
1368 | case Intrinsic::nearbyint: | ||||||||
1369 | ISDs.push_back(ISD::FNEARBYINT); | ||||||||
1370 | break; | ||||||||
1371 | case Intrinsic::rint: | ||||||||
1372 | ISDs.push_back(ISD::FRINT); | ||||||||
1373 | break; | ||||||||
1374 | case Intrinsic::round: | ||||||||
1375 | ISDs.push_back(ISD::FROUND); | ||||||||
1376 | break; | ||||||||
1377 | case Intrinsic::roundeven: | ||||||||
1378 | ISDs.push_back(ISD::FROUNDEVEN); | ||||||||
1379 | break; | ||||||||
1380 | case Intrinsic::pow: | ||||||||
1381 | ISDs.push_back(ISD::FPOW); | ||||||||
1382 | break; | ||||||||
1383 | case Intrinsic::fma: | ||||||||
1384 | ISDs.push_back(ISD::FMA); | ||||||||
1385 | break; | ||||||||
1386 | case Intrinsic::fmuladd: | ||||||||
1387 | ISDs.push_back(ISD::FMA); | ||||||||
1388 | break; | ||||||||
1389 | case Intrinsic::experimental_constrained_fmuladd: | ||||||||
1390 | ISDs.push_back(ISD::STRICT_FMA); | ||||||||
1391 | break; | ||||||||
1392 | // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free. | ||||||||
1393 | case Intrinsic::lifetime_start: | ||||||||
1394 | case Intrinsic::lifetime_end: | ||||||||
1395 | case Intrinsic::sideeffect: | ||||||||
1396 | return 0; | ||||||||
1397 | case Intrinsic::masked_store: { | ||||||||
1398 | Type *Ty = Tys[0]; | ||||||||
1399 | Align TyAlign = thisT()->DL.getABITypeAlign(Ty); | ||||||||
1400 | return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0, | ||||||||
1401 | CostKind); | ||||||||
1402 | } | ||||||||
1403 | case Intrinsic::masked_load: { | ||||||||
1404 | Type *Ty = RetTy; | ||||||||
1405 | Align TyAlign = thisT()->DL.getABITypeAlign(Ty); | ||||||||
1406 | return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0, | ||||||||
1407 | CostKind); | ||||||||
1408 | } | ||||||||
1409 | case Intrinsic::experimental_vector_reduce_add: | ||||||||
1410 | return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, | ||||||||
1411 | /*IsPairwiseForm=*/false, | ||||||||
1412 | CostKind); | ||||||||
1413 | case Intrinsic::experimental_vector_reduce_mul: | ||||||||
1414 | return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, | ||||||||
1415 | /*IsPairwiseForm=*/false, | ||||||||
1416 | CostKind); | ||||||||
1417 | case Intrinsic::experimental_vector_reduce_and: | ||||||||
1418 | return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, | ||||||||
1419 | /*IsPairwiseForm=*/false, | ||||||||
1420 | CostKind); | ||||||||
1421 | case Intrinsic::experimental_vector_reduce_or: | ||||||||
1422 | return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, | ||||||||
1423 | /*IsPairwiseForm=*/false, | ||||||||
1424 | CostKind); | ||||||||
1425 | case Intrinsic::experimental_vector_reduce_xor: | ||||||||
1426 | return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, | ||||||||
1427 | /*IsPairwiseForm=*/false, | ||||||||
1428 | CostKind); | ||||||||
1429 | case Intrinsic::experimental_vector_reduce_v2_fadd: | ||||||||
1430 | // FIXME: Add new flag for cost of strict reductions. | ||||||||
1431 | return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, | ||||||||
1432 | /*IsPairwiseForm=*/false, | ||||||||
1433 | CostKind); | ||||||||
1434 | case Intrinsic::experimental_vector_reduce_v2_fmul: | ||||||||
1435 | // FIXME: Add new flag for cost of strict reductions. | ||||||||
1436 | return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, | ||||||||
1437 | /*IsPairwiseForm=*/false, | ||||||||
1438 | CostKind); | ||||||||
1439 | case Intrinsic::experimental_vector_reduce_smax: | ||||||||
1440 | case Intrinsic::experimental_vector_reduce_smin: | ||||||||
1441 | case Intrinsic::experimental_vector_reduce_fmax: | ||||||||
1442 | case Intrinsic::experimental_vector_reduce_fmin: | ||||||||
1443 | return thisT()->getMinMaxReductionCost( | ||||||||
1444 | VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), | ||||||||
1445 | /*IsPairwiseForm=*/false, | ||||||||
1446 | /*IsUnsigned=*/false, CostKind); | ||||||||
1447 | case Intrinsic::experimental_vector_reduce_umax: | ||||||||
1448 | case Intrinsic::experimental_vector_reduce_umin: | ||||||||
1449 | return thisT()->getMinMaxReductionCost( | ||||||||
1450 | VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), | ||||||||
1451 | /*IsPairwiseForm=*/false, | ||||||||
1452 | /*IsUnsigned=*/true, CostKind); | ||||||||
1453 | case Intrinsic::abs: | ||||||||
1454 | case Intrinsic::smax: | ||||||||
1455 | case Intrinsic::smin: | ||||||||
1456 | case Intrinsic::umax: | ||||||||
1457 | case Intrinsic::umin: { | ||||||||
1458 | // abs(X) = select(icmp(X,0),X,sub(0,X)) | ||||||||
1459 | // minmax(X,Y) = select(icmp(X,Y),X,Y) | ||||||||
1460 | Type *CondTy = RetTy->getWithNewBitWidth(1); | ||||||||
1461 | unsigned Cost = 0; | ||||||||
1462 | // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code. | ||||||||
1463 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, | ||||||||
1464 | CostKind); | ||||||||
1465 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, | ||||||||
1466 | CostKind); | ||||||||
1467 | // TODO: Should we add an OperandValueProperties::OP_Zero property? | ||||||||
1468 | if (IID == Intrinsic::abs) | ||||||||
1469 | Cost += thisT()->getArithmeticInstrCost( | ||||||||
1470 | BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); | ||||||||
1471 | return Cost; | ||||||||
1472 | } | ||||||||
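The abs/min/max pricing above models an icmp + select expansion, plus a sub for abs. A small standalone illustration (it assumes X != INT32_MIN so the negation is well defined):

#include <cstdint>
#include <cstdio>

int32_t absViaSelect(int32_t X) {
  bool Neg = X < 0;        // icmp slt X, 0
  int32_t NegX = 0 - X;    // sub 0, X (constant-zero operand)
  return Neg ? NegX : X;   // select
}

int32_t smaxViaSelect(int32_t X, int32_t Y) {
  return X > Y ? X : Y;    // icmp sgt + select
}

int main() { std::printf("%d %d\n", absViaSelect(-7), smaxViaSelect(3, 9)); }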
1473 | case Intrinsic::sadd_sat: | ||||||||
1474 | case Intrinsic::ssub_sat: { | ||||||||
1475 | Type *CondTy = RetTy->getWithNewBitWidth(1); | ||||||||
1476 | |||||||||
1477 | Type *OpTy = StructType::create({RetTy, CondTy}); | ||||||||
1478 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat | ||||||||
1479 | ? Intrinsic::sadd_with_overflow | ||||||||
1480 | : Intrinsic::ssub_with_overflow; | ||||||||
1481 | |||||||||
1482 | // SatMax -> Overflow && SumDiff < 0 | ||||||||
1483 | // SatMin -> Overflow && SumDiff >= 0 | ||||||||
1484 | unsigned Cost = 0; | ||||||||
1485 | IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, | ||||||||
1486 | ScalarizationCostPassed); | ||||||||
1487 | Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); | ||||||||
1488 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, | ||||||||
1489 | CostKind); | ||||||||
1490 | Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, | ||||||||
1491 | CondTy, CostKind); | ||||||||
1492 | return Cost; | ||||||||
1493 | } | ||||||||
1494 | case Intrinsic::uadd_sat: | ||||||||
1495 | case Intrinsic::usub_sat: { | ||||||||
1496 | Type *CondTy = RetTy->getWithNewBitWidth(1); | ||||||||
1497 | |||||||||
1498 | Type *OpTy = StructType::create({RetTy, CondTy}); | ||||||||
1499 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat | ||||||||
1500 | ? Intrinsic::uadd_with_overflow | ||||||||
1501 | : Intrinsic::usub_with_overflow; | ||||||||
1502 | |||||||||
1503 | unsigned Cost = 0; | ||||||||
1504 | IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, | ||||||||
1505 | ScalarizationCostPassed); | ||||||||
1506 | Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); | ||||||||
1507 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, | ||||||||
1508 | CostKind); | ||||||||
1509 | return Cost; | ||||||||
1510 | } | ||||||||
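For the unsigned saturating case above, the priced sequence is one overflow intrinsic plus one select. A plain-C++ illustration of uadd.sat along those lines:

#include <cstdint>
#include <cstdio>

uint32_t uaddSat(uint32_t A, uint32_t B) {
  uint32_t Sum = A + B;               // the add half of uadd.with.overflow
  bool Overflow = Sum < A;            // the overflow bit
  return Overflow ? UINT32_MAX : Sum; // select the saturated value
}

int main() { std::printf("%u\n", uaddSat(0xFFFFFFF0u, 0x20u)); } // 4294967295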
1511 | case Intrinsic::smul_fix: | ||||||||
1512 | case Intrinsic::umul_fix: { | ||||||||
1513 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; | ||||||||
1514 | Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); | ||||||||
1515 | |||||||||
1516 | unsigned ExtOp = | ||||||||
1517 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; | ||||||||
1518 | TTI::CastContextHint CCH = TTI::CastContextHint::None; | ||||||||
1519 | |||||||||
1520 | unsigned Cost = 0; | ||||||||
1521 | Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); | ||||||||
1522 | Cost += | ||||||||
1523 | thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); | ||||||||
1524 | Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, | ||||||||
1525 | CCH, CostKind); | ||||||||
1526 | Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, | ||||||||
1527 | CostKind, TTI::OK_AnyValue, | ||||||||
1528 | TTI::OK_UniformConstantValue); | ||||||||
1529 | Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, | ||||||||
1530 | TTI::OK_AnyValue, | ||||||||
1531 | TTI::OK_UniformConstantValue); | ||||||||
1532 | Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); | ||||||||
1533 | return Cost; | ||||||||
1534 | } | ||||||||
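The fixed-point multiply is priced as two extends, one widened multiply, two truncs, and the shift/or recombination. An illustrative unsigned 32-bit version follows (Scale is assumed to be strictly between 0 and 32); this sketches the shape of the expansion, not LLVM's lowering:

#include <cstdint>
#include <cstdio>

uint32_t umulFix32(uint32_t A, uint32_t B, unsigned Scale) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B;   // zext, zext, mul
  uint32_t Lo = (uint32_t)Wide;                // trunc
  uint32_t Hi = (uint32_t)(Wide >> 32);        // lshr, trunc
  return (Lo >> Scale) | (Hi << (32 - Scale)); // lshr, shl, or
}

int main() {
  // 1.5 * 2.0 in Q16.16 -> 3.0 (0x00030000).
  std::printf("0x%08x\n", umulFix32(0x00018000u, 0x00020000u, 16));
}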
1535 | case Intrinsic::sadd_with_overflow: | ||||||||
1536 | case Intrinsic::ssub_with_overflow: { | ||||||||
1537 | Type *SumTy = RetTy->getContainedType(0); | ||||||||
1538 | Type *OverflowTy = RetTy->getContainedType(1); | ||||||||
1539 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow | ||||||||
1540 | ? BinaryOperator::Add | ||||||||
1541 | : BinaryOperator::Sub; | ||||||||
1542 | |||||||||
1543 | // LHSSign -> LHS >= 0 | ||||||||
1544 | // RHSSign -> RHS >= 0 | ||||||||
1545 | // SumSign -> Sum >= 0 | ||||||||
1546 | // | ||||||||
1547 | // Add: | ||||||||
1548 | // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) | ||||||||
1549 | // Sub: | ||||||||
1550 | // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) | ||||||||
1551 | unsigned Cost = 0; | ||||||||
1552 | Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); | ||||||||
1553 | Cost += 3 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | ||||||||
1554 | OverflowTy, CostKind); | ||||||||
1555 | Cost += 2 * thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, OverflowTy, | ||||||||
1556 | OverflowTy, CostKind); | ||||||||
1557 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, | ||||||||
1558 | CostKind); | ||||||||
1559 | return Cost; | ||||||||
1560 | } | ||||||||
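// A minimal scalar sketch (illustration only) of the sign-compare expansion
// priced above for sadd.with.overflow: a wrapping add, three sign tests, two
// compares of those sign bits, and an and.
#include <cstdint>

bool sadd_overflow_sketch(int32_t L, int32_t R, int32_t &Sum) {
  Sum = (int32_t)((uint32_t)L + (uint32_t)R);          // wrapping add
  bool LHSSign = L >= 0, RHSSign = R >= 0, SumSign = Sum >= 0;
  return (LHSSign == RHSSign) && (LHSSign != SumSign); // overflow condition
}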
1561 | case Intrinsic::uadd_with_overflow: | ||||||||
1562 | case Intrinsic::usub_with_overflow: { | ||||||||
1563 | Type *SumTy = RetTy->getContainedType(0); | ||||||||
1564 | Type *OverflowTy = RetTy->getContainedType(1); | ||||||||
1565 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow | ||||||||
1566 | ? BinaryOperator::Add | ||||||||
1567 | : BinaryOperator::Sub; | ||||||||
1568 | |||||||||
1569 | unsigned Cost = 0; | ||||||||
1570 | Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); | ||||||||
1571 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, | ||||||||
1572 | OverflowTy, CostKind); | ||||||||
1573 | return Cost; | ||||||||
1574 | } | ||||||||
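// A minimal scalar sketch (illustration only) of why the unsigned overflow
// case above needs only the add itself plus one compare.
#include <cstdint>

bool uadd_overflow_sketch(uint32_t L, uint32_t R, uint32_t &Sum) {
  Sum = L + R;    // wrapping add
  return Sum < L; // single icmp: the sum wrapped iff it is smaller than L
}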
1575 | case Intrinsic::smul_with_overflow: | ||||||||
1576 | case Intrinsic::umul_with_overflow: { | ||||||||
1577 | Type *MulTy = RetTy->getContainedType(0); | ||||||||
1578 | Type *OverflowTy = RetTy->getContainedType(1); | ||||||||
1579 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; | ||||||||
1580 | Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); | ||||||||
1581 | |||||||||
1582 | unsigned ExtOp = | ||||||||
1583 | IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt; | ||||||||
1584 | TTI::CastContextHint CCH = TTI::CastContextHint::None; | ||||||||
1585 | |||||||||
1586 | unsigned Cost = 0; | ||||||||
1587 | Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); | ||||||||
1588 | Cost += | ||||||||
1589 | thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); | ||||||||
1590 | Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, | ||||||||
1591 | CCH, CostKind); | ||||||||
1592 | Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy, | ||||||||
1593 | CostKind, TTI::OK_AnyValue, | ||||||||
1594 | TTI::OK_UniformConstantValue); | ||||||||
1595 | |||||||||
1596 | if (IID == Intrinsic::smul_with_overflow) | ||||||||
1597 | Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, | ||||||||
1598 | CostKind, TTI::OK_AnyValue, | ||||||||
1599 | TTI::OK_UniformConstantValue); | ||||||||
1600 | |||||||||
1601 | Cost += thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, | ||||||||
1602 | OverflowTy, CostKind); | ||||||||
1603 | return Cost; | ||||||||
1604 | } | ||||||||
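// A minimal scalar sketch (illustration only, signed i32) of the widened
// multiply-and-check expansion priced above: extend both operands, multiply in
// the doubled-width type, then compare the high half against the sign bits of
// the low half (for the unsigned case the high half is simply compared to 0).
#include <cstdint>

bool smul_overflow_sketch(int32_t A, int32_t B, int32_t &Lo) {
  int64_t Wide = (int64_t)A * (int64_t)B; // sext + sext + wide mul
  Lo = (int32_t)Wide;                     // trunc: low half of the product
  int32_t Hi = (int32_t)(Wide >> 32);     // lshr + trunc: high half
  return Hi != (Lo >> 31);                // ashr of the low half, then icmp
}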
1605 | case Intrinsic::ctpop: | ||||||||
1606 | ISDs.push_back(ISD::CTPOP); | ||||||||
1607 | // In case of legalization use TCC_Expensive. This is cheaper than a | ||||||||
1608 | // library call but still not a cheap instruction. | ||||||||
1609 | SingleCallCost = TargetTransformInfo::TCC_Expensive; | ||||||||
1610 | break; | ||||||||
1611 | // FIXME: ctlz, cttz, ... | ||||||||
1612 | case Intrinsic::bswap: | ||||||||
1613 | ISDs.push_back(ISD::BSWAP); | ||||||||
1614 | break; | ||||||||
1615 | case Intrinsic::bitreverse: | ||||||||
1616 | ISDs.push_back(ISD::BITREVERSE); | ||||||||
1617 | break; | ||||||||
1618 | } | ||||||||
1619 | |||||||||
1620 | const TargetLoweringBase *TLI = getTLI(); | ||||||||
1621 | std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy); | ||||||||
1622 | |||||||||
1623 | SmallVector<unsigned, 2> LegalCost; | ||||||||
1624 | SmallVector<unsigned, 2> CustomCost; | ||||||||
1625 | for (unsigned ISD : ISDs) { | ||||||||
1626 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { | ||||||||
1627 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && | ||||||||
1628 | TLI->isFAbsFree(LT.second)) { | ||||||||
1629 | return 0; | ||||||||
1630 | } | ||||||||
1631 | |||||||||
1632 | // The operation is legal. Assume it costs 1. | ||||||||
1633 | // If the type is split to multiple registers, assume that there is some | ||||||||
1634 | // overhead to this. | ||||||||
1635 | // TODO: Once we have extract/insert subvector cost we need to use them. | ||||||||
1636 | if (LT.first > 1) | ||||||||
1637 | LegalCost.push_back(LT.first * 2); | ||||||||
1638 | else | ||||||||
1639 | LegalCost.push_back(LT.first * 1); | ||||||||
1640 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { | ||||||||
1641 | // If the operation is custom lowered then assume | ||||||||
1642 | // that the code is twice as expensive. | ||||||||
1643 | CustomCost.push_back(LT.first * 2); | ||||||||
1644 | } | ||||||||
1645 | } | ||||||||
1646 | |||||||||
1647 | auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); | ||||||||
1648 | if (MinLegalCostI != LegalCost.end()) | ||||||||
1649 | return *MinLegalCostI; | ||||||||
1650 | |||||||||
1651 | auto MinCustomCostI = | ||||||||
1652 | std::min_element(CustomCost.begin(), CustomCost.end()); | ||||||||
1653 | if (MinCustomCostI != CustomCost.end()) | ||||||||
1654 | return *MinCustomCostI; | ||||||||
1655 | |||||||||
1656 | // If we can't lower fmuladd into an FMA estimate the cost as a floating | ||||||||
1657 | // point mul followed by an add. | ||||||||
1658 | if (IID == Intrinsic::fmuladd) | ||||||||
1659 | return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy, | ||||||||
1660 | CostKind) + | ||||||||
1661 | thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy, | ||||||||
1662 | CostKind); | ||||||||
1663 | if (IID == Intrinsic::experimental_constrained_fmuladd) { | ||||||||
1664 | IntrinsicCostAttributes FMulAttrs( | ||||||||
1665 | Intrinsic::experimental_constrained_fmul, RetTy, Tys); | ||||||||
1666 | IntrinsicCostAttributes FAddAttrs( | ||||||||
1667 | Intrinsic::experimental_constrained_fadd, RetTy, Tys); | ||||||||
1668 | return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) + | ||||||||
1669 | thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind); | ||||||||
1670 | } | ||||||||
1671 | |||||||||
1672 | // Else, assume that we need to scalarize this intrinsic. For math builtins | ||||||||
1673 | // this will emit a costly libcall, adding call overhead and spills. Make it | ||||||||
1674 | // very expensive. | ||||||||
1675 | if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) { | ||||||||
1676 | unsigned ScalarizationCost = SkipScalarizationCost ? | ||||||||
1677 | ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false); | ||||||||
1678 | |||||||||
1679 | unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements(); | ||||||||
1680 | SmallVector<Type *, 4> ScalarTys; | ||||||||
1681 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | ||||||||
1682 | Type *Ty = Tys[i]; | ||||||||
1683 | if (Ty->isVectorTy()) | ||||||||
1684 | Ty = Ty->getScalarType(); | ||||||||
1685 | ScalarTys.push_back(Ty); | ||||||||
1686 | } | ||||||||
1687 | IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF); | ||||||||
1688 | unsigned ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind); | ||||||||
1689 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { | ||||||||
1690 | if (auto *VTy = dyn_cast<VectorType>(Tys[i])) { | ||||||||
1691 | if (!ICA.skipScalarizationCost()) | ||||||||
1692 | ScalarizationCost += getScalarizationOverhead(VTy, false, true); | ||||||||
1693 | ScalarCalls = std::max(ScalarCalls, | ||||||||
1694 | cast<FixedVectorType>(VTy)->getNumElements()); | ||||||||
1695 | } | ||||||||
1696 | } | ||||||||
1697 | return ScalarCalls * ScalarCost + ScalarizationCost; | ||||||||
1698 | } | ||||||||
1699 | |||||||||
1700 | // This is going to be turned into a library call, make it expensive. | ||||||||
1701 | return SingleCallCost; | ||||||||
1702 | } | ||||||||
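// A worked example (illustrative numbers only) of the scalarization fallback
// above for an intrinsic on <4 x float> that has to be split into scalar
// calls: one call per lane plus the cost of extracting the operands and
// re-inserting the results.
inline unsigned scalarization_cost_example() {
  unsigned ScalarCalls = 4;           // lanes of the <4 x float> return type
  unsigned ScalarCost = 10;           // assumed cost of one scalar call
  unsigned ScalarizationOverhead = 8; // assumed extract/insert overhead
  return ScalarCalls * ScalarCost + ScalarizationOverhead; // = 48
}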
1703 | |||||||||
1704 | /// Compute a cost of the given call instruction. | ||||||||
1705 | /// | ||||||||
1706 | /// Compute the cost of calling function F with return type RetTy and | ||||||||
1707 | /// argument types Tys. F might be nullptr, in this case the cost of an | ||||||||
1708 | /// arbitrary call with the specified signature will be returned. | ||||||||
1709 | /// This is used, for instance, when we estimate call of a vector | ||||||||
1710 | /// counterpart of the given function. | ||||||||
1711 | /// \param F Called function, might be nullptr. | ||||||||
712 | /// \param RetTy Return value type. | ||||||||
1713 | /// \param Tys Argument types. | ||||||||
714 | /// \returns The cost of the call instruction. | ||||||||
1715 | unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, | ||||||||
1716 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { | ||||||||
1717 | return 10; | ||||||||
1718 | } | ||||||||
1719 | |||||||||
1720 | unsigned getNumberOfParts(Type *Tp) { | ||||||||
1721 | std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp); | ||||||||
1722 | return LT.first; | ||||||||
1723 | } | ||||||||
1724 | |||||||||
1725 | unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, | ||||||||
1726 | const SCEV *) { | ||||||||
1727 | return 0; | ||||||||
1728 | } | ||||||||
1729 | |||||||||
1730 | /// Try to calculate arithmetic and shuffle op costs for reduction operations. | ||||||||
1731 | /// We're assuming that reduction operations are performed in the following way: | ||||||||
1732 | /// 1. Non-pairwise reduction | ||||||||
1733 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, | ||||||||
1734 | /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef> | ||||||||
1735 | /// \----------------v-------------/ \----------v------------/ | ||||||||
1736 | /// n/2 elements n/2 elements | ||||||||
1737 | /// %red1 = op <n x t> %val, <n x t> val1 | ||||||||
1738 | /// After this operation we have a vector %red1 where only the first n/2 | ||||||||
1739 | /// elements are meaningful, the second n/2 elements are undefined and can be | ||||||||
1740 | /// dropped. All other operations are actually working with the vector of | ||||||||
1741 | /// length n/2, not n, though the real vector length is still n. | ||||||||
1742 | /// %val2 = shufflevector<n x t> %red1, <n x t> %undef, | ||||||||
1743 | /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef> | ||||||||
1744 | /// \----------------v-------------/ \----------v------------/ | ||||||||
1745 | /// n/4 elements 3*n/4 elements | ||||||||
1746 | /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of | ||||||||
1747 | /// length n/2, the resulting vector has length n/4 etc. | ||||||||
1748 | /// 2. Pairwise reduction: | ||||||||
1749 | /// Everything is the same except for an additional shuffle operation which | ||||||||
1750 | /// is used to produce operands for pairwise kind of reductions. | ||||||||
1751 | /// %val1 = shufflevector<n x t> %val, <n x t> %undef, | ||||||||
1752 | /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef> | ||||||||
1753 | /// \-------------v----------/ \----------v------------/ | ||||||||
1754 | /// n/2 elements n/2 elements | ||||||||
1755 | /// %val2 = shufflevector<n x t> %val, <n x t> %undef, | ||||||||
1756 | /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef> | ||||||||
1757 | /// \-------------v----------/ \----------v------------/ | ||||||||
1758 | /// n/2 elements n/2 elements | ||||||||
1759 | /// %red1 = op <n x t> %val1, <n x t> val2 | ||||||||
1760 | /// Again, the operation is performed on <n x t> vector, but the resulting | ||||||||
1761 | /// vector %red1 is <n/2 x t> vector. | ||||||||
1762 | /// | ||||||||
1763 | /// The cost model should take into account that the actual length of the | ||||||||
1764 | /// vector is reduced on each iteration. | ||||||||
1765 | unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, | ||||||||
1766 | bool IsPairwise, | ||||||||
1767 | TTI::TargetCostKind CostKind) { | ||||||||
1768 | Type *ScalarTy = Ty->getElementType(); | ||||||||
1769 | unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements(); | ||||||||
1770 | unsigned NumReduxLevels = Log2_32(NumVecElts); | ||||||||
1771 | unsigned ArithCost = 0; | ||||||||
1772 | unsigned ShuffleCost = 0; | ||||||||
1773 | std::pair<unsigned, MVT> LT = | ||||||||
1774 | thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); | ||||||||
1775 | unsigned LongVectorCount = 0; | ||||||||
1776 | unsigned MVTLen = | ||||||||
1777 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; | ||||||||
1778 | while (NumVecElts > MVTLen) { | ||||||||
1779 | NumVecElts /= 2; | ||||||||
1780 | VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); | ||||||||
1781 | // Assume the pairwise shuffles add a cost. | ||||||||
1782 | ShuffleCost += | ||||||||
1783 | (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector, | ||||||||
1784 | Ty, NumVecElts, SubTy); | ||||||||
1785 | ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind); | ||||||||
1786 | Ty = SubTy; | ||||||||
1787 | ++LongVectorCount; | ||||||||
1788 | } | ||||||||
1789 | |||||||||
1790 | NumReduxLevels -= LongVectorCount; | ||||||||
1791 | |||||||||
1792 | // The minimal length of the vector is limited by the real length of vector | ||||||||
1793 | // operations performed on the current platform. That's why several final | ||||||||
1794 | // reduction operations are performed on the vectors with the same | ||||||||
1795 | // architecture-dependent length. | ||||||||
1796 | |||||||||
1797 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise | ||||||||
1798 | // reductions need two shuffles on every level but the last; on that | ||||||||
1799 | // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle. | ||||||||
1800 | unsigned NumShuffles = NumReduxLevels; | ||||||||
1801 | if (IsPairwise && NumReduxLevels >= 1) | ||||||||
1802 | NumShuffles += NumReduxLevels - 1; | ||||||||
1803 | ShuffleCost += NumShuffles * | ||||||||
1804 | thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty); | ||||||||
1805 | ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty); | ||||||||
1806 | return ShuffleCost + ArithCost + | ||||||||
1807 | thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | ||||||||
1808 | } | ||||||||
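// A minimal scalar sketch (illustration only, assumes a power-of-two element
// count) of the split-in-half reduction the cost above models: Log2(N)
// shuffle+op levels, then one extractelement of lane 0.
#include <cstddef>
#include <vector>

int reduce_add_sketch(std::vector<int> V) {
  for (std::size_t N = V.size(); N > 1; N /= 2) // one reduction level per halving
    for (std::size_t I = 0; I != N / 2; ++I)
      V[I] += V[I + N / 2];                     // op between low and high halves
  return V[0];                                  // final extractelement
}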
1809 | |||||||||
1810 | /// Try to calculate op costs for min/max reduction operations. | ||||||||
1811 | /// \param CondTy Conditional type for the Select instruction. | ||||||||
1812 | unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, | ||||||||
1813 | bool IsPairwise, bool IsUnsigned, | ||||||||
1814 | TTI::TargetCostKind CostKind) { | ||||||||
1815 | Type *ScalarTy = Ty->getElementType(); | ||||||||
1816 | Type *ScalarCondTy = CondTy->getElementType(); | ||||||||
1817 | unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements(); | ||||||||
1818 | unsigned NumReduxLevels = Log2_32(NumVecElts); | ||||||||
1819 | unsigned CmpOpcode; | ||||||||
1820 | if (Ty->isFPOrFPVectorTy()) { | ||||||||
1821 | CmpOpcode = Instruction::FCmp; | ||||||||
1822 | } else { | ||||||||
1823 | assert(Ty->isIntOrIntVectorTy() && | ||||||||
1824 | "expecting floating point or integer type for min/max reduction"); | ||||||||
1825 | CmpOpcode = Instruction::ICmp; | ||||||||
1826 | } | ||||||||
1827 | unsigned MinMaxCost = 0; | ||||||||
1828 | unsigned ShuffleCost = 0; | ||||||||
1829 | std::pair<unsigned, MVT> LT = | ||||||||
1830 | thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); | ||||||||
1831 | unsigned LongVectorCount = 0; | ||||||||
1832 | unsigned MVTLen = | ||||||||
1833 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; | ||||||||
1834 | while (NumVecElts > MVTLen) { | ||||||||
1835 | NumVecElts /= 2; | ||||||||
1836 | auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); | ||||||||
1837 | CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); | ||||||||
1838 | |||||||||
1839 | // Assume the pairwise shuffles add a cost. | ||||||||
1840 | ShuffleCost += | ||||||||
1841 | (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector, | ||||||||
1842 | Ty, NumVecElts, SubTy); | ||||||||
1843 | MinMaxCost += | ||||||||
1844 | thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, CostKind) + | ||||||||
1845 | thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, | ||||||||
1846 | CostKind); | ||||||||
1847 | Ty = SubTy; | ||||||||
1848 | ++LongVectorCount; | ||||||||
1849 | } | ||||||||
1850 | |||||||||
1851 | NumReduxLevels -= LongVectorCount; | ||||||||
1852 | |||||||||
1853 | // The minimal length of the vector is limited by the real length of vector | ||||||||
1854 | // operations performed on the current platform. That's why several final | ||||||||
1855 | // reduction operations are performed on the vectors with the same | ||||||||
1856 | // architecture-dependent length. | ||||||||
1857 | |||||||||
1858 | // Non-pairwise reductions need one shuffle per reduction level. Pairwise | ||||||||
1859 | // reductions need two shuffles on every level but the last; on that | ||||||||
1860 | // level one of the shuffles is <0, u, u, ...>, which is an identity shuffle. | ||||||||
1861 | unsigned NumShuffles = NumReduxLevels; | ||||||||
1862 | if (IsPairwise && NumReduxLevels >= 1) | ||||||||
1863 | NumShuffles += NumReduxLevels - 1; | ||||||||
1864 | ShuffleCost += NumShuffles * | ||||||||
1865 | thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty); | ||||||||
1866 | MinMaxCost += | ||||||||
1867 | NumReduxLevels * | ||||||||
1868 | (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CostKind) + | ||||||||
1869 | thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, | ||||||||
1870 | CostKind)); | ||||||||
1871 | // The last min/max should be in vector registers and we counted it above. | ||||||||
1872 | // So just need a single extractelement. | ||||||||
1873 | return ShuffleCost + MinMaxCost + | ||||||||
1874 | thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); | ||||||||
1875 | } | ||||||||
1876 | |||||||||
1877 | unsigned getVectorSplitCost() { return 1; } | ||||||||
1878 | |||||||||
1879 | /// @} | ||||||||
1880 | }; | ||||||||
1881 | |||||||||
1882 | /// Concrete BasicTTIImpl that can be used if no further customization | ||||||||
1883 | /// is needed. | ||||||||
1884 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { | ||||||||
1885 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; | ||||||||
1886 | |||||||||
1887 | friend class BasicTTIImplBase<BasicTTIImpl>; | ||||||||
1888 | |||||||||
1889 | const TargetSubtargetInfo *ST; | ||||||||
1890 | const TargetLoweringBase *TLI; | ||||||||
1891 | |||||||||
1892 | const TargetSubtargetInfo *getST() const { return ST; } | ||||||||
1893 | const TargetLoweringBase *getTLI() const { return TLI; } | ||||||||
1894 | |||||||||
1895 | public: | ||||||||
1896 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); | ||||||||
1897 | }; | ||||||||
1898 | |||||||||
1899 | } // end namespace llvm | ||||||||
1900 | |||||||||
1901 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |
1 | //===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===// | ||||||||
2 | // | ||||||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
6 | // | ||||||||
7 | //===----------------------------------------------------------------------===// | ||||||||
8 | /// | ||||||||
9 | /// \file | ||||||||
10 | /// This file describes how to lower LLVM code to machine code. This has three | ||||||||
11 | /// main components: | ||||||||
12 | /// | ||||||||
13 | /// 1. Which ValueTypes are natively supported by the target. | ||||||||
14 | /// 2. Which operations are supported for supported ValueTypes. | ||||||||
15 | /// 3. Cost thresholds for alternative implementations of certain operations. | ||||||||
16 | /// | ||||||||
17 | /// In addition it has a few other components, like information about FP | ||||||||
18 | /// immediates. | ||||||||
19 | /// | ||||||||
20 | //===----------------------------------------------------------------------===// | ||||||||
21 | |||||||||
22 | #ifndef LLVM_CODEGEN_TARGETLOWERING_H | ||||||||
23 | #define LLVM_CODEGEN_TARGETLOWERING_H | ||||||||
24 | |||||||||
25 | #include "llvm/ADT/APInt.h" | ||||||||
26 | #include "llvm/ADT/ArrayRef.h" | ||||||||
27 | #include "llvm/ADT/DenseMap.h" | ||||||||
28 | #include "llvm/ADT/STLExtras.h" | ||||||||
29 | #include "llvm/ADT/SmallVector.h" | ||||||||
30 | #include "llvm/ADT/StringRef.h" | ||||||||
31 | #include "llvm/CodeGen/DAGCombine.h" | ||||||||
32 | #include "llvm/CodeGen/ISDOpcodes.h" | ||||||||
33 | #include "llvm/CodeGen/RuntimeLibcalls.h" | ||||||||
34 | #include "llvm/CodeGen/SelectionDAG.h" | ||||||||
35 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||||||
36 | #include "llvm/CodeGen/TargetCallingConv.h" | ||||||||
37 | #include "llvm/CodeGen/ValueTypes.h" | ||||||||
38 | #include "llvm/IR/Attributes.h" | ||||||||
39 | #include "llvm/IR/CallingConv.h" | ||||||||
40 | #include "llvm/IR/DataLayout.h" | ||||||||
41 | #include "llvm/IR/DerivedTypes.h" | ||||||||
42 | #include "llvm/IR/Function.h" | ||||||||
43 | #include "llvm/IR/IRBuilder.h" | ||||||||
44 | #include "llvm/IR/InlineAsm.h" | ||||||||
45 | #include "llvm/IR/Instruction.h" | ||||||||
46 | #include "llvm/IR/Instructions.h" | ||||||||
47 | #include "llvm/IR/Type.h" | ||||||||
48 | #include "llvm/Support/Alignment.h" | ||||||||
49 | #include "llvm/Support/AtomicOrdering.h" | ||||||||
50 | #include "llvm/Support/Casting.h" | ||||||||
51 | #include "llvm/Support/ErrorHandling.h" | ||||||||
52 | #include "llvm/Support/MachineValueType.h" | ||||||||
53 | #include <algorithm> | ||||||||
54 | #include <cassert> | ||||||||
55 | #include <climits> | ||||||||
56 | #include <cstdint> | ||||||||
57 | #include <iterator> | ||||||||
58 | #include <map> | ||||||||
59 | #include <string> | ||||||||
60 | #include <utility> | ||||||||
61 | #include <vector> | ||||||||
62 | |||||||||
63 | namespace llvm { | ||||||||
64 | |||||||||
65 | class BranchProbability; | ||||||||
66 | class CCState; | ||||||||
67 | class CCValAssign; | ||||||||
68 | class Constant; | ||||||||
69 | class FastISel; | ||||||||
70 | class FunctionLoweringInfo; | ||||||||
71 | class GlobalValue; | ||||||||
72 | class GISelKnownBits; | ||||||||
73 | class IntrinsicInst; | ||||||||
74 | struct KnownBits; | ||||||||
75 | class LegacyDivergenceAnalysis; | ||||||||
76 | class LLVMContext; | ||||||||
77 | class MachineBasicBlock; | ||||||||
78 | class MachineFunction; | ||||||||
79 | class MachineInstr; | ||||||||
80 | class MachineJumpTableInfo; | ||||||||
81 | class MachineLoop; | ||||||||
82 | class MachineRegisterInfo; | ||||||||
83 | class MCContext; | ||||||||
84 | class MCExpr; | ||||||||
85 | class Module; | ||||||||
86 | class ProfileSummaryInfo; | ||||||||
87 | class TargetLibraryInfo; | ||||||||
88 | class TargetMachine; | ||||||||
89 | class TargetRegisterClass; | ||||||||
90 | class TargetRegisterInfo; | ||||||||
91 | class TargetTransformInfo; | ||||||||
92 | class Value; | ||||||||
93 | |||||||||
94 | namespace Sched { | ||||||||
95 | |||||||||
96 | enum Preference { | ||||||||
97 | None, // No preference | ||||||||
98 | Source, // Follow source order. | ||||||||
99 | RegPressure, // Scheduling for lowest register pressure. | ||||||||
100 | Hybrid, // Scheduling for both latency and register pressure. | ||||||||
101 | ILP, // Scheduling for ILP in low register pressure mode. | ||||||||
102 | VLIW // Scheduling for VLIW targets. | ||||||||
103 | }; | ||||||||
104 | |||||||||
105 | } // end namespace Sched | ||||||||
106 | |||||||||
107 | // MemOp models a memory operation, either memset or memcpy/memmove. | ||||||||
108 | struct MemOp { | ||||||||
109 | private: | ||||||||
110 | // Shared | ||||||||
111 | uint64_t Size; | ||||||||
112 | bool DstAlignCanChange; // true if destination alignment can satisfy any | ||||||||
113 | // constraint. | ||||||||
114 | Align DstAlign; // Specified alignment of the memory operation. | ||||||||
115 | |||||||||
116 | bool AllowOverlap; | ||||||||
117 | // memset only | ||||||||
118 | bool IsMemset; // If set, this memory operation is a memset. | ||||||||
119 | bool ZeroMemset; // If set, clears out memory with zeros. | ||||||||
120 | // memcpy only | ||||||||
121 | bool MemcpyStrSrc; // Indicates whether the memcpy source is an in-register | ||||||||
122 | // constant so it does not need to be loaded. | ||||||||
123 | Align SrcAlign; // Inferred alignment of the source or default value if the | ||||||||
124 | // memory operation does not need to load the value. | ||||||||
125 | public: | ||||||||
126 | static MemOp Copy(uint64_t Size, bool DstAlignCanChange, Align DstAlign, | ||||||||
127 | Align SrcAlign, bool IsVolatile, | ||||||||
128 | bool MemcpyStrSrc = false) { | ||||||||
129 | MemOp Op; | ||||||||
130 | Op.Size = Size; | ||||||||
131 | Op.DstAlignCanChange = DstAlignCanChange; | ||||||||
132 | Op.DstAlign = DstAlign; | ||||||||
133 | Op.AllowOverlap = !IsVolatile; | ||||||||
134 | Op.IsMemset = false; | ||||||||
135 | Op.ZeroMemset = false; | ||||||||
136 | Op.MemcpyStrSrc = MemcpyStrSrc; | ||||||||
137 | Op.SrcAlign = SrcAlign; | ||||||||
138 | return Op; | ||||||||
139 | } | ||||||||
140 | |||||||||
141 | static MemOp Set(uint64_t Size, bool DstAlignCanChange, Align DstAlign, | ||||||||
142 | bool IsZeroMemset, bool IsVolatile) { | ||||||||
143 | MemOp Op; | ||||||||
144 | Op.Size = Size; | ||||||||
145 | Op.DstAlignCanChange = DstAlignCanChange; | ||||||||
146 | Op.DstAlign = DstAlign; | ||||||||
147 | Op.AllowOverlap = !IsVolatile; | ||||||||
148 | Op.IsMemset = true; | ||||||||
149 | Op.ZeroMemset = IsZeroMemset; | ||||||||
150 | Op.MemcpyStrSrc = false; | ||||||||
151 | return Op; | ||||||||
152 | } | ||||||||
153 | |||||||||
154 | uint64_t size() const { return Size; } | ||||||||
155 | Align getDstAlign() const { | ||||||||
156 | assert(!DstAlignCanChange); | ||||||||
157 | return DstAlign; | ||||||||
158 | } | ||||||||
159 | bool isFixedDstAlign() const { return !DstAlignCanChange; } | ||||||||
160 | bool allowOverlap() const { return AllowOverlap; } | ||||||||
161 | bool isMemset() const { return IsMemset; } | ||||||||
162 | bool isMemcpy() const { return !IsMemset; } | ||||||||
163 | bool isMemcpyWithFixedDstAlign() const { | ||||||||
164 | return isMemcpy() && !DstAlignCanChange; | ||||||||
165 | } | ||||||||
166 | bool isZeroMemset() const { return isMemset() && ZeroMemset; } | ||||||||
167 | bool isMemcpyStrSrc() const { | ||||||||
168 | assert(isMemcpy() && "Must be a memcpy"); | ||||||||
169 | return MemcpyStrSrc; | ||||||||
170 | } | ||||||||
171 | Align getSrcAlign() const { | ||||||||
172 | assert(isMemcpy() && "Must be a memcpy"); | ||||||||
173 | return SrcAlign; | ||||||||
174 | } | ||||||||
175 | bool isSrcAligned(Align AlignCheck) const { | ||||||||
176 | return isMemset() || llvm::isAligned(AlignCheck, SrcAlign.value()); | ||||||||
177 | } | ||||||||
178 | bool isDstAligned(Align AlignCheck) const { | ||||||||
179 | return DstAlignCanChange || llvm::isAligned(AlignCheck, DstAlign.value()); | ||||||||
180 | } | ||||||||
181 | bool isAligned(Align AlignCheck) const { | ||||||||
182 | return isSrcAligned(AlignCheck) && isDstAligned(AlignCheck); | ||||||||
183 | } | ||||||||
184 | }; | ||||||||
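// A minimal usage sketch (illustrative values only) of the MemOp factories
// above, as a backend might describe a 32-byte, non-volatile memcpy whose
// destination alignment is still free to change.
inline void memop_usage_sketch() {
  MemOp Op = MemOp::Copy(/*Size=*/32, /*DstAlignCanChange=*/true,
                         /*DstAlign=*/Align(1), /*SrcAlign=*/Align(16),
                         /*IsVolatile=*/false);
  bool CanOverlap = Op.allowOverlap();         // true: the copy is not volatile
  bool SrcAligned = Op.isSrcAligned(Align(8)); // true: 16-byte source alignment
  (void)CanOverlap;
  (void)SrcAligned;
}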
185 | |||||||||
186 | /// This base class for TargetLowering contains the SelectionDAG-independent | ||||||||
187 | /// parts that can be used from the rest of CodeGen. | ||||||||
188 | class TargetLoweringBase { | ||||||||
189 | public: | ||||||||
190 | /// This enum indicates whether operations are valid for a target, and if not, | ||||||||
191 | /// what action should be used to make them valid. | ||||||||
192 | enum LegalizeAction : uint8_t { | ||||||||
193 | Legal, // The target natively supports this operation. | ||||||||
194 | Promote, // This operation should be executed in a larger type. | ||||||||
195 | Expand, // Try to expand this to other ops, otherwise use a libcall. | ||||||||
196 | LibCall, // Don't try to expand this to other ops, always use a libcall. | ||||||||
197 | Custom // Use the LowerOperation hook to implement custom lowering. | ||||||||
198 | }; | ||||||||
199 | |||||||||
200 | /// This enum indicates whether a type is legal for a target, and if not, | ||||||||
201 | /// what action should be used to make them valid. | ||||||||
202 | enum LegalizeTypeAction : uint8_t { | ||||||||
203 | TypeLegal, // The target natively supports this type. | ||||||||
204 | TypePromoteInteger, // Replace this integer with a larger one. | ||||||||
205 | TypeExpandInteger, // Split this integer into two of half the size. | ||||||||
206 | TypeSoftenFloat, // Convert this float to a same size integer type. | ||||||||
207 | TypeExpandFloat, // Split this float into two of half the size. | ||||||||
208 | TypeScalarizeVector, // Replace this one-element vector with its element. | ||||||||
209 | TypeSplitVector, // Split this vector into two of half the size. | ||||||||
210 | TypeWidenVector, // This vector should be widened into a larger vector. | ||||||||
211 | TypePromoteFloat, // Replace this float with a larger one. | ||||||||
212 | TypeSoftPromoteHalf, // Soften half to i16 and use float to do arithmetic. | ||||||||
213 | TypeScalarizeScalableVector, // This action is explicitly left unimplemented. | ||||||||
214 | // While it is theoretically possible to | ||||||||
215 | // legalize operations on scalable types with a | ||||||||
216 | // loop that handles the vscale * #lanes of the | ||||||||
217 | // vector, this is non-trivial at SelectionDAG | ||||||||
218 | // level and these types are better to be | ||||||||
219 | // widened or promoted. | ||||||||
220 | }; | ||||||||
221 | |||||||||
222 | /// LegalizeKind holds the legalization kind that needs to happen to EVT | ||||||||
223 | /// in order to type-legalize it. | ||||||||
224 | using LegalizeKind = std::pair<LegalizeTypeAction, EVT>; | ||||||||
225 | |||||||||
226 | /// Enum that describes how the target represents true/false values. | ||||||||
227 | enum BooleanContent { | ||||||||
228 | UndefinedBooleanContent, // Only bit 0 counts, the rest can hold garbage. | ||||||||
229 | ZeroOrOneBooleanContent, // All bits zero except for bit 0. | ||||||||
230 | ZeroOrNegativeOneBooleanContent // All bits equal to bit 0. | ||||||||
231 | }; | ||||||||
232 | |||||||||
233 | /// Enum that describes what type of support for selects the target has. | ||||||||
234 | enum SelectSupportKind { | ||||||||
235 | ScalarValSelect, // The target supports scalar selects (ex: cmov). | ||||||||
236 | ScalarCondVectorVal, // The target supports selects with a scalar condition | ||||||||
237 | // and vector values (ex: cmov). | ||||||||
238 | VectorMaskSelect // The target supports vector selects with a vector | ||||||||
239 | // mask (ex: x86 blends). | ||||||||
240 | }; | ||||||||
241 | |||||||||
242 | /// Enum that specifies what an atomic load/AtomicRMWInst is expanded | ||||||||
243 | /// to, if at all. Exists because different targets have different levels of | ||||||||
244 | /// support for these atomic instructions, and also have different options | ||||||||
245 | /// w.r.t. what they should expand to. | ||||||||
246 | enum class AtomicExpansionKind { | ||||||||
247 | None, // Don't expand the instruction. | ||||||||
248 | LLSC, // Expand the instruction into load-linked/store-conditional; used | ||||||||
249 | // by ARM/AArch64. | ||||||||
250 | LLOnly, // Expand the (load) instruction into just a load-linked, which has | ||||||||
251 | // greater atomic guarantees than a normal load. | ||||||||
252 | CmpXChg, // Expand the instruction into cmpxchg; used by at least X86. | ||||||||
253 | MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop. | ||||||||
254 | }; | ||||||||
255 | |||||||||
256 | /// Enum that specifies when a multiplication should be expanded. | ||||||||
257 | enum class MulExpansionKind { | ||||||||
258 | Always, // Always expand the instruction. | ||||||||
259 | OnlyLegalOrCustom, // Only expand when the resulting instructions are legal | ||||||||
260 | // or custom. | ||||||||
261 | }; | ||||||||
262 | |||||||||
263 | /// Enum that specifies when a float negation is beneficial. | ||||||||
264 | enum class NegatibleCost { | ||||||||
265 | Cheaper = 0, // Negated expression is cheaper. | ||||||||
266 | Neutral = 1, // Negated expression has the same cost. | ||||||||
267 | Expensive = 2 // Negated expression is more expensive. | ||||||||
268 | }; | ||||||||
269 | |||||||||
270 | class ArgListEntry { | ||||||||
271 | public: | ||||||||
272 | Value *Val = nullptr; | ||||||||
273 | SDValue Node = SDValue(); | ||||||||
274 | Type *Ty = nullptr; | ||||||||
275 | bool IsSExt : 1; | ||||||||
276 | bool IsZExt : 1; | ||||||||
277 | bool IsInReg : 1; | ||||||||
278 | bool IsSRet : 1; | ||||||||
279 | bool IsNest : 1; | ||||||||
280 | bool IsByVal : 1; | ||||||||
281 | bool IsByRef : 1; | ||||||||
282 | bool IsInAlloca : 1; | ||||||||
283 | bool IsPreallocated : 1; | ||||||||
284 | bool IsReturned : 1; | ||||||||
285 | bool IsSwiftSelf : 1; | ||||||||
286 | bool IsSwiftError : 1; | ||||||||
287 | bool IsCFGuardTarget : 1; | ||||||||
288 | MaybeAlign Alignment = None; | ||||||||
289 | Type *ByValType = nullptr; | ||||||||
290 | Type *PreallocatedType = nullptr; | ||||||||
291 | |||||||||
292 | ArgListEntry() | ||||||||
293 | : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false), | ||||||||
294 | IsNest(false), IsByVal(false), IsByRef(false), IsInAlloca(false), | ||||||||
295 | IsPreallocated(false), IsReturned(false), IsSwiftSelf(false), | ||||||||
296 | IsSwiftError(false), IsCFGuardTarget(false) {} | ||||||||
297 | |||||||||
298 | void setAttributes(const CallBase *Call, unsigned ArgIdx); | ||||||||
299 | }; | ||||||||
300 | using ArgListTy = std::vector<ArgListEntry>; | ||||||||
301 | |||||||||
302 | virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC, | ||||||||
303 | ArgListTy &Args) const {}; | ||||||||
304 | |||||||||
305 | static ISD::NodeType getExtendForContent(BooleanContent Content) { | ||||||||
306 | switch (Content) { | ||||||||
307 | case UndefinedBooleanContent: | ||||||||
308 | // Extend by adding rubbish bits. | ||||||||
309 | return ISD::ANY_EXTEND; | ||||||||
310 | case ZeroOrOneBooleanContent: | ||||||||
311 | // Extend by adding zero bits. | ||||||||
312 | return ISD::ZERO_EXTEND; | ||||||||
313 | case ZeroOrNegativeOneBooleanContent: | ||||||||
314 | // Extend by copying the sign bit. | ||||||||
315 | return ISD::SIGN_EXTEND; | ||||||||
316 | } | ||||||||
317 | llvm_unreachable("Invalid content kind"); | ||||||||
318 | } | ||||||||
319 | |||||||||
320 | explicit TargetLoweringBase(const TargetMachine &TM); | ||||||||
321 | TargetLoweringBase(const TargetLoweringBase &) = delete; | ||||||||
322 | TargetLoweringBase &operator=(const TargetLoweringBase &) = delete; | ||||||||
323 | virtual ~TargetLoweringBase() = default; | ||||||||
324 | |||||||||
325 | /// Return true if the target supports strict float operations. | ||||||||
326 | bool isStrictFPEnabled() const { | ||||||||
327 | return IsStrictFPEnabled; | ||||||||
328 | } | ||||||||
329 | |||||||||
330 | protected: | ||||||||
331 | /// Initialize all of the actions to default values. | ||||||||
332 | void initActions(); | ||||||||
333 | |||||||||
334 | public: | ||||||||
335 | const TargetMachine &getTargetMachine() const { return TM; } | ||||||||
336 | |||||||||
337 | virtual bool useSoftFloat() const { return false; } | ||||||||
338 | |||||||||
339 | /// Return the pointer type for the given address space, defaults to | ||||||||
340 | /// the pointer type from the data layout. | ||||||||
341 | /// FIXME: The default needs to be removed once all the code is updated. | ||||||||
342 | virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const { | ||||||||
343 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); | ||||||||
344 | } | ||||||||
345 | |||||||||
346 | /// Return the in-memory pointer type for the given address space, defaults to | ||||||||
347 | /// the pointer type from the data layout. FIXME: The default needs to be | ||||||||
348 | /// removed once all the code is updated. | ||||||||
349 | MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const { | ||||||||
350 | return MVT::getIntegerVT(DL.getPointerSizeInBits(AS)); | ||||||||
351 | } | ||||||||
352 | |||||||||
353 | /// Return the type for frame index, which is determined by | ||||||||
354 | /// the alloca address space specified through the data layout. | ||||||||
355 | MVT getFrameIndexTy(const DataLayout &DL) const { | ||||||||
356 | return getPointerTy(DL, DL.getAllocaAddrSpace()); | ||||||||
357 | } | ||||||||
358 | |||||||||
359 | /// Return the type for code pointers, which is determined by the program | ||||||||
360 | /// address space specified through the data layout. | ||||||||
361 | MVT getProgramPointerTy(const DataLayout &DL) const { | ||||||||
362 | return getPointerTy(DL, DL.getProgramAddressSpace()); | ||||||||
363 | } | ||||||||
364 | |||||||||
365 | /// Return the type for operands of fence. | ||||||||
366 | /// TODO: Let fence operands be of i32 type and remove this. | ||||||||
367 | virtual MVT getFenceOperandTy(const DataLayout &DL) const { | ||||||||
368 | return getPointerTy(DL); | ||||||||
369 | } | ||||||||
370 | |||||||||
371 | /// EVT is not used in-tree, but is used by out-of-tree targets. | ||||||||
372 | /// A documentation for this function would be nice... | ||||||||
373 | virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const; | ||||||||
374 | |||||||||
375 | EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, | ||||||||
376 | bool LegalTypes = true) const; | ||||||||
377 | |||||||||
378 | /// Return the preferred type to use for a shift opcode, given the shifted | ||||||||
379 | /// amount type is \p ShiftValueTy. | ||||||||
380 | LLVM_READONLY | ||||||||
381 | virtual LLT getPreferredShiftAmountTy(LLT ShiftValueTy) const { | ||||||||
382 | return ShiftValueTy; | ||||||||
383 | } | ||||||||
384 | |||||||||
385 | /// Returns the type to be used for the index operand of: | ||||||||
386 | /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT, | ||||||||
387 | /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR | ||||||||
388 | virtual MVT getVectorIdxTy(const DataLayout &DL) const { | ||||||||
389 | return getPointerTy(DL); | ||||||||
390 | } | ||||||||
391 | |||||||||
392 | /// This callback is used to inspect load/store instructions and add | ||||||||
393 | /// target-specific MachineMemOperand flags to them. The default | ||||||||
394 | /// implementation does nothing. | ||||||||
395 | virtual MachineMemOperand::Flags getTargetMMOFlags(const Instruction &I) const { | ||||||||
396 | return MachineMemOperand::MONone; | ||||||||
397 | } | ||||||||
398 | |||||||||
399 | MachineMemOperand::Flags getLoadMemOperandFlags(const LoadInst &LI, | ||||||||
400 | const DataLayout &DL) const; | ||||||||
401 | MachineMemOperand::Flags getStoreMemOperandFlags(const StoreInst &SI, | ||||||||
402 | const DataLayout &DL) const; | ||||||||
403 | MachineMemOperand::Flags getAtomicMemOperandFlags(const Instruction &AI, | ||||||||
404 | const DataLayout &DL) const; | ||||||||
405 | |||||||||
406 | virtual bool isSelectSupported(SelectSupportKind /*kind*/) const { | ||||||||
407 | return true; | ||||||||
408 | } | ||||||||
409 | |||||||||
410 | /// Return true if it is profitable to convert a select of FP constants into | ||||||||
411 | /// a constant pool load whose address depends on the select condition. The | ||||||||
412 | /// parameter may be used to differentiate a select with FP compare from | ||||||||
413 | /// integer compare. | ||||||||
414 | virtual bool reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { | ||||||||
415 | return true; | ||||||||
416 | } | ||||||||
417 | |||||||||
418 | /// Return true if multiple condition registers are available. | ||||||||
419 | bool hasMultipleConditionRegisters() const { | ||||||||
420 | return HasMultipleConditionRegisters; | ||||||||
421 | } | ||||||||
422 | |||||||||
423 | /// Return true if the target has BitExtract instructions. | ||||||||
424 | bool hasExtractBitsInsn() const { return HasExtractBitsInsn; } | ||||||||
425 | |||||||||
426 | /// Return the preferred vector type legalization action. | ||||||||
427 | virtual TargetLoweringBase::LegalizeTypeAction | ||||||||
428 | getPreferredVectorAction(MVT VT) const { | ||||||||
429 | // The default action for one element vectors is to scalarize | ||||||||
430 | if (VT.getVectorElementCount() == 1) | ||||||||
431 | return TypeScalarizeVector; | ||||||||
432 | // The default action for an odd-width vector is to widen. | ||||||||
433 | if (!VT.isPow2VectorType()) | ||||||||
434 | return TypeWidenVector; | ||||||||
435 | // The default action for other vectors is to promote | ||||||||
436 | return TypePromoteInteger; | ||||||||
437 | } | ||||||||
438 | |||||||||
439 | // Return true if the half type should be passed around as i16, but promoted | ||||||||
440 | // to float around arithmetic. The default behavior is to pass around as | ||||||||
441 | // float and convert around loads/stores/bitcasts and other places where | ||||||||
442 | // the size matters. | ||||||||
443 | virtual bool softPromoteHalfType() const { return false; } | ||||||||
444 | |||||||||
445 | // There are two general methods for expanding a BUILD_VECTOR node: | ||||||||
446 | // 1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle | ||||||||
447 | // them together. | ||||||||
448 | // 2. Build the vector on the stack and then load it. | ||||||||
449 | // If this function returns true, then method (1) will be used, subject to | ||||||||
450 | // the constraint that all of the necessary shuffles are legal (as determined | ||||||||
451 | // by isShuffleMaskLegal). If this function returns false, then method (2) is | ||||||||
452 | // always used. The vector type, and the number of defined values, are | ||||||||
453 | // provided. | ||||||||
454 | virtual bool | ||||||||
455 | shouldExpandBuildVectorWithShuffles(EVT /* VT */, | ||||||||
456 | unsigned DefinedValues) const { | ||||||||
457 | return DefinedValues < 3; | ||||||||
458 | } | ||||||||
459 | |||||||||
460 | /// Return true if integer divide is usually cheaper than a sequence of | ||||||||
461 | /// several shifts, adds, and multiplies for this target. | ||||||||
462 | /// The definition of "cheaper" may depend on whether we're optimizing | ||||||||
463 | /// for speed or for size. | ||||||||
464 | virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; } | ||||||||
465 | |||||||||
466 | /// Return true if the target can handle a standalone remainder operation. | ||||||||
467 | virtual bool hasStandaloneRem(EVT VT) const { | ||||||||
468 | return true; | ||||||||
469 | } | ||||||||
470 | |||||||||
471 | /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X). | ||||||||
472 | virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const { | ||||||||
473 | // Default behavior is to replace SQRT(X) with X*RSQRT(X). | ||||||||
474 | return false; | ||||||||
475 | } | ||||||||
476 | |||||||||
477 | /// Reciprocal estimate status values used by the functions below. | ||||||||
478 | enum ReciprocalEstimate : int { | ||||||||
479 | Unspecified = -1, | ||||||||
480 | Disabled = 0, | ||||||||
481 | Enabled = 1 | ||||||||
482 | }; | ||||||||
483 | |||||||||
484 | /// Return a ReciprocalEstimate enum value for a square root of the given type | ||||||||
485 | /// based on the function's attributes. If the operation is not overridden by | ||||||||
486 | /// the function's attributes, "Unspecified" is returned and target defaults | ||||||||
487 | /// are expected to be used for instruction selection. | ||||||||
488 | int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const; | ||||||||
489 | |||||||||
490 | /// Return a ReciprocalEstimate enum value for a division of the given type | ||||||||
491 | /// based on the function's attributes. If the operation is not overridden by | ||||||||
492 | /// the function's attributes, "Unspecified" is returned and target defaults | ||||||||
493 | /// are expected to be used for instruction selection. | ||||||||
494 | int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const; | ||||||||
495 | |||||||||
496 | /// Return the refinement step count for a square root of the given type based | ||||||||
497 | /// on the function's attributes. If the operation is not overridden by | ||||||||
498 | /// the function's attributes, "Unspecified" is returned and target defaults | ||||||||
499 | /// are expected to be used for instruction selection. | ||||||||
500 | int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const; | ||||||||
501 | |||||||||
502 | /// Return the refinement step count for a division of the given type based | ||||||||
503 | /// on the function's attributes. If the operation is not overridden by | ||||||||
504 | /// the function's attributes, "Unspecified" is returned and target defaults | ||||||||
505 | /// are expected to be used for instruction selection. | ||||||||
506 | int getDivRefinementSteps(EVT VT, MachineFunction &MF) const; | ||||||||
507 | |||||||||
508 | /// Returns true if target has indicated at least one type should be bypassed. | ||||||||
509 | bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); } | ||||||||
510 | |||||||||
511 | /// Returns map of slow types for division or remainder with corresponding | ||||||||
512 | /// fast types | ||||||||
513 | const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const { | ||||||||
514 | return BypassSlowDivWidths; | ||||||||
515 | } | ||||||||
516 | |||||||||
517 | /// Return true if Flow Control is an expensive operation that should be | ||||||||
518 | /// avoided. | ||||||||
519 | bool isJumpExpensive() const { return JumpIsExpensive; } | ||||||||
520 | |||||||||
521 | /// Return true if selects are only cheaper than branches if the branch is | ||||||||
522 | /// unlikely to be predicted right. | ||||||||
523 | bool isPredictableSelectExpensive() const { | ||||||||
524 | return PredictableSelectIsExpensive; | ||||||||
525 | } | ||||||||
526 | |||||||||
527 | virtual bool fallBackToDAGISel(const Instruction &Inst) const { | ||||||||
528 | return false; | ||||||||
529 | } | ||||||||
530 | |||||||||
531 | /// If a branch or a select condition is skewed in one direction by more than | ||||||||
532 | /// this factor, it is very likely to be predicted correctly. | ||||||||
533 | virtual BranchProbability getPredictableBranchThreshold() const; | ||||||||
534 | |||||||||
535 | /// Return true if the following transform is beneficial: | ||||||||
536 | /// fold (conv (load x)) -> (load (conv*)x) | ||||||||
537 | /// On architectures that don't natively support some vector loads | ||||||||
538 | /// efficiently, casting the load to a smaller vector of larger types and | ||||||||
539 | /// loading is more efficient; however, this can be undone by optimizations in | ||||||||
540 | /// the DAG combiner. | ||||||||
541 | virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, | ||||||||
542 | const SelectionDAG &DAG, | ||||||||
543 | const MachineMemOperand &MMO) const { | ||||||||
544 | // Don't do if we could do an indexed load on the original type, but not on | ||||||||
545 | // the new one. | ||||||||
546 | if (!LoadVT.isSimple() || !BitcastVT.isSimple()) | ||||||||
547 | return true; | ||||||||
548 | |||||||||
549 | MVT LoadMVT = LoadVT.getSimpleVT(); | ||||||||
550 | |||||||||
551 | // Don't bother doing this if it's just going to be promoted again later, as | ||||||||
552 | // doing so might interfere with other combines. | ||||||||
553 | if (getOperationAction(ISD::LOAD, LoadMVT) == Promote && | ||||||||
554 | getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT()) | ||||||||
555 | return false; | ||||||||
556 | |||||||||
557 | bool Fast = false; | ||||||||
558 | return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT, | ||||||||
559 | MMO, &Fast) && Fast; | ||||||||
560 | } | ||||||||
561 | |||||||||
562 | /// Return true if the following transform is beneficial: | ||||||||
563 | /// (store (y (conv x)), y*)) -> (store x, (x*)) | ||||||||
564 | virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT, | ||||||||
565 | const SelectionDAG &DAG, | ||||||||
566 | const MachineMemOperand &MMO) const { | ||||||||
567 | // Default to the same logic as loads. | ||||||||
568 | return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO); | ||||||||
569 | } | ||||||||
570 | |||||||||
571 | /// Return true if it is expected to be cheaper to do a store of a non-zero | ||||||||
572 | /// vector constant with the given size and type for the address space than to | ||||||||
573 | /// store the individual scalar element constants. | ||||||||
574 | virtual bool storeOfVectorConstantIsCheap(EVT MemVT, | ||||||||
575 | unsigned NumElem, | ||||||||
576 | unsigned AddrSpace) const { | ||||||||
577 | return false; | ||||||||
578 | } | ||||||||
579 | |||||||||
580 | /// Allow store merging for the specified type after legalization in addition | ||||||||
581 | /// to before legalization. This may transform stores that do not exist | ||||||||
582 | /// earlier (for example, stores created from intrinsics). | ||||||||
583 | virtual bool mergeStoresAfterLegalization(EVT MemVT) const { | ||||||||
584 | return true; | ||||||||
585 | } | ||||||||
586 | |||||||||
587 | /// Returns true if it's reasonable to merge stores to MemVT size. | ||||||||
588 | virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, | ||||||||
589 | const SelectionDAG &DAG) const { | ||||||||
590 | return true; | ||||||||
591 | } | ||||||||
592 | |||||||||
593 | /// Return true if it is cheap to speculate a call to intrinsic cttz. | ||||||||
594 | virtual bool isCheapToSpeculateCttz() const { | ||||||||
595 | return false; | ||||||||
596 | } | ||||||||
597 | |||||||||
598 | /// Return true if it is cheap to speculate a call to intrinsic ctlz. | ||||||||
599 | virtual bool isCheapToSpeculateCtlz() const { | ||||||||
600 | return false; | ||||||||
601 | } | ||||||||
602 | |||||||||
603 | /// Return true if ctlz instruction is fast. | ||||||||
604 | virtual bool isCtlzFast() const { | ||||||||
605 | return false; | ||||||||
606 | } | ||||||||
607 | |||||||||
608 | /// Return true if the instruction generated for an equality comparison is | ||||||||
609 | /// folded with the instruction generated for a signed comparison. | ||||||||
610 | virtual bool isEqualityCmpFoldedWithSignedCmp() const { return true; } | ||||||||
611 | |||||||||
612 | /// Return true if it is safe to transform an integer-domain bitwise operation | ||||||||
613 | /// into the equivalent floating-point operation. This should be set to true | ||||||||
614 | /// if the target has IEEE-754-compliant fabs/fneg operations for the input | ||||||||
615 | /// type. | ||||||||
616 | virtual bool hasBitPreservingFPLogic(EVT VT) const { | ||||||||
617 | return false; | ||||||||
618 | } | ||||||||
619 | |||||||||
620 | /// Return true if it is cheaper to split the store of a merged int val | ||||||||
621 | /// from a pair of smaller values into multiple stores. | ||||||||
622 | virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const { | ||||||||
623 | return false; | ||||||||
624 | } | ||||||||
625 | |||||||||
626 | /// Return true if the target supports combining a | ||||||||
627 | /// chain like: | ||||||||
628 | /// \code | ||||||||
629 | /// %andResult = and %val1, #mask | ||||||||
630 | /// %icmpResult = icmp %andResult, 0 | ||||||||
631 | /// \endcode | ||||||||
632 | /// into a single machine instruction of a form like: | ||||||||
633 | /// \code | ||||||||
634 | /// cc = test %register, #mask | ||||||||
635 | /// \endcode | ||||||||
636 | virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const { | ||||||||
637 | return false; | ||||||||
638 | } | ||||||||
639 | |||||||||
640 | /// Use bitwise logic to make pairs of compares more efficient. For example: | ||||||||
641 | /// and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0 | ||||||||
642 | /// This should be true when it takes more than one instruction to lower | ||||||||
643 | /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on | ||||||||
644 | /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win. | ||||||||
645 | virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const { | ||||||||
646 | return false; | ||||||||
647 | } | ||||||||
648 | |||||||||
649 | /// Return the preferred operand type if the target has a quick way to compare | ||||||||
650 | /// integer values of the given size. Assume that any legal integer type can | ||||||||
651 | /// be compared efficiently. Targets may override this to allow illegal wide | ||||||||
652 | /// types to return a vector type if there is support to compare that type. | ||||||||
653 | virtual MVT hasFastEqualityCompare(unsigned NumBits) const { | ||||||||
654 | MVT VT = MVT::getIntegerVT(NumBits); | ||||||||
655 | return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE; | ||||||||
656 | } | ||||||||
657 | |||||||||
658 | /// Return true if the target should transform: | ||||||||
659 | /// (X & Y) == Y ---> (~X & Y) == 0 | ||||||||
660 | /// (X & Y) != Y ---> (~X & Y) != 0 | ||||||||
661 | /// | ||||||||
662 | /// This may be profitable if the target has a bitwise and-not operation that | ||||||||
663 | /// sets comparison flags. A target may want to limit the transformation based | ||||||||
664 | /// on the type of Y or if Y is a constant. | ||||||||
665 | /// | ||||||||
666 | /// Note that the transform will not occur if Y is known to be a power-of-2 | ||||||||
667 | /// because a mask and compare of a single bit can be handled by inverting the | ||||||||
668 | /// predicate, for example: | ||||||||
669 | /// (X & 8) == 8 ---> (X & 8) != 0 | ||||||||
670 | virtual bool hasAndNotCompare(SDValue Y) const { | ||||||||
671 | return false; | ||||||||
672 | } | ||||||||
673 | |||||||||
674 | /// Return true if the target has a bitwise and-not operation: | ||||||||
675 | /// X = ~A & B | ||||||||
676 | /// This can be used to simplify select or other instructions. | ||||||||
677 | virtual bool hasAndNot(SDValue X) const { | ||||||||
678 | // If the target has the more complex version of this operation, assume that | ||||||||
679 | // it has this operation too. | ||||||||
680 | return hasAndNotCompare(X); | ||||||||
681 | } | ||||||||
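A minimal sketch of how a target with a flag-setting and-not (BIC/ANDN style) instruction might answer these queries; MyTargetLowering and the chosen types are assumptions, not code from this header. Because the default hasAndNot forwards to hasAndNotCompare, this single override enables both queries.

    bool MyTargetLowering::hasAndNotCompare(SDValue Y) const {
      // Only claim the fold for register-sized scalars; anything else would not
      // map onto the assumed flag-setting and-not instruction.
      EVT VT = Y.getValueType();
      return VT == MVT::i32 || VT == MVT::i64;
    }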
682 | |||||||||
683 | /// Return true if the target has a bit-test instruction: | ||||||||
684 | /// (X & (1 << Y)) ==/!= 0 | ||||||||
685 | /// This knowledge can be used to prevent breaking the pattern, | ||||||||
686 | /// or creating it if it could be recognized. | ||||||||
687 | virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } | ||||||||
688 | |||||||||
689 | /// There are two ways to clear extreme bits (either low or high): | ||||||||
690 | /// Mask: x & (-1 << y) (the instcombine canonical form) | ||||||||
691 | /// Shifts: x >> y << y | ||||||||
692 | /// Return true if the variant with 2 variable shifts is preferred. | ||||||||
693 | /// Return false if there is no preference. | ||||||||
694 | virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const { | ||||||||
695 | // By default, let's assume that no one prefers shifts. | ||||||||
696 | return false; | ||||||||
697 | } | ||||||||
698 | |||||||||
699 | /// Return true if it is profitable to fold a pair of shifts into a mask. | ||||||||
700 | /// This is usually true on most targets. But some targets, like Thumb1, | ||||||||
701 | /// have immediate shift instructions, but no immediate "and" instruction; | ||||||||
702 | /// this makes the fold unprofitable. | ||||||||
703 | virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N, | ||||||||
704 | CombineLevel Level) const { | ||||||||
705 | return true; | ||||||||
706 | } | ||||||||
707 | |||||||||
708 | /// Should we transform the IR-optimal check for whether a given truncation | ||||||||
709 | /// down into KeptBits would be truncating or not: | ||||||||
710 | /// (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits) | ||||||||
711 | /// Into its more traditional form: | ||||||||
712 | /// ((%x << C) a>> C) dstcond %x | ||||||||
713 | /// Return true if we should transform. | ||||||||
714 | /// Return false if there is no preference. | ||||||||
715 | virtual bool shouldTransformSignedTruncationCheck(EVT XVT, | ||||||||
716 | unsigned KeptBits) const { | ||||||||
717 | // By default, let's assume that no one prefers shifts. | ||||||||
718 | return false; | ||||||||
719 | } | ||||||||
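A worked instance of the two equivalent checks for KeptBits = 8, written as plain C++ so it can be run directly; it assumes the usual two's-complement narrowing when modelling the sign-extension form.

    #include <cassert>
    #include <cstdint>

    // Both predicates test "does the i32 value x survive truncation to 8 signed bits?".
    static bool fitsAddForm(int32_t x) {
      // (add %x, (1 << (KeptBits-1))) u< (1 << KeptBits)
      return (uint32_t)(x + 128) < 256u;
    }
    static bool fitsSextForm(int32_t x) {
      // ((%x << 24) a>> 24) == %x, i.e. sext_inreg(x, i8) == x.
      return (int32_t)(int8_t)(uint8_t)(uint32_t)x == x;
    }
    int main() {
      for (int32_t x : {-129, -128, -1, 0, 127, 128})
        assert(fitsAddForm(x) == fitsSextForm(x));
    }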
720 | |||||||||
721 | /// Given the pattern | ||||||||
722 | /// (X & (C l>>/<< Y)) ==/!= 0 | ||||||||
723 | /// return true if it should be transformed into: | ||||||||
724 | /// ((X <</l>> Y) & C) ==/!= 0 | ||||||||
725 | /// WARNING: if 'X' is a constant, the fold may deadlock! | ||||||||
726 | /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() | ||||||||
727 | /// here because it can end up being not linked in. | ||||||||
728 | virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( | ||||||||
729 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, | ||||||||
730 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, | ||||||||
731 | SelectionDAG &DAG) const { | ||||||||
732 | if (hasBitTest(X, Y)) { | ||||||||
733 | // One interesting pattern that we'd want to form is 'bit test': | ||||||||
734 | // ((1 << Y) & C) ==/!= 0 | ||||||||
735 | // But we also need to be careful not to try to reverse that fold. | ||||||||
736 | |||||||||
737 | // Is this '1 << Y' ? | ||||||||
738 | if (OldShiftOpcode == ISD::SHL && CC->isOne()) | ||||||||
739 | return false; // Keep the 'bit test' pattern. | ||||||||
740 | |||||||||
741 | // Will it be '1 << Y' after the transform ? | ||||||||
742 | if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) | ||||||||
743 | return true; // Do form the 'bit test' pattern. | ||||||||
744 | } | ||||||||
745 | |||||||||
746 | // If 'X' is a constant, and we transform, then we will immediately | ||||||||
747 | // try to undo the fold, thus causing endless combine loop. | ||||||||
748 | // So by default, let's assume everyone prefers the fold | ||||||||
749 | // iff 'X' is not a constant. | ||||||||
750 | return !XC; | ||||||||
751 | } | ||||||||
752 | |||||||||
753 | /// These two forms are equivalent: | ||||||||
754 | /// sub %y, (xor %x, -1) | ||||||||
755 | /// add (add %x, 1), %y | ||||||||
756 | /// The variant with two add's is IR-canonical. | ||||||||
757 | /// Some targets may prefer one to the other. | ||||||||
758 | virtual bool preferIncOfAddToSubOfNot(EVT VT) const { | ||||||||
759 | // By default, let's assume that everyone prefers the form with two add's. | ||||||||
760 | return true; | ||||||||
761 | } | ||||||||
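A quick arithmetic check of the stated equivalence (two's complement, so ~x == -x - 1); plain C++, runnable as-is.

    #include <cassert>
    int main() {
      for (int x : {-7, 0, 3})
        for (int y : {-2, 0, 11})
          // sub %y, (xor %x, -1)  ==  add (add %x, 1), %y
          assert(y - (x ^ -1) == (x + 1) + y);
    }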
762 | |||||||||
763 | /// Return true if the target wants to use the optimization that | ||||||||
764 | /// turns ext(promotableInst1(...(promotableInstN(load)))) into | ||||||||
765 | /// promotedInst1(...(promotedInstN(ext(load)))). | ||||||||
766 | bool enableExtLdPromotion() const { return EnableExtLdPromotion; } | ||||||||
767 | |||||||||
768 | /// Return true if the target can combine store(extractelement VectorTy, | ||||||||
769 | /// Idx). | ||||||||
770 | /// \p Cost[out] gives the cost of that transformation when this is true. | ||||||||
771 | virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx, | ||||||||
772 | unsigned &Cost) const { | ||||||||
773 | return false; | ||||||||
774 | } | ||||||||
775 | |||||||||
776 | /// Return true if inserting a scalar into a variable element of an undef | ||||||||
777 | /// vector is more efficiently handled by splatting the scalar instead. | ||||||||
778 | virtual bool shouldSplatInsEltVarIndex(EVT) const { | ||||||||
779 | return false; | ||||||||
780 | } | ||||||||
781 | |||||||||
782 | /// Return true if the target always benefits from combining into FMA for a | ||||||||
783 | /// given value type. This must typically return false on targets where FMA | ||||||||
784 | /// takes more cycles to execute than FADD. | ||||||||
785 | virtual bool enableAggressiveFMAFusion(EVT VT) const { | ||||||||
786 | return false; | ||||||||
787 | } | ||||||||
788 | |||||||||
789 | /// Return the ValueType of the result of SETCC operations. | ||||||||
790 | virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, | ||||||||
791 | EVT VT) const; | ||||||||
792 | |||||||||
793 | /// Return the ValueType for comparison libcalls. Comparison libcalls include | ||||||||
794 | /// floating point comparison calls, and Ordered/Unordered check calls on | ||||||||
795 | /// floating point numbers. | ||||||||
796 | virtual | ||||||||
797 | MVT::SimpleValueType getCmpLibcallReturnType() const; | ||||||||
798 | |||||||||
799 | /// For targets without i1 registers, this gives the nature of the high-bits | ||||||||
800 | /// of boolean values held in types wider than i1. | ||||||||
801 | /// | ||||||||
802 | /// "Boolean values" are special true/false values produced by nodes like | ||||||||
803 | /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND. | ||||||||
804 | /// Not to be confused with general values promoted from i1. Some cpus | ||||||||
805 | /// distinguish between vectors of boolean and scalars; the isVec parameter | ||||||||
806 | /// selects between the two kinds. For example on X86 a scalar boolean should | ||||||||
807 | /// be zero extended from i1, while the elements of a vector of booleans | ||||||||
808 | /// should be sign extended from i1. | ||||||||
809 | /// | ||||||||
810 | /// Some cpus also treat floating point types the same way as they treat | ||||||||
811 | /// vectors instead of the way they treat scalars. | ||||||||
812 | BooleanContent getBooleanContents(bool isVec, bool isFloat) const { | ||||||||
813 | if (isVec) | ||||||||
814 | return BooleanVectorContents; | ||||||||
815 | return isFloat ? BooleanFloatContents : BooleanContents; | ||||||||
816 | } | ||||||||
817 | |||||||||
818 | BooleanContent getBooleanContents(EVT Type) const { | ||||||||
819 | return getBooleanContents(Type.isVector(), Type.isFloatingPoint()); | ||||||||
820 | } | ||||||||
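An illustrative consumer of this query (the helper itself is not an LLVM API, just a sketch of how DAG code typically interprets the enum): pick the integer constant a target regards as "true" for a boolean of type VT.

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/Support/ErrorHandling.h"
    using namespace llvm;

    static int64_t trueValueFor(const TargetLowering &TLI, EVT VT) {
      switch (TLI.getBooleanContents(VT)) {
      case TargetLowering::ZeroOrOneBooleanContent:
        return 1;     // booleans are zero-extended: true == 1
      case TargetLowering::ZeroOrNegativeOneBooleanContent:
        return -1;    // booleans are sign-extended: true == all-ones
      case TargetLowering::UndefinedBooleanContent:
        return 1;     // only bit 0 is meaningful; pick 1
      }
      llvm_unreachable("covered switch");
    }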
821 | |||||||||
822 | /// Return target scheduling preference. | ||||||||
823 | Sched::Preference getSchedulingPreference() const { | ||||||||
824 | return SchedPreferenceInfo; | ||||||||
825 | } | ||||||||
826 | |||||||||
827 | /// Some schedulers, e.g. hybrid, can switch to different scheduling heuristics | ||||||||
828 | /// for different nodes. This function returns the preference (or none) for | ||||||||
829 | /// the given node. | ||||||||
830 | virtual Sched::Preference getSchedulingPreference(SDNode *) const { | ||||||||
831 | return Sched::None; | ||||||||
832 | } | ||||||||
833 | |||||||||
834 | /// Return the register class that should be used for the specified value | ||||||||
835 | /// type. | ||||||||
836 | virtual const TargetRegisterClass *getRegClassFor(MVT VT, bool isDivergent = false) const { | ||||||||
837 | (void)isDivergent; | ||||||||
838 | const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; | ||||||||
839 | assert(RC && "This value type is not natively supported!")((RC && "This value type is not natively supported!") ? static_cast<void> (0) : __assert_fail ("RC && \"This value type is not natively supported!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 839, __PRETTY_FUNCTION__)); | ||||||||
840 | return RC; | ||||||||
841 | } | ||||||||
842 | |||||||||
843 | /// Allows target to decide about the register class of the | ||||||||
844 | /// specific value that is live outside the defining block. | ||||||||
845 | /// Returns true if the value needs uniform register class. | ||||||||
846 | virtual bool requiresUniformRegister(MachineFunction &MF, | ||||||||
847 | const Value *) const { | ||||||||
848 | return false; | ||||||||
849 | } | ||||||||
850 | |||||||||
851 | /// Return the 'representative' register class for the specified value | ||||||||
852 | /// type. | ||||||||
853 | /// | ||||||||
854 | /// The 'representative' register class is the largest legal super-reg | ||||||||
855 | /// register class for the register class of the value type. For example, on | ||||||||
856 | /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep | ||||||||
857 | /// register class is GR64 on x86_64. | ||||||||
858 | virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const { | ||||||||
859 | const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy]; | ||||||||
860 | return RC; | ||||||||
861 | } | ||||||||
862 | |||||||||
863 | /// Return the cost of the 'representative' register class for the specified | ||||||||
864 | /// value type. | ||||||||
865 | virtual uint8_t getRepRegClassCostFor(MVT VT) const { | ||||||||
866 | return RepRegClassCostForVT[VT.SimpleTy]; | ||||||||
867 | } | ||||||||
868 | |||||||||
869 | /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS | ||||||||
870 | /// instructions, and false if a library call is preferred (e.g for code-size | ||||||||
871 | /// reasons). | ||||||||
872 | virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const { | ||||||||
873 | return true; | ||||||||
874 | } | ||||||||
875 | |||||||||
876 | /// Return true if the target has native support for the specified value type. | ||||||||
877 | /// This means that it has a register that directly holds it without | ||||||||
878 | /// promotions or expansions. | ||||||||
879 | bool isTypeLegal(EVT VT) const { | ||||||||
880 | assert(!VT.isSimple() ||((!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof (RegClassForVT)) ? static_cast<void> (0) : __assert_fail ("!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 881, __PRETTY_FUNCTION__)) | ||||||||
881 | (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT))((!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof (RegClassForVT)) ? static_cast<void> (0) : __assert_fail ("!VT.isSimple() || (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 881, __PRETTY_FUNCTION__)); | ||||||||
882 | return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr; | ||||||||
883 | } | ||||||||
884 | |||||||||
885 | class ValueTypeActionImpl { | ||||||||
886 | /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum | ||||||||
887 | /// that indicates how instruction selection should deal with the type. | ||||||||
888 | LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; | ||||||||
889 | |||||||||
890 | public: | ||||||||
891 | ValueTypeActionImpl() { | ||||||||
892 | std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), | ||||||||
893 | TypeLegal); | ||||||||
894 | } | ||||||||
895 | |||||||||
896 | LegalizeTypeAction getTypeAction(MVT VT) const { | ||||||||
897 | return ValueTypeActions[VT.SimpleTy]; | ||||||||
898 | } | ||||||||
899 | |||||||||
900 | void setTypeAction(MVT VT, LegalizeTypeAction Action) { | ||||||||
901 | ValueTypeActions[VT.SimpleTy] = Action; | ||||||||
902 | } | ||||||||
903 | }; | ||||||||
904 | |||||||||
905 | const ValueTypeActionImpl &getValueTypeActions() const { | ||||||||
906 | return ValueTypeActions; | ||||||||
907 | } | ||||||||
908 | |||||||||
909 | /// Return how we should legalize values of this type, either it is already | ||||||||
910 | /// legal (return 'Legal') or we need to promote it to a larger type (return | ||||||||
911 | /// 'Promote'), or we need to expand it into multiple registers of smaller | ||||||||
912 | /// integer type (return 'Expand'). 'Custom' is not an option. | ||||||||
913 | LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const { | ||||||||
914 | return getTypeConversion(Context, VT).first; | ||||||||
915 | } | ||||||||
916 | LegalizeTypeAction getTypeAction(MVT VT) const { | ||||||||
917 | return ValueTypeActions.getTypeAction(VT); | ||||||||
918 | } | ||||||||
919 | |||||||||
920 | /// For types supported by the target, this is an identity function. For | ||||||||
921 | /// types that must be promoted to larger types, this returns the larger type | ||||||||
922 | /// to promote to. For integer types that are larger than the largest integer | ||||||||
923 | /// register, this contains one step in the expansion to get to the smaller | ||||||||
924 | /// register. For illegal floating point types, this returns the integer type | ||||||||
925 | /// to transform to. | ||||||||
926 | EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const { | ||||||||
927 | return getTypeConversion(Context, VT).second; | ||||||||
928 | } | ||||||||
929 | |||||||||
930 | /// For types supported by the target, this is an identity function. For | ||||||||
931 | /// types that must be expanded (i.e. integer types that are larger than the | ||||||||
932 | /// largest integer register or illegal floating point types), this returns | ||||||||
933 | /// the largest legal type it will be expanded to. | ||||||||
934 | EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const { | ||||||||
935 | assert(!VT.isVector())((!VT.isVector()) ? static_cast<void> (0) : __assert_fail ("!VT.isVector()", "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 935, __PRETTY_FUNCTION__)); | ||||||||
936 | while (true) { | ||||||||
937 | switch (getTypeAction(Context, VT)) { | ||||||||
938 | case TypeLegal: | ||||||||
939 | return VT; | ||||||||
940 | case TypeExpandInteger: | ||||||||
941 | VT = getTypeToTransformTo(Context, VT); | ||||||||
942 | break; | ||||||||
943 | default: | ||||||||
944 | llvm_unreachable("Type is not legal nor is it to be expanded!")::llvm::llvm_unreachable_internal("Type is not legal nor is it to be expanded!" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 944); | ||||||||
945 | } | ||||||||
946 | } | ||||||||
947 | } | ||||||||
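A hedged illustration of how these queries compose; the concrete results in the comments are the common outcome on a target whose widest legal integer register is 64 bits, not something this header guarantees.

    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    //   getTypeAction(i128)        -> TypeExpandInteger
    //   getTypeToTransformTo(i128) -> i64   (one expansion step: i128 -> 2 x i64)
    //   getTypeToExpandTo(i128)    -> i64   (walks the steps until a legal type)
    void queryLegalization(const TargetLowering &TLI, LLVMContext &Ctx) {
      EVT I128 = EVT::getIntegerVT(Ctx, 128);
      if (TLI.getTypeAction(Ctx, I128) == TargetLowering::TypeExpandInteger) {
        EVT Half = TLI.getTypeToTransformTo(Ctx, I128); // typically MVT::i64
        (void)Half;
      }
    }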
948 | |||||||||
949 | /// Vector types are broken down into some number of legal first class types. | ||||||||
950 | /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8 | ||||||||
951 | /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64 | ||||||||
952 | /// turns into 4 EVT::i32 values with both PPC and X86. | ||||||||
953 | /// | ||||||||
954 | /// This method returns the number of registers needed, and the VT for each | ||||||||
955 | /// register. It also returns the VT and quantity of the intermediate values | ||||||||
956 | /// before they are promoted/expanded. | ||||||||
957 | unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT, | ||||||||
958 | EVT &IntermediateVT, | ||||||||
959 | unsigned &NumIntermediates, | ||||||||
960 | MVT &RegisterVT) const; | ||||||||
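A hedged usage sketch of the breakdown query; the expected numbers assume a target whose widest legal FP vector is v4f32 (an SSE1-class machine) and are illustrative only.

    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    void breakDownV8F32(const TargetLowering &TLI, LLVMContext &Ctx) {
      EVT IntermediateVT;
      MVT RegisterVT;
      unsigned NumIntermediates;
      unsigned NumRegs = TLI.getVectorTypeBreakdown(
          Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT,
          NumIntermediates, RegisterVT);
      // Expected here: NumRegs == 2, NumIntermediates == 2,
      //                IntermediateVT == v4f32, RegisterVT == v4f32.
      (void)NumRegs;
    }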
961 | |||||||||
962 | /// Certain targets such as MIPS require that some types such as vectors are | ||||||||
963 | /// always broken down into scalars in some contexts. This occurs even if the | ||||||||
964 | /// vector type is legal. | ||||||||
965 | virtual unsigned getVectorTypeBreakdownForCallingConv( | ||||||||
966 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, | ||||||||
967 | unsigned &NumIntermediates, MVT &RegisterVT) const { | ||||||||
968 | return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates, | ||||||||
969 | RegisterVT); | ||||||||
970 | } | ||||||||
971 | |||||||||
972 | struct IntrinsicInfo { | ||||||||
973 | unsigned opc = 0; // target opcode | ||||||||
974 | EVT memVT; // memory VT | ||||||||
975 | |||||||||
976 | // value representing memory location | ||||||||
977 | PointerUnion<const Value *, const PseudoSourceValue *> ptrVal; | ||||||||
978 | |||||||||
979 | int offset = 0; // offset off of ptrVal | ||||||||
980 | uint64_t size = 0; // the size of the memory location | ||||||||
981 | // (taken from memVT if zero) | ||||||||
982 | MaybeAlign align = Align(1); // alignment | ||||||||
983 | |||||||||
984 | MachineMemOperand::Flags flags = MachineMemOperand::MONone; | ||||||||
985 | IntrinsicInfo() = default; | ||||||||
986 | }; | ||||||||
987 | |||||||||
988 | /// Given an intrinsic, checks whether on this target the intrinsic will need to | ||||||||
989 | /// map to a MemIntrinsicNode (touches memory). If this is the case, it returns | ||||||||
990 | /// true and stores the intrinsic information into the IntrinsicInfo that was | ||||||||
991 | /// passed to the function. | ||||||||
992 | virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &, | ||||||||
993 | MachineFunction &, | ||||||||
994 | unsigned /*Intrinsic*/) const { | ||||||||
995 | return false; | ||||||||
996 | } | ||||||||
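A sketch of what a target override commonly looks like; MyTargetLowering and Intrinsic::my_target_ldp (a hypothetical 16-byte load-pair intrinsic) are made-up names for illustration, and real targets fill in their own intrinsic IDs, sizes and flags.

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/IR/IntrinsicInst.h"
    using namespace llvm;

    bool MyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                              const CallInst &I,
                                              MachineFunction &MF,
                                              unsigned Intrinsic) const {
      switch (Intrinsic) {
      default:
        return false;                            // not a memory intrinsic
      case Intrinsic::my_target_ldp:             // hypothetical intrinsic ID
        Info.opc = ISD::INTRINSIC_W_CHAIN;       // produces a value and a chain
        Info.memVT = MVT::i128;                  // reads 16 bytes
        Info.ptrVal = I.getArgOperand(0);        // the pointer argument
        Info.offset = 0;
        Info.align = Align(16);
        Info.flags = MachineMemOperand::MOLoad;
        return true;
      }
    }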
997 | |||||||||
998 | /// Returns true if the target can instruction select the specified FP | ||||||||
999 | /// immediate natively. If false, the legalizer will materialize the FP | ||||||||
1000 | /// immediate as a load from a constant pool. | ||||||||
1001 | virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/, | ||||||||
1002 | bool ForCodeSize = false) const { | ||||||||
1003 | return false; | ||||||||
1004 | } | ||||||||
1005 | |||||||||
1006 | /// Targets can use this to indicate that they only support *some* | ||||||||
1007 | /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a | ||||||||
1008 | /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be | ||||||||
1009 | /// legal. | ||||||||
1010 | virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const { | ||||||||
1011 | return true; | ||||||||
1012 | } | ||||||||
1013 | |||||||||
1014 | /// Returns true if the operation can trap for the value type. | ||||||||
1015 | /// | ||||||||
1016 | /// VT must be a legal type. By default, we optimistically assume most | ||||||||
1017 | /// operations don't trap except for integer divide and remainder. | ||||||||
1018 | virtual bool canOpTrap(unsigned Op, EVT VT) const; | ||||||||
1019 | |||||||||
1020 | /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there | ||||||||
1021 | /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a | ||||||||
1022 | /// constant pool entry. | ||||||||
1023 | virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/, | ||||||||
1024 | EVT /*VT*/) const { | ||||||||
1025 | return false; | ||||||||
1026 | } | ||||||||
1027 | |||||||||
1028 | /// Return how this operation should be treated: either it is legal, needs to | ||||||||
1029 | /// be promoted to a larger size, needs to be expanded to some other code | ||||||||
1030 | /// sequence, or the target has a custom expander for it. | ||||||||
1031 | LegalizeAction getOperationAction(unsigned Op, EVT VT) const { | ||||||||
1032 | if (VT.isExtended()) return Expand; | ||||||||
1033 | // If a target-specific SDNode requires legalization, require the target | ||||||||
1034 | // to provide custom legalization for it. | ||||||||
1035 | if (Op >= array_lengthof(OpActions[0])) return Custom; | ||||||||
1036 | return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; | ||||||||
1037 | } | ||||||||
1038 | |||||||||
1039 | /// Custom method defined by each target to indicate if an operation which | ||||||||
1040 | /// may require a scale is supported natively by the target. | ||||||||
1041 | /// If not, the operation is illegal. | ||||||||
1042 | virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT, | ||||||||
1043 | unsigned Scale) const { | ||||||||
1044 | return false; | ||||||||
1045 | } | ||||||||
1046 | |||||||||
1047 | /// Some fixed point operations may be natively supported by the target but | ||||||||
1048 | /// only for specific scales. This method allows for checking | ||||||||
1049 | /// if the width is supported by the target for a given operation that may | ||||||||
1050 | /// depend on scale. | ||||||||
1051 | LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT, | ||||||||
1052 | unsigned Scale) const { | ||||||||
1053 | auto Action = getOperationAction(Op, VT); | ||||||||
1054 | if (Action != Legal) | ||||||||
1055 | return Action; | ||||||||
1056 | |||||||||
1057 | // This operation is supported in this type but may only work on specific | ||||||||
1058 | // scales. | ||||||||
1059 | bool Supported; | ||||||||
1060 | switch (Op) { | ||||||||
1061 | default: | ||||||||
1062 | llvm_unreachable("Unexpected fixed point operation.")::llvm::llvm_unreachable_internal("Unexpected fixed point operation." , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1062); | ||||||||
1063 | case ISD::SMULFIX: | ||||||||
1064 | case ISD::SMULFIXSAT: | ||||||||
1065 | case ISD::UMULFIX: | ||||||||
1066 | case ISD::UMULFIXSAT: | ||||||||
1067 | case ISD::SDIVFIX: | ||||||||
1068 | case ISD::SDIVFIXSAT: | ||||||||
1069 | case ISD::UDIVFIX: | ||||||||
1070 | case ISD::UDIVFIXSAT: | ||||||||
1071 | Supported = isSupportedFixedPointOperation(Op, VT, Scale); | ||||||||
1072 | break; | ||||||||
1073 | } | ||||||||
1074 | |||||||||
1075 | return Supported ? Action : Expand; | ||||||||
1076 | } | ||||||||
1077 | |||||||||
1078 | // If Op is a strict floating-point operation, return the result | ||||||||
1079 | // of getOperationAction for the equivalent non-strict operation. | ||||||||
1080 | LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const { | ||||||||
1081 | unsigned EqOpc; | ||||||||
1082 | switch (Op) { | ||||||||
1083 | default: llvm_unreachable("Unexpected FP pseudo-opcode")::llvm::llvm_unreachable_internal("Unexpected FP pseudo-opcode" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1083); | ||||||||
1084 | #define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ | ||||||||
1085 | case ISD::STRICT_##DAGN: EqOpc = ISD::DAGN; break; | ||||||||
1086 | #define CMP_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN) \ | ||||||||
1087 | case ISD::STRICT_##DAGN: EqOpc = ISD::SETCC; break; | ||||||||
1088 | #include "llvm/IR/ConstrainedOps.def" | ||||||||
1089 | } | ||||||||
1090 | |||||||||
1091 | return getOperationAction(EqOpc, VT); | ||||||||
1092 | } | ||||||||
1093 | |||||||||
1094 | /// Return true if the specified operation is legal on this target or can be | ||||||||
1095 | /// made legal with custom lowering. This is used to help guide high-level | ||||||||
1096 | /// lowering decisions. LegalOnly is an optional convenience for code paths | ||||||||
1097 | /// traversed pre and post legalisation. | ||||||||
1098 | bool isOperationLegalOrCustom(unsigned Op, EVT VT, | ||||||||
1099 | bool LegalOnly = false) const { | ||||||||
1100 | if (LegalOnly) | ||||||||
1101 | return isOperationLegal(Op, VT); | ||||||||
1102 | |||||||||
1103 | return (VT == MVT::Other || isTypeLegal(VT)) && | ||||||||
1104 | (getOperationAction(Op, VT) == Legal || | ||||||||
1105 | getOperationAction(Op, VT) == Custom); | ||||||||
1106 | } | ||||||||
1107 | |||||||||
1108 | /// Return true if the specified operation is legal on this target or can be | ||||||||
1109 | /// made legal using promotion. This is used to help guide high-level lowering | ||||||||
1110 | /// decisions. LegalOnly is an optional convenience for code paths traversed | ||||||||
1111 | /// pre and post legalisation. | ||||||||
1112 | bool isOperationLegalOrPromote(unsigned Op, EVT VT, | ||||||||
1113 | bool LegalOnly = false) const { | ||||||||
1114 | if (LegalOnly) | ||||||||
1115 | return isOperationLegal(Op, VT); | ||||||||
1116 | |||||||||
1117 | return (VT == MVT::Other || isTypeLegal(VT)) && | ||||||||
1118 | (getOperationAction(Op, VT) == Legal || | ||||||||
1119 | getOperationAction(Op, VT) == Promote); | ||||||||
1120 | } | ||||||||
1121 | |||||||||
1122 | /// Return true if the specified operation is legal on this target or can be | ||||||||
1123 | /// made legal with custom lowering or using promotion. This is used to help | ||||||||
1124 | /// guide high-level lowering decisions. LegalOnly is an optional convenience | ||||||||
1125 | /// for code paths traversed pre and post legalisation. | ||||||||
1126 | bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT, | ||||||||
1127 | bool LegalOnly = false) const { | ||||||||
1128 | if (LegalOnly) | ||||||||
1129 | return isOperationLegal(Op, VT); | ||||||||
1130 | |||||||||
1131 | return (VT == MVT::Other || isTypeLegal(VT)) && | ||||||||
1132 | (getOperationAction(Op, VT) == Legal || | ||||||||
1133 | getOperationAction(Op, VT) == Custom || | ||||||||
1134 | getOperationAction(Op, VT) == Promote); | ||||||||
1135 | } | ||||||||
1136 | |||||||||
1137 | /// Return true if the operation uses custom lowering, regardless of whether | ||||||||
1138 | /// the type is legal or not. | ||||||||
1139 | bool isOperationCustom(unsigned Op, EVT VT) const { | ||||||||
1140 | return getOperationAction(Op, VT) == Custom; | ||||||||
1141 | } | ||||||||
1142 | |||||||||
1143 | /// Return true if lowering to a jump table is allowed. | ||||||||
1144 | virtual bool areJTsAllowed(const Function *Fn) const { | ||||||||
1145 | if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true") | ||||||||
1146 | return false; | ||||||||
1147 | |||||||||
1148 | return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || | ||||||||
1149 | isOperationLegalOrCustom(ISD::BRIND, MVT::Other); | ||||||||
1150 | } | ||||||||
1151 | |||||||||
1152 | /// Check whether the range [Low,High] fits in a machine word. | ||||||||
1153 | bool rangeFitsInWord(const APInt &Low, const APInt &High, | ||||||||
1154 | const DataLayout &DL) const { | ||||||||
1155 | // FIXME: Using the pointer type doesn't seem ideal. | ||||||||
1156 | uint64_t BW = DL.getIndexSizeInBits(0u); | ||||||||
1157 | uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX(18446744073709551615UL) - 1) + 1; | ||||||||
1158 | return Range <= BW; | ||||||||
1159 | } | ||||||||
1160 | |||||||||
1161 | /// Return true if lowering to a jump table is suitable for a set of case | ||||||||
1162 | /// clusters which may contain \p NumCases cases, \p Range range of values. | ||||||||
1163 | virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, | ||||||||
1164 | uint64_t Range, ProfileSummaryInfo *PSI, | ||||||||
1165 | BlockFrequencyInfo *BFI) const; | ||||||||
1166 | |||||||||
1167 | /// Return true if lowering to a bit test is suitable for a set of case | ||||||||
1168 | /// clusters which contains \p NumDests unique destinations, \p Low and | ||||||||
1169 | /// \p High as its lowest and highest case values, and expects \p NumCmps | ||||||||
1170 | /// case value comparisons. Check if the number of destinations, comparison | ||||||||
1171 | /// metric, and range are all suitable. | ||||||||
1172 | bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, | ||||||||
1173 | const APInt &Low, const APInt &High, | ||||||||
1174 | const DataLayout &DL) const { | ||||||||
1175 | // FIXME: I don't think NumCmps is the correct metric: a single case and a | ||||||||
1176 | // range of cases both require only one branch to lower. Just looking at the | ||||||||
1177 | // number of clusters and destinations should be enough to decide whether to | ||||||||
1178 | // build bit tests. | ||||||||
1179 | |||||||||
1180 | // To lower a range with bit tests, the range must fit the bitwidth of a | ||||||||
1181 | // machine word. | ||||||||
1182 | if (!rangeFitsInWord(Low, High, DL)) | ||||||||
1183 | return false; | ||||||||
1184 | |||||||||
1185 | // Decide whether it's profitable to lower this range with bit tests. Each | ||||||||
1186 | // destination requires a bit test and branch, and there is an overall range | ||||||||
1187 | // check branch. For a small number of clusters, separate comparisons might | ||||||||
1188 | // be cheaper, and for many destinations, splitting the range might be | ||||||||
1189 | // better. | ||||||||
1190 | return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) || | ||||||||
1191 | (NumDests == 3 && NumCmps >= 6); | ||||||||
1192 | } | ||||||||
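A stand-alone restatement of this heuristic in plain C++, handy for experimenting with the thresholds; it skips the APInt overflow guard of the real code but keeps the same profitability rule.

    #include <cassert>
    #include <cstdint>

    static bool suitableForBitTests(unsigned NumDests, unsigned NumCmps,
                                    uint64_t Low, uint64_t High,
                                    uint64_t WordBits) {
      uint64_t Range = High - Low + 1;           // must fit in one machine word
      if (Range > WordBits)
        return false;
      return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
             (NumDests == 3 && NumCmps >= 6);
    }

    int main() {
      // 5 case values in [10, 40] reaching 2 destinations on a 64-bit word:
      // the range (31) fits and (NumDests == 2 && NumCmps >= 5) holds.
      assert(suitableForBitTests(2, 5, 10, 40, 64));
      // The same shape spread over 100 values does not fit a 64-bit word.
      assert(!suitableForBitTests(2, 5, 0, 99, 64));
    }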
1193 | |||||||||
1194 | /// Return true if the specified operation is illegal on this target or | ||||||||
1195 | /// unlikely to be made legal with custom lowering. This is used to help guide | ||||||||
1196 | /// high-level lowering decisions. | ||||||||
1197 | bool isOperationExpand(unsigned Op, EVT VT) const { | ||||||||
1198 | return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand); | ||||||||
1199 | } | ||||||||
1200 | |||||||||
1201 | /// Return true if the specified operation is legal on this target. | ||||||||
1202 | bool isOperationLegal(unsigned Op, EVT VT) const { | ||||||||
1203 | return (VT == MVT::Other || isTypeLegal(VT)) && | ||||||||
1204 | getOperationAction(Op, VT) == Legal; | ||||||||
1205 | } | ||||||||
1206 | |||||||||
1207 | /// Return how this load with extension should be treated: either it is legal, | ||||||||
1208 | /// needs to be promoted to a larger size, needs to be expanded to some other | ||||||||
1209 | /// code sequence, or the target has a custom expander for it. | ||||||||
1210 | LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, | ||||||||
1211 | EVT MemVT) const { | ||||||||
1212 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; | ||||||||
1213 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; | ||||||||
1214 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; | ||||||||
1215 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&((ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT ::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1216, __PRETTY_FUNCTION__)) | ||||||||
1216 | MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!")((ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT ::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1216, __PRETTY_FUNCTION__)); | ||||||||
1217 | unsigned Shift = 4 * ExtType; | ||||||||
1218 | return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf); | ||||||||
1219 | } | ||||||||
1220 | |||||||||
1221 | /// Return true if the specified load with extension is legal on this target. | ||||||||
1222 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { | ||||||||
1223 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; | ||||||||
1224 | } | ||||||||
1225 | |||||||||
1226 | /// Return true if the specified load with extension is legal or custom | ||||||||
1227 | /// on this target. | ||||||||
1228 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { | ||||||||
1229 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || | ||||||||
1230 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; | ||||||||
1231 | } | ||||||||
1232 | |||||||||
1233 | /// Return how this store with truncation should be treated: either it is | ||||||||
1234 | /// legal, needs to be promoted to a larger size, needs to be expanded to some | ||||||||
1235 | /// other code sequence, or the target has a custom expander for it. | ||||||||
1236 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { | ||||||||
1237 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; | ||||||||
1238 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; | ||||||||
1239 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; | ||||||||
1240 | assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE &&((ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1241, __PRETTY_FUNCTION__)) | ||||||||
1241 | "Table isn't big enough!")((ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!") ? static_cast<void> (0) : __assert_fail ("ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1241, __PRETTY_FUNCTION__)); | ||||||||
1242 | return TruncStoreActions[ValI][MemI]; | ||||||||
1243 | } | ||||||||
1244 | |||||||||
1245 | /// Return true if the specified store with truncation is legal on this | ||||||||
1246 | /// target. | ||||||||
1247 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { | ||||||||
1248 | return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; | ||||||||
1249 | } | ||||||||
1250 | |||||||||
1251 | /// Return true if the specified store with truncation has a solution on this | ||||||||
1252 | /// target. | ||||||||
1253 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { | ||||||||
1254 | return isTypeLegal(ValVT) && | ||||||||
1255 | (getTruncStoreAction(ValVT, MemVT) == Legal || | ||||||||
1256 | getTruncStoreAction(ValVT, MemVT) == Custom); | ||||||||
1257 | } | ||||||||
1258 | |||||||||
1259 | /// Return how the indexed load should be treated: either it is legal, needs | ||||||||
1260 | /// to be promoted to a larger size, needs to be expanded to some other code | ||||||||
1261 | /// sequence, or the target has a custom expander for it. | ||||||||
1262 | LegalizeAction getIndexedLoadAction(unsigned IdxMode, MVT VT) const { | ||||||||
1263 | return getIndexedModeAction(IdxMode, VT, IMAB_Load); | ||||||||
1264 | } | ||||||||
1265 | |||||||||
1266 | /// Return true if the specified indexed load is legal on this target. | ||||||||
1267 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { | ||||||||
1268 | return VT.isSimple() && | ||||||||
1269 | (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || | ||||||||
1270 | getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); | ||||||||
1271 | } | ||||||||
1272 | |||||||||
1273 | /// Return how the indexed store should be treated: either it is legal, needs | ||||||||
1274 | /// to be promoted to a larger size, needs to be expanded to some other code | ||||||||
1275 | /// sequence, or the target has a custom expander for it. | ||||||||
1276 | LegalizeAction getIndexedStoreAction(unsigned IdxMode, MVT VT) const { | ||||||||
1277 | return getIndexedModeAction(IdxMode, VT, IMAB_Store); | ||||||||
1278 | } | ||||||||
1279 | |||||||||
1280 | /// Return true if the specified indexed store is legal on this target. | ||||||||
1281 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { | ||||||||
1282 | return VT.isSimple() && | ||||||||
1283 | (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || | ||||||||
1284 | getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); | ||||||||
1285 | } | ||||||||
1286 | |||||||||
1287 | /// Return how the indexed masked load should be treated: either it is legal, needs | ||||||||
1288 | /// to be promoted to a larger size, needs to be expanded to some other code | ||||||||
1289 | /// sequence, or the target has a custom expander for it. | ||||||||
1290 | LegalizeAction getIndexedMaskedLoadAction(unsigned IdxMode, MVT VT) const { | ||||||||
1291 | return getIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad); | ||||||||
1292 | } | ||||||||
1293 | |||||||||
1294 | /// Return true if the specified indexed masked load is legal on this target. | ||||||||
1295 | bool isIndexedMaskedLoadLegal(unsigned IdxMode, EVT VT) const { | ||||||||
1296 | return VT.isSimple() && | ||||||||
1297 | (getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || | ||||||||
1298 | getIndexedMaskedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); | ||||||||
1299 | } | ||||||||
1300 | |||||||||
1301 | /// Return how the indexed masked store should be treated: either it is legal, needs | ||||||||
1302 | /// to be promoted to a larger size, needs to be expanded to some other code | ||||||||
1303 | /// sequence, or the target has a custom expander for it. | ||||||||
1304 | LegalizeAction getIndexedMaskedStoreAction(unsigned IdxMode, MVT VT) const { | ||||||||
1305 | return getIndexedModeAction(IdxMode, VT, IMAB_MaskedStore); | ||||||||
1306 | } | ||||||||
1307 | |||||||||
1308 | /// Return true if the specified indexed masked store is legal on this target. | ||||||||
1309 | bool isIndexedMaskedStoreLegal(unsigned IdxMode, EVT VT) const { | ||||||||
1310 | return VT.isSimple() && | ||||||||
1311 | (getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || | ||||||||
1312 | getIndexedMaskedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); | ||||||||
1313 | } | ||||||||
1314 | |||||||||
1315 | /// Return how the condition code should be treated: either it is legal, needs | ||||||||
1316 | /// to be expanded to some other code sequence, or the target has a custom | ||||||||
1317 | /// expander for it. | ||||||||
1318 | LegalizeAction | ||||||||
1319 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { | ||||||||
1320 | assert((unsigned)CC < array_lengthof(CondCodeActions) &&(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1322, __PRETTY_FUNCTION__)) | ||||||||
1321 | ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) &&(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1322, __PRETTY_FUNCTION__)) | ||||||||
1322 | "Table isn't big enough!")(((unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions [0]) && "Table isn't big enough!") ? static_cast<void > (0) : __assert_fail ("(unsigned)CC < array_lengthof(CondCodeActions) && ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && \"Table isn't big enough!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1322, __PRETTY_FUNCTION__)); | ||||||||
1323 | // See setCondCodeAction for how this is encoded. | ||||||||
1324 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); | ||||||||
1325 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; | ||||||||
1326 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); | ||||||||
1327 | assert(Action != Promote && "Can't promote condition code!")((Action != Promote && "Can't promote condition code!" ) ? static_cast<void> (0) : __assert_fail ("Action != Promote && \"Can't promote condition code!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1327, __PRETTY_FUNCTION__)); | ||||||||
1328 | return Action; | ||||||||
1329 | } | ||||||||
1330 | |||||||||
1331 | /// Return true if the specified condition code is legal on this target. | ||||||||
1332 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { | ||||||||
1333 | return getCondCodeAction(CC, VT) == Legal; | ||||||||
1334 | } | ||||||||
1335 | |||||||||
1336 | /// Return true if the specified condition code is legal or custom on this | ||||||||
1337 | /// target. | ||||||||
1338 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { | ||||||||
1339 | return getCondCodeAction(CC, VT) == Legal || | ||||||||
1340 | getCondCodeAction(CC, VT) == Custom; | ||||||||
1341 | } | ||||||||
1342 | |||||||||
1343 | /// If the action for this operation is to promote, this method returns the | ||||||||
1344 | /// ValueType to promote to. | ||||||||
1345 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { | ||||||||
1346 | assert(getOperationAction(Op, VT) == Promote &&((getOperationAction(Op, VT) == Promote && "This operation isn't promoted!" ) ? static_cast<void> (0) : __assert_fail ("getOperationAction(Op, VT) == Promote && \"This operation isn't promoted!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1347, __PRETTY_FUNCTION__)) | ||||||||
1347 | "This operation isn't promoted!")((getOperationAction(Op, VT) == Promote && "This operation isn't promoted!" ) ? static_cast<void> (0) : __assert_fail ("getOperationAction(Op, VT) == Promote && \"This operation isn't promoted!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1347, __PRETTY_FUNCTION__)); | ||||||||
1348 | |||||||||
1349 | // See if this has an explicit type specified. | ||||||||
1350 | std::map<std::pair<unsigned, MVT::SimpleValueType>, | ||||||||
1351 | MVT::SimpleValueType>::const_iterator PTTI = | ||||||||
1352 | PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); | ||||||||
1353 | if (PTTI != PromoteToType.end()) return PTTI->second; | ||||||||
1354 | |||||||||
1355 | assert((VT.isInteger() || VT.isFloatingPoint()) &&(((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType." ) ? static_cast<void> (0) : __assert_fail ("(VT.isInteger() || VT.isFloatingPoint()) && \"Cannot autopromote this type, add it with AddPromotedToType.\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1356, __PRETTY_FUNCTION__)) | ||||||||
1356 | "Cannot autopromote this type, add it with AddPromotedToType.")(((VT.isInteger() || VT.isFloatingPoint()) && "Cannot autopromote this type, add it with AddPromotedToType." ) ? static_cast<void> (0) : __assert_fail ("(VT.isInteger() || VT.isFloatingPoint()) && \"Cannot autopromote this type, add it with AddPromotedToType.\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1356, __PRETTY_FUNCTION__)); | ||||||||
1357 | |||||||||
1358 | MVT NVT = VT; | ||||||||
1359 | do { | ||||||||
1360 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); | ||||||||
1361 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid &&((NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && "Didn't find type to promote to!") ? static_cast< void> (0) : __assert_fail ("NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && \"Didn't find type to promote to!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1362, __PRETTY_FUNCTION__)) | ||||||||
1362 | "Didn't find type to promote to!")((NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && "Didn't find type to promote to!") ? static_cast< void> (0) : __assert_fail ("NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && \"Didn't find type to promote to!\"" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1362, __PRETTY_FUNCTION__)); | ||||||||
1363 | } while (!isTypeLegal(NVT) || | ||||||||
1364 | getOperationAction(Op, NVT) == Promote); | ||||||||
1365 | return NVT; | ||||||||
1366 | } | ||||||||
1367 | |||||||||
1368 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM | ||||||||
1369 | /// operations except for the pointer size. If AllowUnknown is true, this | ||||||||
1370 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), | ||||||||
1371 | /// otherwise it will assert. | ||||||||
1372 | EVT getValueType(const DataLayout &DL, Type *Ty, | ||||||||
1373 | bool AllowUnknown = false) const { | ||||||||
1374 | // Lower scalar pointers to native pointer types. | ||||||||
1375 | if (auto *PTy = dyn_cast<PointerType>(Ty)) | ||||||||
1376 | return getPointerTy(DL, PTy->getAddressSpace()); | ||||||||
1377 | |||||||||
1378 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { | ||||||||
1379 | Type *EltTy = VTy->getElementType(); | ||||||||
1380 | // Lower vectors of pointers to native pointer types. | ||||||||
1381 | if (auto *PTy = dyn_cast<PointerType>(EltTy)) { | ||||||||
1382 | EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace())); | ||||||||
1383 | EltTy = PointerTy.getTypeForEVT(Ty->getContext()); | ||||||||
1384 | } | ||||||||
1385 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), | ||||||||
1386 | VTy->getElementCount()); | ||||||||
1387 | } | ||||||||
1388 | |||||||||
1389 | return EVT::getEVT(Ty, AllowUnknown); | ||||||||
1390 | } | ||||||||
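A hedged illustration of the mapping this routine performs, on a typical 64-bit target with flat pointers; the pointer-sized results in the comments are the usual outcome rather than a guarantee of this header.

    #include "llvm/CodeGen/TargetLowering.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    using namespace llvm;

    void showValueTypes(const TargetLowering &TLI, const DataLayout &DL,
                        LLVMContext &Ctx) {
      EVT I32 = TLI.getValueType(DL, Type::getInt32Ty(Ctx));      // MVT::i32
      EVT Ptr = TLI.getValueType(DL, Type::getInt8PtrTy(Ctx));    // e.g. MVT::i64
      EVT VecOfPtr = TLI.getValueType(
          DL, FixedVectorType::get(Type::getInt8PtrTy(Ctx), 4));  // e.g. MVT::v4i64
      (void)I32; (void)Ptr; (void)VecOfPtr;
    }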
1391 | |||||||||
1392 | EVT getMemValueType(const DataLayout &DL, Type *Ty, | ||||||||
1393 | bool AllowUnknown = false) const { | ||||||||
1394 | // Lower scalar pointers to native pointer types. | ||||||||
1395 | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) | ||||||||
1396 | return getPointerMemTy(DL, PTy->getAddressSpace()); | ||||||||
1397 | else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { | ||||||||
1398 | Type *Elm = VTy->getElementType(); | ||||||||
1399 | if (PointerType *PT = dyn_cast<PointerType>(Elm)) { | ||||||||
1400 | EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace())); | ||||||||
1401 | Elm = PointerTy.getTypeForEVT(Ty->getContext()); | ||||||||
1402 | } | ||||||||
1403 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), | ||||||||
1404 | VTy->getElementCount()); | ||||||||
1405 | } | ||||||||
1406 | |||||||||
1407 | return getValueType(DL, Ty, AllowUnknown); | ||||||||
1408 | } | ||||||||
1409 | |||||||||
1410 | |||||||||
1411 | /// Return the MVT corresponding to this LLVM type. See getValueType. | ||||||||
1412 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, | ||||||||
1413 | bool AllowUnknown = false) const { | ||||||||
1414 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); | ||||||||
1415 | } | ||||||||
1416 | |||||||||
1417 | /// Return the desired alignment for ByVal or InAlloca aggregate function | ||||||||
1418 | /// arguments in the caller parameter area. This is the actual alignment, not | ||||||||
1419 | /// its logarithm. | ||||||||
1420 | virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; | ||||||||
1421 | |||||||||
1422 | /// Return the type of registers that this ValueType will eventually require. | ||||||||
1423 | MVT getRegisterType(MVT VT) const { | ||||||||
1424 | assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT))(((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1424, __PRETTY_FUNCTION__)); | ||||||||
1425 | return RegisterTypeForVT[VT.SimpleTy]; | ||||||||
1426 | } | ||||||||
1427 | |||||||||
1428 | /// Return the type of registers that this ValueType will eventually require. | ||||||||
1429 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { | ||||||||
1430 | if (VT.isSimple()) { | ||||||||
1431 | assert((unsigned)VT.getSimpleVT().SimpleTy <(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1432, __PRETTY_FUNCTION__)) | ||||||||
1432 | array_lengthof(RegisterTypeForVT))(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegisterTypeForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1432, __PRETTY_FUNCTION__)); | ||||||||
1433 | return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; | ||||||||
1434 | } | ||||||||
1435 | if (VT.isVector()) { | ||||||||
1436 | EVT VT1; | ||||||||
1437 | MVT RegisterVT; | ||||||||
1438 | unsigned NumIntermediates; | ||||||||
1439 | (void)getVectorTypeBreakdown(Context, VT, VT1, | ||||||||
1440 | NumIntermediates, RegisterVT); | ||||||||
1441 | return RegisterVT; | ||||||||
1442 | } | ||||||||
1443 | if (VT.isInteger()) { | ||||||||
1444 | return getRegisterType(Context, getTypeToTransformTo(Context, VT)); | ||||||||
1445 | } | ||||||||
1446 | llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1446); | ||||||||
1447 | } | ||||||||
1448 | |||||||||
1449 | /// Return the number of registers that this ValueType will eventually | ||||||||
1450 | /// require. | ||||||||
1451 | /// | ||||||||
1452 | /// This is one for any types promoted to live in larger registers, but may be | ||||||||
1453 | /// more than one for types (like i64) that are split into pieces. For types | ||||||||
1454 | /// like i140, which are first promoted then expanded, it is the number of | ||||||||
1455 | /// registers needed to hold all the bits of the original type. For an i140 | ||||||||
1456 | /// on a 32 bit machine this means 5 registers. | ||||||||
1457 | unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { | ||||||||
1458 | if (VT.isSimple()) { | ||||||||
1459 | assert((unsigned)VT.getSimpleVT().SimpleTy <(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1460, __PRETTY_FUNCTION__)) | ||||||||
1460 | array_lengthof(NumRegistersForVT))(((unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT )) ? static_cast<void> (0) : __assert_fail ("(unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(NumRegistersForVT)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1460, __PRETTY_FUNCTION__)); | ||||||||
1461 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; | ||||||||
1462 | } | ||||||||
1463 | if (VT.isVector()) { | ||||||||
1464 | EVT VT1; | ||||||||
1465 | MVT VT2; | ||||||||
1466 | unsigned NumIntermediates; | ||||||||
1467 | return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); | ||||||||
1468 | } | ||||||||
1469 | if (VT.isInteger()) { | ||||||||
1470 | unsigned BitWidth = VT.getSizeInBits(); | ||||||||
1471 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); | ||||||||
1472 | return (BitWidth + RegWidth - 1) / RegWidth; | ||||||||
1473 | } | ||||||||
1474 | llvm_unreachable("Unsupported extended type!")::llvm::llvm_unreachable_internal("Unsupported extended type!" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1474); | ||||||||
1475 | } | ||||||||
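A worked instance of the round-up division used for expanded integers: i140 on a 32-bit target, matching the "5 registers" figure in the comment above.

    // (BitWidth + RegWidth - 1) / RegWidth with BitWidth = 140, RegWidth = 32.
    static_assert((140 + 32 - 1) / 32 == 5, "i140 needs 5 x i32 registers");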
1476 | |||||||||
1477 | /// Certain combinations of ABIs, Targets and features require that types | ||||||||
1478 | /// are legal for some operations and not for other operations. | ||||||||
1479 | /// For MIPS all vector types must be passed through the integer register set. | ||||||||
1480 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, | ||||||||
1481 | CallingConv::ID CC, EVT VT) const { | ||||||||
1482 | return getRegisterType(Context, VT); | ||||||||
1483 | } | ||||||||
1484 | |||||||||
1485 | /// Certain targets require unusual breakdowns of certain types. For MIPS, | ||||||||
1486 | /// this occurs when a vector type is used, as vectors are passed through the | ||||||||
1487 | /// integer register set. | ||||||||
1488 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, | ||||||||
1489 | CallingConv::ID CC, | ||||||||
1490 | EVT VT) const { | ||||||||
1491 | return getNumRegisters(Context, VT); | ||||||||
1492 | } | ||||||||
1493 | |||||||||
1494 | /// Certain targets have context-sensitive alignment requirements, where one | ||||||||
1495 | /// type has the alignment requirement of another type. | ||||||||
1496 | virtual Align getABIAlignmentForCallingConv(Type *ArgTy, | ||||||||
1497 | DataLayout DL) const { | ||||||||
1498 | return DL.getABITypeAlign(ArgTy); | ||||||||
1499 | } | ||||||||
1500 | |||||||||
1501 | /// If true, then instruction selection should seek to shrink the FP constant | ||||||||
1502 | /// of the specified type to a smaller type in order to save space and / or | ||||||||
1503 | /// reduce runtime. | ||||||||
1504 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } | ||||||||
1505 | |||||||||
1506 | /// Return true if it is profitable to reduce a load to a smaller type. | ||||||||
1507 | /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x | ||||||||
1508 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, | ||||||||
1509 | EVT NewVT) const { | ||||||||
1510 | // By default, assume that it is cheaper to extract a subvector from a wide | ||||||||
1511 | // vector load rather than creating multiple narrow vector loads. | ||||||||
1512 | if (NewVT.isVector() && !Load->hasOneUse()) | ||||||||
1513 | return false; | ||||||||
1514 | |||||||||
1515 | return true; | ||||||||
1516 | } | ||||||||
1517 | |||||||||
1518 | /// When splitting a value of the specified type into parts, does the Lo | ||||||||
1519 | /// or Hi part come first? This usually follows the endianness, except | ||||||||
1520 | /// for ppcf128, where the Hi part always comes first. | ||||||||
1521 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { | ||||||||
1522 | return DL.isBigEndian() || VT == MVT::ppcf128; | ||||||||
1523 | } | ||||||||
1524 | |||||||||
1525 | /// If true, the target has custom DAG combine transformations that it can | ||||||||
1526 | /// perform for the specified node. | ||||||||
1527 | bool hasTargetDAGCombine(ISD::NodeType NT) const { | ||||||||
1528 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray))((unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray )) ? static_cast<void> (0) : __assert_fail ("unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1528, __PRETTY_FUNCTION__)); | ||||||||
1529 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); | ||||||||
1530 | } | ||||||||
1531 | |||||||||
1532 | unsigned getGatherAllAliasesMaxDepth() const { | ||||||||
1533 | return GatherAllAliasesMaxDepth; | ||||||||
1534 | } | ||||||||
1535 | |||||||||
1536 | /// Returns the size of the platform's va_list object. | ||||||||
1537 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { | ||||||||
1538 | return getPointerTy(DL).getSizeInBits(); | ||||||||
1539 | } | ||||||||
1540 | |||||||||
1541 | /// Get maximum # of store operations permitted for llvm.memset | ||||||||
1542 | /// | ||||||||
1543 | /// This function returns the maximum number of store operations permitted | ||||||||
1544 | /// to replace a call to llvm.memset. The value is set by the target at the | ||||||||
1545 | /// performance threshold for such a replacement. If OptSize is true, | ||||||||
1546 | /// return the limit for functions that have OptSize attribute. | ||||||||
1547 | unsigned getMaxStoresPerMemset(bool OptSize) const { | ||||||||
1548 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; | ||||||||
1549 | } | ||||||||
1550 | |||||||||
1551 | /// Get maximum # of store operations permitted for llvm.memcpy | ||||||||
1552 | /// | ||||||||
1553 | /// This function returns the maximum number of store operations permitted | ||||||||
1554 | /// to replace a call to llvm.memcpy. The value is set by the target at the | ||||||||
1555 | /// performance threshold for such a replacement. If OptSize is true, | ||||||||
1556 | /// return the limit for functions that have OptSize attribute. | ||||||||
1557 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { | ||||||||
1558 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; | ||||||||
1559 | } | ||||||||
1560 | |||||||||
1561 | /// \brief Get maximum # of store operations to be glued together | ||||||||
1562 | /// | ||||||||
1563 | /// This function returns the maximum number of store operations permitted | ||||||||
1564 | /// to glue together during lowering of llvm.memcpy. The value is set by | ||||||||
1565 | /// the target at the performance threshold for such a replacement. | ||||||||
1566 | virtual unsigned getMaxGluedStoresPerMemcpy() const { | ||||||||
1567 | return MaxGluedStoresPerMemcpy; | ||||||||
1568 | } | ||||||||
1569 | |||||||||
1570 | /// Get maximum # of load operations permitted for memcmp | ||||||||
1571 | /// | ||||||||
1572 | /// This function returns the maximum number of load operations permitted | ||||||||
1573 | /// to replace a call to memcmp. The value is set by the target at the | ||||||||
1574 | /// performance threshold for such a replacement. If OptSize is true, | ||||||||
1575 | /// return the limit for functions that have OptSize attribute. | ||||||||
1576 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { | ||||||||
1577 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; | ||||||||
1578 | } | ||||||||
1579 | |||||||||
1580 | /// Get maximum # of store operations permitted for llvm.memmove | ||||||||
1581 | /// | ||||||||
1582 | /// This function returns the maximum number of store operations permitted | ||||||||
1583 | /// to replace a call to llvm.memmove. The value is set by the target at the | ||||||||
1584 | /// performance threshold for such a replacement. If OptSize is true, | ||||||||
1585 | /// return the limit for functions that have OptSize attribute. | ||||||||
1586 | unsigned getMaxStoresPerMemmove(bool OptSize) const { | ||||||||
1587 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; | ||||||||
1588 | } | ||||||||
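// Editorial sketch (not part of TargetLowering.h): the MaxStoresPer* and
// MaxLoadsPerMemcmp limits read by the accessors above are protected members
// that a target normally tunes in its TargetLowering constructor. The numbers
// and the "MyTargetLowering" class are illustrative assumptions only.
MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
    : TargetLowering(TM) {
  MaxStoresPerMemset = 32;       // inline memset with up to 32 stores ...
  MaxStoresPerMemsetOptSize = 8; // ... but only 8 when optimizing for size.
  MaxStoresPerMemcpy = 16;
  MaxStoresPerMemcpyOptSize = 4;
  MaxStoresPerMemmove = 16;
  MaxStoresPerMemmoveOptSize = 4;
  MaxLoadsPerMemcmp = 8;         // read by getMaxExpandSizeMemcmp().
  MaxLoadsPerMemcmpOptSize = 4;
}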
1589 | |||||||||
1590 | /// Determine if the target supports unaligned memory accesses. | ||||||||
1591 | /// | ||||||||
1592 | /// This function returns true if the target allows unaligned memory accesses | ||||||||
1593 | /// of the specified type in the given address space. If true, it also returns | ||||||||
1594 | /// whether the unaligned memory access is "fast" in the last argument by | ||||||||
1595 | /// reference. This is used, for example, in situations where an array | ||||||||
1596 | /// copy/move/set is converted to a sequence of store operations. Its use | ||||||||
1597 | /// helps to ensure that such replacements don't generate code that causes an | ||||||||
1598 | /// alignment error (trap) on the target machine. | ||||||||
1599 | virtual bool allowsMisalignedMemoryAccesses( | ||||||||
1600 | EVT, unsigned AddrSpace = 0, unsigned Align = 1, | ||||||||
1601 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, | ||||||||
1602 | bool * /*Fast*/ = nullptr) const { | ||||||||
1603 | return false; | ||||||||
1604 | } | ||||||||
1605 | |||||||||
1606 | /// LLT handling variant. | ||||||||
1607 | virtual bool allowsMisalignedMemoryAccesses( | ||||||||
1608 | LLT, unsigned AddrSpace = 0, Align Alignment = Align(1), | ||||||||
1609 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, | ||||||||
1610 | bool * /*Fast*/ = nullptr) const { | ||||||||
1611 | return false; | ||||||||
1612 | } | ||||||||
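// Editorial sketch (not part of TargetLowering.h): a typical override of the
// EVT flavour declared above. The policy (allow misaligned scalar accesses,
// report them as fast only at 32 bits or below) and the class name are
// assumptions for illustration.
bool MyTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Alignment,
    MachineMemOperand::Flags Flags, bool *Fast) const {
  if (VT.isVector())
    return false;                     // keep vector accesses aligned.
  if (Fast)
    *Fast = VT.getSizeInBits() <= 32; // small misaligned accesses are cheap.
  return true;
}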
1613 | |||||||||
1614 | /// This function returns true if the memory access is aligned or if the | ||||||||
1615 | /// target allows this specific unaligned memory access. If the access is | ||||||||
1616 | /// allowed, the optional final parameter returns if the access is also fast | ||||||||
1617 | /// (as defined by the target). | ||||||||
1618 | bool allowsMemoryAccessForAlignment( | ||||||||
1619 | LLVMContext &Context, const DataLayout &DL, EVT VT, | ||||||||
1620 | unsigned AddrSpace = 0, Align Alignment = Align(1), | ||||||||
1621 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, | ||||||||
1622 | bool *Fast = nullptr) const; | ||||||||
1623 | |||||||||
1624 | /// Return true if the memory access of this type is aligned or if the target | ||||||||
1625 | /// allows this specific unaligned access for the given MachineMemOperand. | ||||||||
1626 | /// If the access is allowed, the optional final parameter returns if the | ||||||||
1627 | /// access is also fast (as defined by the target). | ||||||||
1628 | bool allowsMemoryAccessForAlignment(LLVMContext &Context, | ||||||||
1629 | const DataLayout &DL, EVT VT, | ||||||||
1630 | const MachineMemOperand &MMO, | ||||||||
1631 | bool *Fast = nullptr) const; | ||||||||
1632 | |||||||||
1633 | /// Return true if the target supports a memory access of this type for the | ||||||||
1634 | /// given address space and alignment. If the access is allowed, the optional | ||||||||
1635 | /// final parameter returns if the access is also fast (as defined by the | ||||||||
1636 | /// target). | ||||||||
1637 | virtual bool | ||||||||
1638 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, | ||||||||
1639 | unsigned AddrSpace = 0, Align Alignment = Align(1), | ||||||||
1640 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, | ||||||||
1641 | bool *Fast = nullptr) const; | ||||||||
1642 | |||||||||
1643 | /// Return true if the target supports a memory access of this type for the | ||||||||
1644 | /// given MachineMemOperand. If the access is allowed, the optional | ||||||||
1645 | /// final parameter returns if the access is also fast (as defined by the | ||||||||
1646 | /// target). | ||||||||
1647 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, | ||||||||
1648 | const MachineMemOperand &MMO, | ||||||||
1649 | bool *Fast = nullptr) const; | ||||||||
1650 | |||||||||
1651 | /// Returns the target specific optimal type for load and store operations as | ||||||||
1652 | /// a result of memset, memcpy, and memmove lowering. | ||||||||
1653 | /// It returns EVT::Other if the type should be determined using generic | ||||||||
1654 | /// target-independent logic. | ||||||||
1655 | virtual EVT | ||||||||
1656 | getOptimalMemOpType(const MemOp &Op, | ||||||||
1657 | const AttributeList & /*FuncAttributes*/) const { | ||||||||
1658 | return MVT::Other; | ||||||||
1659 | } | ||||||||
1660 | |||||||||
1661 | /// LLT returning variant. | ||||||||
1662 | virtual LLT | ||||||||
1663 | getOptimalMemOpLLT(const MemOp &Op, | ||||||||
1664 | const AttributeList & /*FuncAttributes*/) const { | ||||||||
1665 | return LLT(); | ||||||||
1666 | } | ||||||||
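// Editorial sketch (not part of TargetLowering.h): a backend with 128-bit
// vector stores might steer memcpy/memset lowering towards them like this.
// The size thresholds, alignment checks and class name are illustrative
// assumptions, not a particular in-tree policy.
EVT MyTargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  if (Op.size() >= 16 && Op.isAligned(Align(16)))
    return MVT::v2i64; // copy 16 bytes per store.
  if (Op.size() >= 8 && Op.isAligned(Align(8)))
    return MVT::i64;
  return MVT::Other;   // fall back to generic selection.
}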
1667 | |||||||||
1668 | /// Returns true if it's safe to use load / store of the specified type to | ||||||||
1669 | /// expand memcpy / memset inline. | ||||||||
1670 | /// | ||||||||
1671 | /// This is mostly true for all types except for some special cases. For | ||||||||
1672 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / | ||||||||
1673 | /// fstpl which also does type conversion. Note the specified type doesn't | ||||||||
1674 | /// have to be legal as the hook is used before type legalization. | ||||||||
1675 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } | ||||||||
1676 | |||||||||
1677 | /// Return lower limit for number of blocks in a jump table. | ||||||||
1678 | virtual unsigned getMinimumJumpTableEntries() const; | ||||||||
1679 | |||||||||
1680 | /// Return lower limit of the density in a jump table. | ||||||||
1681 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; | ||||||||
1682 | |||||||||
1683 | /// Return upper limit for number of entries in a jump table. | ||||||||
1684 | /// Zero if no limit. | ||||||||
1685 | unsigned getMaximumJumpTableSize() const; | ||||||||
1686 | |||||||||
1687 | virtual bool isJumpTableRelative() const; | ||||||||
1688 | |||||||||
1689 | /// Return true if a mulh[s|u] node for a specific type is cheaper than | ||||||||
1690 | /// a multiply followed by a shift. This is false by default. | ||||||||
1691 | virtual bool isMulhCheaperThanMulShift(EVT Type) const { return false; } | ||||||||
1692 | |||||||||
1693 | /// If a physical register, this specifies the register that | ||||||||
1694 | /// llvm.stacksave/llvm.stackrestore should save and restore. | ||||||||
1695 | unsigned getStackPointerRegisterToSaveRestore() const { | ||||||||
1696 | return StackPointerRegisterToSaveRestore; | ||||||||
1697 | } | ||||||||
1698 | |||||||||
1699 | /// If a physical register, this returns the register that receives the | ||||||||
1700 | /// exception address on entry to an EH pad. | ||||||||
1701 | virtual Register | ||||||||
1702 | getExceptionPointerRegister(const Constant *PersonalityFn) const { | ||||||||
1703 | return Register(); | ||||||||
1704 | } | ||||||||
1705 | |||||||||
1706 | /// If a physical register, this returns the register that receives the | ||||||||
1707 | /// exception typeid on entry to a landing pad. | ||||||||
1708 | virtual Register | ||||||||
1709 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { | ||||||||
1710 | return Register(); | ||||||||
1711 | } | ||||||||
1712 | |||||||||
1713 | virtual bool needsFixedCatchObjects() const { | ||||||||
1714 | report_fatal_error("Funclet EH is not implemented for this target"); | ||||||||
1715 | } | ||||||||
1716 | |||||||||
1717 | /// Return the minimum stack alignment of an argument. | ||||||||
1718 | Align getMinStackArgumentAlignment() const { | ||||||||
1719 | return MinStackArgumentAlignment; | ||||||||
1720 | } | ||||||||
1721 | |||||||||
1722 | /// Return the minimum function alignment. | ||||||||
1723 | Align getMinFunctionAlignment() const { return MinFunctionAlignment; } | ||||||||
1724 | |||||||||
1725 | /// Return the preferred function alignment. | ||||||||
1726 | Align getPrefFunctionAlignment() const { return PrefFunctionAlignment; } | ||||||||
1727 | |||||||||
1728 | /// Return the preferred loop alignment. | ||||||||
1729 | virtual Align getPrefLoopAlignment(MachineLoop *ML = nullptr) const { | ||||||||
1730 | return PrefLoopAlignment; | ||||||||
1731 | } | ||||||||
1732 | |||||||||
1733 | /// Should loops be aligned even when the function is marked OptSize (but not | ||||||||
1734 | /// MinSize). | ||||||||
1735 | virtual bool alignLoopsWithOptSize() const { | ||||||||
1736 | return false; | ||||||||
1737 | } | ||||||||
1738 | |||||||||
1739 | /// If the target has a standard location for the stack protector guard, | ||||||||
1740 | /// returns the address of that location. Otherwise, returns nullptr. | ||||||||
1741 | /// DEPRECATED: please override useLoadStackGuardNode and customize | ||||||||
1742 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). | ||||||||
1743 | virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; | ||||||||
1744 | |||||||||
1745 | /// Inserts necessary declarations for SSP (stack protection) purpose. | ||||||||
1746 | /// Should be used only when getIRStackGuard returns nullptr. | ||||||||
1747 | virtual void insertSSPDeclarations(Module &M) const; | ||||||||
1748 | |||||||||
1749 | /// Return the variable that's previously inserted by insertSSPDeclarations, | ||||||||
1750 | /// if any, otherwise return nullptr. Should be used only when | ||||||||
1751 | /// getIRStackGuard returns nullptr. | ||||||||
1752 | virtual Value *getSDagStackGuard(const Module &M) const; | ||||||||
1753 | |||||||||
1754 | /// If this function returns true, stack protection checks should XOR the | ||||||||
1755 | /// frame pointer (or whichever pointer is used to address locals) into the | ||||||||
1756 | /// stack guard value before checking it. getIRStackGuard must return nullptr | ||||||||
1757 | /// if this returns true. | ||||||||
1758 | virtual bool useStackGuardXorFP() const { return false; } | ||||||||
1759 | |||||||||
1760 | /// If the target has a standard stack protection check function that | ||||||||
1761 | /// performs validation and error handling, returns the function. Otherwise, | ||||||||
1762 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. | ||||||||
1763 | /// Should be used only when getIRStackGuard returns nullptr. | ||||||||
1764 | virtual Function *getSSPStackGuardCheck(const Module &M) const; | ||||||||
1765 | |||||||||
1766 | protected: | ||||||||
1767 | Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, | ||||||||
1768 | bool UseTLS) const; | ||||||||
1769 | |||||||||
1770 | public: | ||||||||
1771 | /// Returns the target-specific address of the unsafe stack pointer. | ||||||||
1772 | virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; | ||||||||
1773 | |||||||||
1774 | /// Returns true if the target uses a special symbol to emit stack probes; | ||||||||
1775 | /// see getStackProbeSymbolName below for the symbol itself. | ||||||||
1776 | virtual bool hasStackProbeSymbol(MachineFunction &MF) const { return false; } | ||||||||
1777 | |||||||||
1778 | virtual bool hasInlineStackProbe(MachineFunction &MF) const { return false; } | ||||||||
1779 | |||||||||
1780 | virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { | ||||||||
1781 | return ""; | ||||||||
1782 | } | ||||||||
1783 | |||||||||
1784 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we | ||||||||
1785 | /// are happy to sink it into basic blocks. A cast may be free, but not | ||||||||
1786 | /// necessarily a no-op. e.g. a free truncate from a 64-bit to 32-bit pointer. | ||||||||
1787 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const; | ||||||||
1788 | |||||||||
1789 | /// Return true if the pointer arguments to CI should be aligned by aligning | ||||||||
1790 | /// the object whose address is being passed. If so then MinSize is set to the | ||||||||
1791 | /// minimum size the object must be to be aligned and PrefAlign is set to the | ||||||||
1792 | /// preferred alignment. | ||||||||
1793 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, | ||||||||
1794 | unsigned & /*PrefAlign*/) const { | ||||||||
1795 | return false; | ||||||||
1796 | } | ||||||||
1797 | |||||||||
1798 | //===--------------------------------------------------------------------===// | ||||||||
1799 | /// \name Helpers for TargetTransformInfo implementations | ||||||||
1800 | /// @{ | ||||||||
1801 | |||||||||
1802 | /// Get the ISD node that corresponds to the Instruction class opcode. | ||||||||
1803 | int InstructionOpcodeToISD(unsigned Opcode) const; | ||||||||
1804 | |||||||||
1805 | /// Estimate the cost of type-legalization and the legalized type. | ||||||||
1806 | std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, | ||||||||
1807 | Type *Ty) const; | ||||||||
1808 | |||||||||
1809 | /// @} | ||||||||
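// Editorial sketch (not part of TargetLowering.h): how a TargetTransformInfo
// implementation (such as the AArch64 one this report was generated for)
// typically uses the helper declared above. The cost weights and the
// free-function form are assumptions for illustration only.
int getLegalizedOpCostSketch(const TargetLoweringBase *TLI,
                             const DataLayout &DL, Type *Ty) {
  // LT.first is how many legal pieces the type splits into; LT.second is the
  // legalized machine type each piece ends up as.
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  int CostPerPiece = LT.second.isVector() ? 2 : 1; // assumed per-piece cost.
  return LT.first * CostPerPiece;
}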
1810 | |||||||||
1811 | //===--------------------------------------------------------------------===// | ||||||||
1812 | /// \name Helpers for atomic expansion. | ||||||||
1813 | /// @{ | ||||||||
1814 | |||||||||
1815 | /// Returns the maximum atomic operation size (in bits) supported by | ||||||||
1816 | /// the backend. Atomic operations greater than this size (as well | ||||||||
1817 | /// as ones that are not naturally aligned), will be expanded by | ||||||||
1818 | /// AtomicExpandPass into an __atomic_* library call. | ||||||||
1819 | unsigned getMaxAtomicSizeInBitsSupported() const { | ||||||||
1820 | return MaxAtomicSizeInBitsSupported; | ||||||||
1821 | } | ||||||||
1822 | |||||||||
1823 | /// Returns the size of the smallest cmpxchg or ll/sc instruction | ||||||||
1824 | /// the backend supports. Any smaller operations are widened in | ||||||||
1825 | /// AtomicExpandPass. | ||||||||
1826 | /// | ||||||||
1827 | /// Note that *unlike* operations above the maximum size, atomic ops | ||||||||
1828 | /// are still natively supported below the minimum; they just | ||||||||
1829 | /// require a more complex expansion. | ||||||||
1830 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } | ||||||||
1831 | |||||||||
1832 | /// Whether the target supports unaligned atomic operations. | ||||||||
1833 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } | ||||||||
1834 | |||||||||
1835 | /// Whether AtomicExpandPass should automatically insert fences and reduce | ||||||||
1836 | /// ordering for this atomic. This should be true for most architectures with | ||||||||
1837 | /// weak memory ordering. Defaults to false. | ||||||||
1838 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { | ||||||||
1839 | return false; | ||||||||
1840 | } | ||||||||
1841 | |||||||||
1842 | /// Perform a load-linked operation on Addr, returning a "Value *" with the | ||||||||
1843 | /// corresponding pointee type. This may entail some non-trivial operations to | ||||||||
1844 | /// truncate or reconstruct types that will be illegal in the backend. See | ||||||||
1845 | /// ARMISelLowering for an example implementation. | ||||||||
1846 | virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, | ||||||||
1847 | AtomicOrdering Ord) const { | ||||||||
1848 | llvm_unreachable("Load linked unimplemented on this target")::llvm::llvm_unreachable_internal("Load linked unimplemented on this target" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1848); | ||||||||
1849 | } | ||||||||
1850 | |||||||||
1851 | /// Perform a store-conditional operation to Addr. Return the status of the | ||||||||
1852 | /// store. This should be 0 if the store succeeded, non-zero otherwise. | ||||||||
1853 | virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, | ||||||||
1854 | Value *Addr, AtomicOrdering Ord) const { | ||||||||
1855 | llvm_unreachable("Store conditional unimplemented on this target")::llvm::llvm_unreachable_internal("Store conditional unimplemented on this target" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1855); | ||||||||
1856 | } | ||||||||
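// Editorial sketch (not part of TargetLowering.h): an ARM-flavoured override
// of the two LL/SC hooks above, condensed from the pattern ARMISelLowering
// uses. "MyTargetLowering" stands in for the real target class; the
// acquire/release intrinsic variants and value-type adjustments are omitted.
Value *MyTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
                                        AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getModule();
  Type *Tys[] = {Addr->getType()};
  Function *Ldrex = Intrinsic::getDeclaration(M, Intrinsic::arm_ldrex, Tys);
  return Builder.CreateCall(Ldrex, Addr); // yields the loaded value as an i32.
}

Value *MyTargetLowering::emitStoreConditional(IRBuilder<> &Builder, Value *Val,
                                              Value *Addr,
                                              AtomicOrdering Ord) const {
  Module *M = Builder.GetInsertBlock()->getModule();
  Type *Tys[] = {Addr->getType()};
  Function *Strex = Intrinsic::getDeclaration(M, Intrinsic::arm_strex, Tys);
  // Per the contract above: 0 if the store succeeded, non-zero otherwise.
  return Builder.CreateCall(Strex, {Val, Addr});
}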
1857 | |||||||||
1858 | /// Perform a masked atomicrmw using a target-specific intrinsic. This | ||||||||
1859 | /// represents the core LL/SC loop which will be lowered at a late stage by | ||||||||
1860 | /// the backend. | ||||||||
1861 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, | ||||||||
1862 | AtomicRMWInst *AI, | ||||||||
1863 | Value *AlignedAddr, Value *Incr, | ||||||||
1864 | Value *Mask, Value *ShiftAmt, | ||||||||
1865 | AtomicOrdering Ord) const { | ||||||||
1866 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked atomicrmw expansion unimplemented on this target" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1866); | ||||||||
1867 | } | ||||||||
1868 | |||||||||
1869 | /// Perform a masked cmpxchg using a target-specific intrinsic. This | ||||||||
1870 | /// represents the core LL/SC loop which will be lowered at a late stage by | ||||||||
1871 | /// the backend. | ||||||||
1872 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( | ||||||||
1873 | IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, | ||||||||
1874 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { | ||||||||
1875 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target")::llvm::llvm_unreachable_internal("Masked cmpxchg expansion unimplemented on this target" , "/build/llvm-toolchain-snapshot-12~++20200927111121+5811d723998/llvm/include/llvm/CodeGen/TargetLowering.h" , 1875); | ||||||||
1876 | } | ||||||||
1877 | |||||||||
1878 | /// Inserts in the IR a target-specific intrinsic specifying a fence. | ||||||||
1879 | /// It is called by AtomicExpandPass before expanding an | ||||||||
1880 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad | ||||||||
1881 | /// if shouldInsertFencesForAtomic returns true. | ||||||||
1882 | /// | ||||||||
1883 | /// Inst is the original atomic instruction, prior to other expansions that | ||||||||
1884 | /// may be performed. | ||||||||
1885 | /// | ||||||||
1886 | /// This function should either return a nullptr, or a pointer to an IR-level | ||||||||
1887 | /// Instruction*. Even complex fence sequences can be represented by a | ||||||||
1888 | /// single Instruction* through an intrinsic to be lowered later. | ||||||||
1889 | /// Backends should override this method to produce target-specific intrinsics | ||||||||
1890 | /// for their fences. | ||||||||
1891 | /// FIXME: Please note that the default implementation here in terms of | ||||||||
1892 | /// IR-level fences exists for historical/compatibility reasons and is | ||||||||
1893 | /// *unsound* ! Fences cannot, in general, be used to restore sequential | ||||||||
1894 | /// consistency. For example, consider the following example: | ||||||||
1895 | /// atomic<int> x = y = 0; | ||||||||
1896 | /// int r1, r2, r3, r4; | ||||||||
1897 | /// Thread 0: | ||||||||
1898 | /// x.store(1); | ||||||||
1899 | /// Thread 1: | ||||||||
1900 | /// y.store(1); | ||||||||
1901 | /// Thread 2: | ||||||||
1902 | /// r1 = x.load(); | ||||||||
1903 | /// r2 = y.load(); | ||||||||
1904 | /// Thread 3: | ||||||||
1905 | /// r3 = y.load(); | ||||||||
1906 | /// r4 = x.load(); | ||||||||
1907 | /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all | ||||||||
1908 | /// seq_cst. But if they are lowered to monotonic accesses, no amount of | ||||||||
1909 | /// IR-level fences can prevent it. | ||||||||
1910 | /// @{ | ||||||||
1911 | virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, | ||||||||
1912 | AtomicOrdering Ord) const { | ||||||||
1913 | if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) | ||||||||
1914 | return Builder.CreateFence(Ord); | ||||||||
1915 | else | ||||||||
1916 | return nullptr; | ||||||||
1917 | } | ||||||||
1918 | |||||||||
1919 | virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, | ||||||||
1920 | Instruction *Inst, | ||||||||
1921 | AtomicOrdering Ord) const { | ||||||||
1922 | if (isAcquireOrStronger(Ord)) | ||||||||
1923 | return Builder.CreateFence(Ord); | ||||||||
1924 | else | ||||||||
1925 | return nullptr; | ||||||||
1926 | } | ||||||||
1927 | /// @} | ||||||||
1928 | |||||||||
1929 | // Emits code that executes when the comparison result in the ll/sc | ||||||||
1930 | // expansion of a cmpxchg instruction is such that the store-conditional will | ||||||||
1931 | // not execute. This makes it possible to balance out the load-linked with | ||||||||
1932 | // a dedicated instruction, if desired. | ||||||||
1933 | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would | ||||||||
1934 | // be unnecessarily held, except if clrex, inserted by this hook, is executed. | ||||||||
1935 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} | ||||||||
1936 | |||||||||
1937 | /// Returns true if the given (atomic) store should be expanded by the | ||||||||
1938 | /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. | ||||||||
1939 | virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { | ||||||||
1940 | return false; | ||||||||
1941 | } | ||||||||
1942 | |||||||||
1943 | /// Returns true if arguments should be sign-extended in lib calls. | ||||||||
1944 | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { | ||||||||
1945 | return IsSigned; | ||||||||
1946 | } | ||||||||
1947 | |||||||||
1948 | /// Returns true if arguments should be extended in lib calls. | ||||||||
1949 | virtual bool shouldExtendTypeInLibCall(EVT Type) const { | ||||||||
1950 | return true; | ||||||||
1951 | } | ||||||||
1952 | |||||||||
1953 | /// Returns how the given (atomic) load should be expanded by the | ||||||||
1954 | /// IR-level AtomicExpand pass. | ||||||||
1955 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { | ||||||||
1956 | return AtomicExpansionKind::None; | ||||||||
1957 | } | ||||||||
1958 | |||||||||
1959 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level | ||||||||
1960 | /// AtomicExpand pass. | ||||||||
1961 | virtual AtomicExpansionKind | ||||||||
1962 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { | ||||||||
1963 | return AtomicExpansionKind::None; | ||||||||
1964 | } | ||||||||
1965 | |||||||||
1966 | /// Returns how the IR-level AtomicExpand pass should expand the given | ||||||||
1967 | /// AtomicRMW, if at all. Default is to never expand. | ||||||||
1968 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { | ||||||||
1969 | return RMW->isFloatingPointOperation() ? | ||||||||
1970 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; | ||||||||
1971 | } | ||||||||
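// Editorial sketch (not part of TargetLowering.h): an LL/SC-style target
// commonly routes every atomicrmw up to its native width through the
// load-linked/store-conditional expansion. The 128-bit cutoff and the class
// name are assumptions for illustration.
TargetLowering::AtomicExpansionKind
MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
  if (RMW->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg; // assume no native FP atomics.
  unsigned Size = RMW->getType()->getPrimitiveSizeInBits();
  if (Size <= 128)
    return AtomicExpansionKind::LLSC;    // use emitLoadLinked/emitStoreConditional.
  return AtomicExpansionKind::None;      // oversized ops become libcalls.
}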
1972 | |||||||||
1973 | /// On some platforms, an AtomicRMW that never actually modifies the value | ||||||||
1974 | /// (such as fetch_add of 0) can be turned into a fence followed by an | ||||||||
1975 | /// atomic load. This may sound useless, but it makes it possible for the | ||||||||
1976 | /// processor to keep the cacheline shared, dramatically improving | ||||||||
1977 | /// performance. And such idempotent RMWs are useful for implementing some | ||||||||
1978 | /// kinds of locks, see for example (justification + benchmarks): | ||||||||
1979 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf | ||||||||
1980 | /// This method tries doing that transformation, returning the atomic load if | ||||||||
1981 | /// it succeeds, and nullptr otherwise. | ||||||||
1982 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo | ||||||||
1983 | /// another round of expansion. | ||||||||
1984 | virtual LoadInst * | ||||||||
1985 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { | ||||||||
1986 | return nullptr; | ||||||||
1987 | } | ||||||||
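// Editorial sketch (not part of TargetLowering.h): the general shape of this
// transformation: emit a fence, then replace the idempotent RMW with an
// atomic load. The unconditional seq_cst fence and the monotonic load
// ordering are simplifying assumptions; a real target must derive both from
// the RMW's original ordering.
LoadInst *
MyTargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
  IRBuilder<> Builder(AI);
  Builder.CreateFence(AtomicOrdering::SequentiallyConsistent,
                      AI->getSyncScopeID());
  LoadInst *Loaded = Builder.CreateAlignedLoad(
      AI->getType(), AI->getPointerOperand(), AI->getAlign());
  Loaded->setAtomic(AtomicOrdering::Monotonic, AI->getSyncScopeID());
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return Loaded;
}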
1988 | |||||||||
1989 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, | ||||||||
1990 | /// SIGN_EXTEND, or ANY_EXTEND). | ||||||||
1991 | virtual ISD::NodeType getExtendForAtomicOps() const { | ||||||||
1992 | return ISD::ZERO_EXTEND; | ||||||||
1993 | } | ||||||||
1994 | |||||||||
1995 | /// Returns how the platform's atomic compare and swap expects its comparison | ||||||||
1996 | /// value to be extended (ZERO_EXTEND, SIGN_EXTEND, or ANY_EXTEND). This is | ||||||||
1997 | /// separate from getExtendForAtomicOps, which is concerned with the | ||||||||
1998 | /// sign-extension of the instruction's output, whereas here we are concerned | ||||||||
1999 | /// with the sign-extension of the input. For targets with compare-and-swap | ||||||||
2000 | /// instructions (or sub-word comparisons in their LL/SC loop expansions), | ||||||||
2001 | /// the input can be ANY_EXTEND, but the output will still have a specific | ||||||||
2002 | /// extension. | ||||||||
2003 | virtual ISD::NodeType getExtendForAtomicCmpSwapArg() const { | ||||||||
2004 | return ISD::ANY_EXTEND; | ||||||||
2005 | } | ||||||||
2006 | |||||||||
2007 | /// @} | ||||||||
2008 | |||||||||
2009 | /// Returns true if we should normalize | ||||||||
2010 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and | ||||||||
2011 | /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely | ||||||||
2012 | /// that it saves us from materializing N0 and N1 in an integer register. | ||||||||
2013 | /// Targets that are able to perform and/or on flags should return false here. | ||||||||
2014 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, | ||||||||
2015 | EVT VT) const { | ||||||||
2016 | // If a target has multiple condition registers, then it likely has logical | ||||||||
2017 | // operations on those registers. | ||||||||
2018 | if (hasMultipleConditionRegisters()) | ||||||||
2019 | return false; | ||||||||
2020 | // Only do the transform if the value won't be split into multiple | ||||||||
2021 | // registers. | ||||||||
2022 | LegalizeTypeAction Action = getTypeAction(Context, VT); | ||||||||
2023 | return Action != TypeExpandInteger && Action != TypeExpandFloat && | ||||||||
2024 | Action != TypeSplitVector; | ||||||||
2025 | } | ||||||||
2026 | |||||||||
2027 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } | ||||||||
2028 | |||||||||
2029 | /// Return true if a select of constants (select Cond, C1, C2) should be | ||||||||
2030 | /// transformed into simple math ops with the condition value. For example: | ||||||||
2031 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 | ||||||||
2032 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { | ||||||||
2033 | return false; | ||||||||
2034 | } | ||||||||
2035 | |||||||||
2036 | /// Return true if it is profitable to transform an integer | ||||||||
2037 | /// multiplication-by-constant into simpler operations like shifts and adds. | ||||||||
2038 | /// This may be true if the target does not directly support the | ||||||||
2039 | /// multiplication operation for the specified type or the sequence of simpler | ||||||||
2040 | /// ops is faster than the multiply. | ||||||||
2041 | virtual bool decomposeMulByConstant(LLVMContext &Context, | ||||||||
2042 | EVT VT, SDValue C) const { | ||||||||
2043 | return false; | ||||||||
2044 | } | ||||||||
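// Editorial sketch (not part of TargetLowering.h): a typical override that
// lets the DAG combiner rewrite "mul x, C" as shift plus add/sub when C is
// one away from a power of two. The class name is an assumption; the pattern
// mirrors what several in-tree targets do.
bool MyTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                              SDValue C) const {
  if (!VT.isScalarInteger())
    return false;
  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
    const APInt &Imm = ConstNode->getAPIntValue();
    // (2^N + 1) * x -> (x << N) + x and (2^N - 1) * x -> (x << N) - x.
    return (Imm - 1).isPowerOf2() || (Imm + 1).isPowerOf2();
  }
  return false;
}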
2045 | |||||||||
2046 | /// Return true if it is more correct/profitable to use strict FP_TO_INT | ||||||||
2047 | /// conversion operations - canonicalizing the FP source value instead of | ||||||||
2048 | /// converting all cases and then selecting based on value. | ||||||||
2049 | /// This may be true if the target throws exceptions for out of bounds | ||||||||
2050 | /// conversions or has fast FP CMOV. | ||||||||
2051 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, | ||||||||
2052 | bool IsSigned) const { | ||||||||
2053 | return false; | ||||||||
2054 | } | ||||||||
2055 | |||||||||
2056 | //===--------------------------------------------------------------------===// | ||||||||
2057 | // TargetLowering Configuration Methods - These methods should be invoked by | ||||||||
2058 | // the derived class constructor to configure this object for the target. | ||||||||
2059 | // | ||||||||
2060 | protected: | ||||||||
2061 | /// Specify how the target extends the result of integer and floating point | ||||||||
2062 | /// boolean values from i1 to a wider type. See getBooleanContents. | ||||||||
2063 | void setBooleanContents(BooleanContent Ty) { | ||||||||
2064 | BooleanContents = Ty; | ||||||||
2065 | BooleanFloatContents = Ty; | ||||||||
2066 | } | ||||||||
2067 | |||||||||
2068 | /// Specify how the target extends the result of integer and floating point | ||||||||
2069 | /// boolean values from i1 to a wider type. See getBooleanContents. | ||||||||
2070 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { | ||||||||
2071 | BooleanContents = IntTy; | ||||||||
2072 | BooleanFloatContents = FloatTy; | ||||||||
2073 | } | ||||||||
2074 | |||||||||
2075 | /// Specify how the target extends the result of a vector boolean value from a | ||||||||
2076 | /// vector of i1 to a wider type. See getBooleanContents. | ||||||||
2077 | void setBooleanVectorContents(BooleanContent Ty) { | ||||||||
2078 | BooleanVectorContents = Ty; | ||||||||
2079 | } | ||||||||
2080 | |||||||||
2081 | /// Specify the target scheduling preference. | ||||||||
2082 | void setSchedulingPreference(Sched::Preference Pref) { | ||||||||
2083 | SchedPreferenceInfo = Pref; | ||||||||
2084 | } | ||||||||
2085 | |||||||||
2086 | /// Indicate the minimum number of blocks to generate jump tables. | ||||||||
2087 | void setMinimumJumpTableEntries(unsigned Val); | ||||||||
2088 | |||||||||
2089 | /// Indicate the maximum number of entries in jump tables. | ||||||||
2090 | /// Set to zero to generate unlimited jump tables. | ||||||||
2091 | void setMaximumJumpTableSize(unsigned); | ||||||||
2092 | |||||||||
2093 | /// If set to a physical register, this specifies the register that | ||||||||
2094 | /// llvm.stacksave/llvm.stackrestore should save and restore. | ||||||||
2095 | void setStackPointerRegisterToSaveRestore(Register R) { | ||||||||
2096 | StackPointerRegisterToSaveRestore = R; | ||||||||
2097 | } | ||||||||
2098 | |||||||||
2099 | /// Tells the code generator that the target has multiple (allocatable) | ||||||||
2100 | /// condition registers that can be used to store the results of comparisons | ||||||||
2101 | /// for use by selects and conditional branches. With multiple condition | ||||||||
2102 | /// registers, the code generator will not aggressively sink comparisons into | ||||||||
2103 | /// the blocks of their users. | ||||||||
2104 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { | ||||||||
2105 | HasMultipleConditionRegisters = hasManyRegs; | ||||||||
2106 | } | ||||||||
2107 | |||||||||
2108 | /// Tells the code generator that the target has BitExtract instructions. | ||||||||
2109 | /// The code generator will aggressively sink "shift"s into the blocks of | ||||||||
2110 | /// their users if the users will generate "and" instructions which can be | ||||||||
2111 | /// combined with "shift" to BitExtract instructions. | ||||||||
2112 | void setHasExtractBitsInsn(bool hasExtractInsn = true) { | ||||||||
2113 | HasExtractBitsInsn = hasExtractInsn; | ||||||||
2114 | } | ||||||||
2115 | |||||||||
2116 | /// Tells the code generator not to expand logic operations on comparison | ||||||||
2117 | /// predicates into separate sequences that increase the amount of flow | ||||||||
2118 | /// control. | ||||||||
2119 | void setJumpIsExpensive(bool isExpensive = true); | ||||||||
2120 | |||||||||
2121 | /// Tells the code generator which bitwidths to bypass. | ||||||||
2122 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { | ||||||||
2123 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; | ||||||||
2124 | } | ||||||||
2125 | |||||||||
2126 | /// Add the specified register class as an available regclass for the | ||||||||
2127 | /// specified value type. This indicates the selector can handle values of | ||||||||
2128 | /// that class natively. | ||||||||
2129 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { | ||||||||
2130 | assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); | ||||||||
2131 | RegClassForVT[VT.SimpleTy] = RC; | ||||||||
2132 | } | ||||||||
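// Editorial sketch (not part of TargetLowering.h): the constructor-time
// sequence these two hooks exist for. The register-class names and the
// subtarget query are placeholders for whatever the target's .td files
// generate.
// Inside MyTargetLowering's constructor:
addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
addRegisterClass(MVT::i64, &MyTarget::GPR64RegClass);
if (STI.hasVectorUnit())
  addRegisterClass(MVT::v4i32, &MyTarget::VR128RegClass);

// Must run after every addRegisterClass call so that the derived tables
// (legal types, representative register classes, ...) are computed.
computeRegisterProperties(STI.getRegisterInfo());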
2133 | |||||||||
2134 | /// Return the largest legal super-reg register class of the register class | ||||||||
2135 | /// for the specified type and its associated "cost". | ||||||||
2136 | virtual std::pair<const TargetRegisterClass *, uint8_t> | ||||||||
2137 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; | ||||||||
2138 | |||||||||
2139 | /// Once all of the register classes are added, this allows us to compute | ||||||||
2140 | /// derived properties we expose. | ||||||||
2141 | void computeRegisterProperties(const TargetRegisterInfo *TRI); | ||||||||
2142 | |||||||||
2143 | /// Indicate that the specified operation does not work with the specified | ||||||||
2144 | /// type and indicate what to do about it. Note that VT may refer to either | ||||||||
2145 | /// the type of a result or that of an operand of Op. | ||||||||
2146 | void setOperationAction(unsigned Op, MVT VT, | ||||||||
2147 | LegalizeAction Action) { | ||||||||
2148 | assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); | ||||||||
2149 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; | ||||||||
2150 | } | ||||||||
2151 | |||||||||
2152 | /// Indicate that the specified load with extension does not work with the | ||||||||
2153 | /// specified type and indicate what to do about it. | ||||||||
2154 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, | ||||||||
2155 | LegalizeAction Action) { | ||||||||
2156 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && | ||||||||
2157 | MemVT.isValid() && "Table isn't big enough!"); | ||||||||
2158 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); | ||||||||
2159 | unsigned Shift = 4 * ExtType; | ||||||||
2160 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); | ||||||||
2161 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; | ||||||||
2162 | } | ||||||||
2163 | |||||||||
2164 | /// Indicate that the specified truncating store does not work with the | ||||||||
2165 | /// specified type and indicate what to do about it. | ||||||||
2166 | void setTruncStoreAction(MVT ValVT, MVT MemVT, | ||||||||
2167 | LegalizeAction Action) { | ||||||||
2168 | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); | ||||||||
2169 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; | ||||||||
2170 | } | ||||||||
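// Editorial sketch (not part of TargetLowering.h): typical constructor-time
// use of the two setters above, mirroring the pattern many in-tree targets
// follow. The exact type choices are assumptions for illustration only.
// Inside MyTargetLowering's constructor:
for (MVT VT : MVT::integer_valuetypes()) {
  // i1 cannot be loaded directly; promote every extending load from i1.
  setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
  setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
  setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
}
// No truncating f64 -> f32 store instruction; let legalization expand it.
setTruncStoreAction(MVT::f64, MVT::f32, Expand);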
2171 | |||||||||
2172 | /// Indicate that the specified indexed load does or does not work with the | ||||||||
2173 | /// specified type and indicate what to do about it. | ||||||||
2174 | /// | ||||||||
2175 | /// NOTE: All indexed mode loads are initialized to Expand in | ||||||||
2176 | /// TargetLowering.cpp | ||||||||
2177 | void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { | ||||||||
2178 | setIndexedModeAction(IdxMode, VT, IMAB_Load, Action); | ||||||||
2179 | } | ||||||||
2180 | |||||||||
2181 | /// Indicate that the specified indexed store does or does not work with the | ||||||||
2182 | /// specified type and indicate what to do about it. | ||||||||
2183 | /// | ||||||||
2184 | /// NOTE: All indexed mode stores are initialized to Expand in | ||||||||
2185 | /// TargetLowering.cpp | ||||||||
2186 | void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action) { | ||||||||
2187 | setIndexedModeAction(IdxMode, VT, IMAB_Store, Action); | ||||||||
2188 | } | ||||||||
2189 | |||||||||
2190 | /// Indicate that the specified indexed masked load does or does not work with | ||||||||
2191 | /// the specified type and indicate what to do about it. | ||||||||
2192 | /// | ||||||||
2193 | /// NOTE: All indexed mode masked loads are initialized to Expand in | ||||||||
2194 | /// TargetLowering.cpp | ||||||||
2195 | void setIndexedMaskedLoadAction(unsigned IdxMode, MVT VT, | ||||||||
2196 | LegalizeAction Action) { | ||||||||
2197 | setIndexedModeAction(IdxMode, VT, IMAB_MaskedLoad, Action); | ||||||||
2198 | } | ||||||||
2199 | |||||||||
2200 | /// Indicate that the specified indexed masked store does or does not work | ||||||||
2201 | /// with the specified type and indicate what to do about it. | ||||||||
2202 | /// | ||||||||
2203 | /// NOTE: All indexed mode masked stores are initialized to Expand in | ||||||||
2204 | /// TargetLowering.cpp | ||||||||
2205 | void setIndexedMaskedStoreAction(unsigned IdxMode, MVT VT, | ||||||||
2206 | LegalizeAction Action) { | ||||||||
2207 | setIndexedModeAction(IdxMode, VT, IMAB_MaskedStore, Action); | ||||||||
2208 | } | ||||||||
2209 | |||||||||
2210 | /// Indicate that the specified condition code is or isn't supported on the | ||||||||
2211 | /// target and indicate what to do about it. | ||||||||
2212 | void setCondCodeAction(ISD::CondCode CC, MVT VT, | ||||||||
2213 | LegalizeAction Action) { | ||||||||
2214 | assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && | ||||||||
2215 | "Table isn't big enough!"); | ||||||||
2216 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); | ||||||||
2217 | /// The lower 3 bits of the SimpleTy select the Nth 4-bit chunk within the | ||||||||
2218 | /// 32-bit value, and the upper 29 bits index the second dimension of the | ||||||||
2219 | /// array to select which 32-bit value to use. | ||||||||
2220 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); | ||||||||
2221 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); | ||||||||
2222 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; | ||||||||
2223 | } | ||||||||
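// Editorial worked example (not part of TargetLowering.h) of the packing
// described in the comment above, with assumed inputs VT.SimpleTy == 13 and
// Action == Expand (numeric value 2):
//   Shift        = 4 * (13 & 0x7) = 4 * 5 = 20
//   second index = 13 >> 3        = 1
// so bits [20, 24) of CondCodeActions[CC][1] are cleared and then set to 2.
// Eight value types therefore share each 32-bit entry, four bits apiece.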
2224 | |||||||||
2225 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults | ||||||||
2226 | /// to trying a larger integer/fp until it can find one that works. If that | ||||||||
2227 | /// default is insufficient, this method can be used by the target to override | ||||||||
2228 | /// the default. | ||||||||
2229 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { | ||||||||
2230 | PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; | ||||||||
2231 | } | ||||||||
2232 | |||||||||
2233 | /// Convenience method to set an operation to Promote and specify the type | ||||||||
2234 | /// in a single call. | ||||||||
2235 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { | ||||||||
2236 | setOperationAction(Opc, OrigVT, Promote); | ||||||||
2237 | AddPromotedToType(Opc, OrigVT, DestVT); | ||||||||
2238 | } | ||||||||
2239 | |||||||||
2240 | /// Targets should invoke this method for each target independent node that | ||||||||
2241 | /// they want to provide a custom DAG combiner for by implementing the | ||||||||
2242 | /// PerformDAGCombine virtual method. | ||||||||
2243 | void setTargetDAGCombine(ISD::NodeType NT) { | ||||||||
2244 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); | ||||||||
2245 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); | ||||||||
2246 | } | ||||||||
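// Editorial sketch (not part of TargetLowering.h): a target registers for a
// combine in its constructor and then handles the node in its
// PerformDAGCombine override. The class name and the choice of ISD::ADD are
// assumptions for illustration.
//
//   // In MyTargetLowering's constructor:
//   setTargetDAGCombine(ISD::ADD);
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  case ISD::ADD:
    // Target-specific (add x, y) rewrites would be produced here.
    return SDValue();
  default:
    return SDValue();
  }
}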
2247 | |||||||||
2248 | /// Set the target's minimum function alignment. | ||||||||
2249 | void setMinFunctionAlignment(Align Alignment) { | ||||||||
2250 | MinFunctionAlignment = Alignment; | ||||||||
2251 | } | ||||||||
2252 | |||||||||
2253 | /// Set the target's preferred function alignment. This should be set if | ||||||||
2254 | /// there is a performance benefit to higher-than-minimum alignment. | ||||||||
2255 | void setPrefFunctionAlignment(Align Alignment) { | ||||||||
2256 | PrefFunctionAlignment = Alignment; | ||||||||
2257 | } | ||||||||
2258 | |||||||||
2259 | /// Set the target's preferred loop alignment. The default alignment is one, | ||||||||
2260 | /// meaning the target does not care about loop alignment. The target may also | ||||||||
2261 | /// override getPrefLoopAlignment to provide per-loop values. | ||||||||
2262 | void setPrefLoopAlignment(Align Alignment) { PrefLoopAlignment = Alignment; } | ||||||||
2263 | |||||||||
2264 | /// Set the minimum stack alignment of an argument. | ||||||||
2265 | void setMinStackArgumentAlignment(Align Alignment) { | ||||||||
2266 | MinStackArgumentAlignment = Alignment; | ||||||||
2267 | } | ||||||||
2268 | |||||||||
2269 | /// Set the maximum atomic operation size supported by the | ||||||||
2270 | /// backend. Atomic operations greater than this size (as well as | ||||||||
2271 | /// ones that are not naturally aligned), will be expanded by | ||||||||
2272 | /// AtomicExpandPass into an __atomic_* library call. | ||||||||
2273 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { | ||||||||
2274 | MaxAtomicSizeInBitsSupported = SizeInBits; | ||||||||
2275 | } | ||||||||
2276 | |||||||||
2277 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. | ||||||||
2278 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { | ||||||||
2279 | MinCmpXchgSizeInBits = SizeInBits; | ||||||||
2280 | } | ||||||||
2281 | |||||||||
2282 | /// Sets whether unaligned atomic operations are supported. | ||||||||
2283 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { | ||||||||
2284 | SupportsUnalignedAtomics = UnalignedSupported; | ||||||||
2285 | } | ||||||||
2286 | |||||||||
2287 | public: | ||||||||
2288 | //===--------------------------------------------------------------------===// | ||||||||
2289 | // Addressing mode description hooks (used by LSR etc). | ||||||||
2290 | // | ||||||||
2291 | |||||||||
2292 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store | ||||||||
2293 | /// instructions reading the address. This allows as much computation as | ||||||||
2294 | /// possible to be done in the address mode for that operand. This hook lets | ||||||||
2295 | /// targets also pass back when this should be done on intrinsics which | ||||||||
2296 | /// load/store. | ||||||||
2297 | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, | ||||||||
2298 | SmallVectorImpl<Value*> &/*Ops*/, | ||||||||
2299 | Type *&/*AccessTy*/) const { | ||||||||
2300 | return false; | ||||||||
2301 | } | ||||||||
2302 | |||||||||
2303 | /// This represents an addressing mode of: | ||||||||
2304 | /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg | ||||||||
2305 | /// If BaseGV is null, there is no BaseGV. | ||||||||
2306 | /// If BaseOffs is zero, there is no base offset. | ||||||||
2307 | /// If HasBaseReg is false, there is no base register. | ||||||||
2308 | /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with | ||||||||
2309 | /// no scale. | ||||||||
2310 | struct AddrMode { | ||||||||
2311 | GlobalValue *BaseGV = nullptr; | ||||||||
2312 | int64_t BaseOffs = 0; | ||||||||
2313 | bool HasBaseReg = false; | ||||||||
2314 | int64_t Scale = 0; | ||||||||
2315 | AddrMode() = default; | ||||||||
2316 | }; | ||||||||
2317 | |||||||||
2318 | /// Return true if the addressing mode represented by AM is legal for this | ||||||||
2319 | /// target, for a load/store of the specified type. | ||||||||
2320 | /// | ||||||||
2321 | /// The type may be VoidTy, in which case only return true if the addressing | ||||||||
2322 | /// mode is legal for a load/store of any legal type. TODO: Handle | ||||||||
2323 | /// pre/postinc as well. | ||||||||
2324 | /// | ||||||||
2325 | /// If the address space cannot be determined, it will be -1. | ||||||||
2326 | /// | ||||||||
2327 | /// TODO: Remove default argument | ||||||||
2328 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, | ||||||||
2329 | Type *Ty, unsigned AddrSpace, | ||||||||
2330 | Instruction *I = nullptr) const; | ||||||||
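// Editorial sketch (not part of TargetLowering.h): how a client such as LSR
// or CodeGenPrepare queries the hook declared above. "TLI", "DL" and "Ctx"
// are assumed to be a TargetLowering pointer, the module's DataLayout and an
// LLVMContext; the mode below models "BaseReg + 4*ScaleReg + 16" for an i32
// access in address space 0.
TargetLowering::AddrMode AM;
AM.BaseGV = nullptr;  // no global base.
AM.BaseOffs = 16;     // constant displacement.
AM.HasBaseReg = true; // a base register is present.
AM.Scale = 4;         // plus 4 * ScaleReg.
bool Legal = TLI->isLegalAddressingMode(DL, AM, Type::getInt32Ty(Ctx),
                                        /*AddrSpace=*/0);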
2331 | |||||||||
2332 | /// Return the cost of the scaling factor used in the addressing mode | ||||||||
2333 | /// represented by AM for this target, for a load/store of the specified type. | ||||||||
2334 | /// | ||||||||
2335 | /// If the AM is supported, the return value must be >= 0. | ||||||||
2336 | /// If the AM is not supported, it returns a negative value. | ||||||||
2337 | /// TODO: Handle pre/postinc as well. | ||||||||
2338 | /// TODO: Remove default argument | ||||||||
2339 | virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, | ||||||||
2340 | Type *Ty, unsigned AS = 0) const { | ||||||||
2341 | // Default: assume that any scaling factor used in a legal AM is free. | ||||||||
2342 | if (isLegalAddressingMode(DL, AM, Ty, AS)) | ||||||||
2343 | return 0; | ||||||||
2344 | return -1; | ||||||||
2345 | } | ||||||||
2346 | |||||||||
2347 | /// Return true if the specified immediate is legal icmp immediate, that is | ||||||||
2348 | /// the target has icmp instructions which can compare a register against the | ||||||||
2349 | /// immediate without having to materialize the immediate into a register. | ||||||||
2350 | virtual bool isLegalICmpImmediate(int64_t) const { | ||||||||
2351 | return true; | ||||||||
2352 | } | ||||||||
2353 | |||||||||
2354 | /// Return true if the specified immediate is legal add immediate, that is the | ||||||||
2355 | /// target has add instructions which can add a register with the immediate | ||||||||
2356 | /// without having to materialize the immediate into a register. | ||||||||
2357 | virtual bool isLegalAddImmediate(int64_t) const { | ||||||||
2358 | return true; | ||||||||
2359 | } | ||||||||
2360 | |||||||||
2361 | /// Return true if the specified immediate is legal for the value input of a | ||||||||
2362 | /// store instruction. | ||||||||
2363 | virtual bool isLegalStoreImmediate(int64_t Value) const { | ||||||||
2364 | // Default implementation assumes that at least 0 works since it is likely | ||||||||
2365 | // that a zero register exists or a zero immediate is allowed. | ||||||||
2366 | return Value == 0; | ||||||||
2367 | } | ||||||||
2368 | |||||||||
2369 | /// Return true if it's significantly cheaper to shift a vector by a uniform | ||||||||
2370 | /// scalar than by an amount which will vary across each lane. On x86 before | ||||||||
2371 | /// AVX2 for example, there is a "psllw" instruction for the former case, but | ||||||||
2372 | /// no simple instruction for a general "a << b" operation on vectors. | ||||||||
2373 | /// This should also apply to lowering for vector funnel shifts (rotates). | ||||||||
2374 | virtual bool isVectorShiftByScalarCheap(Type *Ty) const { | ||||||||
2375 | return false; | ||||||||
2376 | } | ||||||||
2377 | |||||||||
2378 | /// Given a shuffle vector SVI representing a vector splat, return a new | ||||||||
2379 | /// scalar type of size equal to SVI's scalar type if the new type is more | ||||||||
2380 | /// profitable. Returns nullptr otherwise. For example under MVE float splats | ||||||||
2381 | /// are converted to integer to prevent the need to move from SPR to GPR | ||||||||
2382 | /// registers. | ||||||||
2383 | virtual Type* shouldConvertSplatType(ShuffleVectorInst* SVI) const { | ||||||||
2384 | return nullptr; | ||||||||
2385 | } | ||||||||
2386 | |||||||||
2387 | /// Given a set of interconnected phis of type 'From' that are loaded/stored | ||||||||
2388 | /// or bitcast to type 'To', return true if the set should be converted to | ||||||||
2389 | /// 'To'. | ||||||||
2390 | virtual bool shouldConvertPhiType(Type *From, Type *To) const { | ||||||||
2391 | return (From->isIntegerTy() || From->isFloatingPointTy()) && | ||||||||
2392 | (To->isIntegerTy() || To->isFloatingPointTy()); | ||||||||
2393 | } | ||||||||
2394 | |||||||||
2395 | /// Returns true if the opcode is a commutative binary operation. | ||||||||
2396 | virtual bool isCommutativeBinOp(unsigned Opcode) const { | ||||||||
2397 | // FIXME: This should get its info from the td file. | ||||||||
2398 | switch (Opcode) { | ||||||||
2399 | case ISD::ADD: | ||||||||
2400 | case ISD::SMIN: | ||||||||
2401 | case ISD::SMAX: | ||||||||
2402 | case ISD::UMIN: | ||||||||
2403 | case ISD::UMAX: | ||||||||
2404 | case ISD::MUL: | ||||||||
2405 | case ISD::MULHU: | ||||||||
2406 | case ISD::MULHS: | ||||||||
2407 | case ISD::SMUL_LOHI: | ||||||||
2408 | case ISD::UMUL_LOHI: | ||||||||
2409 | case ISD::FADD: | ||||||||
2410 | case ISD::FMUL: | ||||||||
2411 | case ISD::AND: | ||||||||
2412 | case ISD::OR: | ||||||||
2413 | case ISD::XOR: | ||||||||
2414 | case ISD::SADDO: | ||||||||
2415 | case ISD::UADDO: | ||||||||
2416 | case ISD::ADDC: | ||||||||
2417 | case ISD::ADDE: | ||||||||
2418 | case ISD::SADDSAT: | ||||||||
2419 | case ISD::UADDSAT: | ||||||||
2420 | case ISD::FMINNUM: | ||||||||
2421 | case ISD::FMAXNUM: | ||||||||
2422 | case ISD::FMINNUM_IEEE: | ||||||||
2423 | case ISD::FMAXNUM_IEEE: | ||||||||
2424 | case ISD::FMINIMUM: | ||||||||
2425 | case ISD::FMAXIMUM: | ||||||||
2426 | return true; | ||||||||
2427 | default: return false; | ||||||||
2428 | } | ||||||||
2429 | } | ||||||||
2430 | |||||||||
2431 | /// Return true if the node is a math/logic binary operator. | ||||||||
2432 | virtual bool isBinOp(unsigned Opcode) const { | ||||||||
2433 | // A commutative binop must be a binop. | ||||||||
2434 | if (isCommutativeBinOp(Opcode)) | ||||||||
2435 | return true; | ||||||||
2436 | // These are non-commutative binops. | ||||||||
2437 | switch (Opcode) { | ||||||||
2438 | case ISD::SUB: | ||||||||
2439 | case ISD::SHL: | ||||||||
2440 | case ISD::SRL: | ||||||||
2441 | case ISD::SRA: | ||||||||
2442 | case ISD::SDIV: | ||||||||
2443 | case ISD::UDIV: | ||||||||
2444 | case ISD::SREM: | ||||||||
2445 | case ISD::UREM: | ||||||||
2446 | case ISD::FSUB: | ||||||||
2447 | case ISD::FDIV: | ||||||||
2448 | case ISD::FREM: | ||||||||
2449 | return true; | ||||||||
2450 | default: | ||||||||
2451 | return false; | ||||||||
2452 | } | ||||||||
2453 | } | ||||||||
2454 | |||||||||
2455 | /// Return true if it's free to truncate a value of type FromTy to type | ||||||||
2456 | /// ToTy. e.g. On x86 it's free to truncate a i32 value in register EAX to i16 | ||||||||
2457 | /// by referencing its sub-register AX. | ||||||||
2458 | /// Targets must return false when FromTy <= ToTy. | ||||||||
2459 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { | ||||||||
2460 | return false; | ||||||||
2461 | } | ||||||||
2462 | |||||||||
2463 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding | ||||||||
2464 | /// whether a call is in tail position. Typically this means that both results | ||||||||
2465 | /// would be assigned to the same register or stack slot, but it could mean | ||||||||
2466 | /// the target performs adequate checks of its own before proceeding with the | ||||||||
2467 | /// tail call. Targets must return false when FromTy <= ToTy. | ||||||||
2468 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { | ||||||||
2469 | return false; | ||||||||
2470 | } | ||||||||
2471 | |||||||||
2472 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { | ||||||||
2473 | return false; | ||||||||
2474 | } | ||||||||
2475 | |||||||||
2476 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } | ||||||||
2477 | |||||||||
2478 | /// Return true if the extension represented by \p I is free. | ||||||||
2479 |   /// Unlike the is[Z|FP]ExtFree family, which is based on types, | ||||||||
2480 | /// this method can use the context provided by \p I to decide | ||||||||
2481 | /// whether or not \p I is free. | ||||||||
2482 | /// This method extends the behavior of the is[Z|FP]ExtFree family. | ||||||||
2483 |   /// In other words, if is[Z|FP]ExtFree returns true, then this method | ||||||||
2484 | /// returns true as well. The converse is not true. | ||||||||
2485 | /// The target can perform the adequate checks by overriding isExtFreeImpl. | ||||||||
2486 | /// \pre \p I must be a sign, zero, or fp extension. | ||||||||
2487 | bool isExtFree(const Instruction *I) const { | ||||||||
2488 | switch (I->getOpcode()) { | ||||||||
2489 | case Instruction::FPExt: | ||||||||
2490 | if (isFPExtFree(EVT::getEVT(I->getType()), | ||||||||
2491 | EVT::getEVT(I->getOperand(0)->getType()))) | ||||||||
2492 | return true; | ||||||||
2493 | break; | ||||||||
2494 | case Instruction::ZExt: | ||||||||
2495 | if (isZExtFree(I->getOperand(0)->getType(), I->getType())) | ||||||||
2496 | return true; | ||||||||
2497 | break; | ||||||||
2498 | case Instruction::SExt: | ||||||||
2499 | break; | ||||||||
2500 | default: | ||||||||
2501 |       llvm_unreachable("Instruction is not an extension"); | ||||||||
2502 | } | ||||||||
2503 | return isExtFreeImpl(I); | ||||||||
2504 | } | ||||||||
2505 | |||||||||
2506 | /// Return true if \p Load and \p Ext can form an ExtLoad. | ||||||||
2507 | /// For example, in AArch64 | ||||||||
2508 | /// %L = load i8, i8* %ptr | ||||||||
2509 | /// %E = zext i8 %L to i32 | ||||||||
2510 | /// can be lowered into one load instruction | ||||||||
2511 | /// ldrb w0, [x0] | ||||||||
2512 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, | ||||||||
2513 | const DataLayout &DL) const { | ||||||||
2514 | EVT VT = getValueType(DL, Ext->getType()); | ||||||||
2515 | EVT LoadVT = getValueType(DL, Load->getType()); | ||||||||
2516 | |||||||||
2517 | // If the load has other users and the truncate is not free, the ext | ||||||||
2518 | // probably isn't free. | ||||||||
2519 | if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && | ||||||||
2520 | !isTruncateFree(Ext->getType(), Load->getType())) | ||||||||
2521 | return false; | ||||||||
2522 | |||||||||
2523 | // Check whether the target supports casts folded into loads. | ||||||||
2524 | unsigned LType; | ||||||||
2525 | if (isa<ZExtInst>(Ext)) | ||||||||
2526 | LType = ISD::ZEXTLOAD; | ||||||||
2527 | else { | ||||||||
2528 |       assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); | ||||||||
2529 | LType = ISD::SEXTLOAD; | ||||||||
2530 | } | ||||||||
2531 | |||||||||
2532 | return isLoadExtLegal(LType, VT, LoadVT); | ||||||||
2533 | } | ||||||||
2534 | |||||||||
2535 | /// Return true if any actual instruction that defines a value of type FromTy | ||||||||
2536 | /// implicitly zero-extends the value to ToTy in the result register. | ||||||||
2537 | /// | ||||||||
2538 | /// The function should return true when it is likely that the truncate can | ||||||||
2539 | /// be freely folded with an instruction defining a value of FromTy. If | ||||||||
2540 | /// the defining instruction is unknown (because you're looking at a | ||||||||
2541 | /// function argument, PHI, etc.) then the target may require an | ||||||||
2542 | /// explicit truncate, which is not necessarily free, but this function | ||||||||
2543 | /// does not deal with those cases. | ||||||||
2544 | /// Targets must return false when FromTy >= ToTy. | ||||||||
2545 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { | ||||||||
2546 | return false; | ||||||||
2547 | } | ||||||||
2548 | |||||||||
2549 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { | ||||||||
2550 | return false; | ||||||||
2551 | } | ||||||||
2552 | |||||||||
2553 | /// Return true if sign-extension from FromTy to ToTy is cheaper than | ||||||||
2554 | /// zero-extension. | ||||||||
2555 | virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { | ||||||||
2556 | return false; | ||||||||
2557 | } | ||||||||
2558 | |||||||||
2559 | /// Return true if sinking I's operands to the same basic block as I is | ||||||||
2560 | /// profitable, e.g. because the operands can be folded into a target | ||||||||
2561 | /// instruction during instruction selection. After calling the function | ||||||||
2562 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users | ||||||||
2563 | /// come first). | ||||||||
2564 | virtual bool shouldSinkOperands(Instruction *I, | ||||||||
2565 | SmallVectorImpl<Use *> &Ops) const { | ||||||||
2566 | return false; | ||||||||
2567 | } | ||||||||
2568 | |||||||||
2569 | /// Return true if the target supplies and combines to a paired load | ||||||||
2570 | /// two loaded values of type LoadedType next to each other in memory. | ||||||||
2571 | /// RequiredAlignment gives the minimal alignment constraints that must be met | ||||||||
2572 | /// to be able to select this paired load. | ||||||||
2573 | /// | ||||||||
2574 | /// This information is *not* used to generate actual paired loads, but it is | ||||||||
2575 | /// used to generate a sequence of loads that is easier to combine into a | ||||||||
2576 | /// paired load. | ||||||||
2577 | /// For instance, something like this: | ||||||||
2578 | /// a = load i64* addr | ||||||||
2579 | /// b = trunc i64 a to i32 | ||||||||
2580 | /// c = lshr i64 a, 32 | ||||||||
2581 | /// d = trunc i64 c to i32 | ||||||||
2582 | /// will be optimized into: | ||||||||
2583 | /// b = load i32* addr1 | ||||||||
2584 | /// d = load i32* addr2 | ||||||||
2585 | /// Where addr1 = addr2 +/- sizeof(i32). | ||||||||
2586 | /// | ||||||||
2587 | /// In other words, unless the target performs a post-isel load combining, | ||||||||
2588 | /// this information should not be provided because it will generate more | ||||||||
2589 | /// loads. | ||||||||
2590 | virtual bool hasPairedLoad(EVT /*LoadedType*/, | ||||||||
2591 | Align & /*RequiredAlignment*/) const { | ||||||||
2592 | return false; | ||||||||
2593 | } | ||||||||
2594 | |||||||||
2595 | /// Return true if the target has a vector blend instruction. | ||||||||
2596 | virtual bool hasVectorBlend() const { return false; } | ||||||||
2597 | |||||||||
2598 | /// Get the maximum supported factor for interleaved memory accesses. | ||||||||
2599 | /// Default to be the minimum interleave factor: 2. | ||||||||
2600 | virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } | ||||||||
2601 | |||||||||
2602 | /// Lower an interleaved load to target specific intrinsics. Return | ||||||||
2603 | /// true on success. | ||||||||
2604 | /// | ||||||||
2605 | /// \p LI is the vector load instruction. | ||||||||
2606 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. | ||||||||
2607 | /// \p Indices is the corresponding indices for each shufflevector. | ||||||||
2608 | /// \p Factor is the interleave factor. | ||||||||
2609 | virtual bool lowerInterleavedLoad(LoadInst *LI, | ||||||||
2610 | ArrayRef<ShuffleVectorInst *> Shuffles, | ||||||||
2611 | ArrayRef<unsigned> Indices, | ||||||||
2612 | unsigned Factor) const { | ||||||||
2613 | return false; | ||||||||
2614 | } | ||||||||
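  // For illustration, a factor-2 interleaved load that this hook may be asked
  // to lower looks like this (IR sketch):
  //
  //   %wide = load <8 x i32>, <8 x i32>* %ptr
  //   %even = shufflevector <8 x i32> %wide, <8 x i32> undef,
  //                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  //   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef,
  //                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  //
  // Here \p Shuffles would be {%even, %odd}, \p Indices {0, 1}, and \p Factor 2.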
2615 | |||||||||
2616 | /// Lower an interleaved store to target specific intrinsics. Return | ||||||||
2617 | /// true on success. | ||||||||
2618 | /// | ||||||||
2619 | /// \p SI is the vector store instruction. | ||||||||
2620 | /// \p SVI is the shufflevector to RE-interleave the stored vector. | ||||||||
2621 | /// \p Factor is the interleave factor. | ||||||||
2622 | virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, | ||||||||
2623 | unsigned Factor) const { | ||||||||
2624 | return false; | ||||||||
2625 | } | ||||||||
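  // For illustration, the corresponding factor-2 interleaved store (IR sketch):
  //
  //   %iv = shufflevector <4 x i32> %a, <4 x i32> %b,
  //            <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  //   store <8 x i32> %iv, <8 x i32>* %ptr
  //
  // Here \p SVI is %iv (the re-interleaving shuffle) and \p Factor is 2.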
2626 | |||||||||
2627 | /// Return true if zero-extending the specific node Val to type VT2 is free | ||||||||
2628 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or | ||||||||
2629 | /// because it's folded such as X86 zero-extending loads). | ||||||||
2630 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { | ||||||||
2631 | return isZExtFree(Val.getValueType(), VT2); | ||||||||
2632 | } | ||||||||
2633 | |||||||||
2634 | /// Return true if an fpext operation is free (for instance, because | ||||||||
2635 | /// single-precision floating-point numbers are implicitly extended to | ||||||||
2636 | /// double-precision). | ||||||||
2637 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { | ||||||||
2638 |     assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && | ||||||||
2639 |            "invalid fpext types"); | ||||||||
2640 | return false; | ||||||||
2641 | } | ||||||||
2642 | |||||||||
2643 | /// Return true if an fpext operation input to an \p Opcode operation is free | ||||||||
2644 | /// (for instance, because half-precision floating-point numbers are | ||||||||
2645 | /// implicitly extended to float-precision) for an FMA instruction. | ||||||||
2646 | virtual bool isFPExtFoldable(const SelectionDAG &DAG, unsigned Opcode, | ||||||||
2647 | EVT DestVT, EVT SrcVT) const { | ||||||||
2648 |     assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && | ||||||||
2649 |            "invalid fpext types"); | ||||||||
2650 | return isFPExtFree(DestVT, SrcVT); | ||||||||
2651 | } | ||||||||
2652 | |||||||||
2653 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any | ||||||||
2654 | /// extend node) is profitable. | ||||||||
2655 | virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } | ||||||||
2656 | |||||||||
2657 | /// Return true if an fneg operation is free to the point where it is never | ||||||||
2658 | /// worthwhile to replace it with a bitwise operation. | ||||||||
2659 | virtual bool isFNegFree(EVT VT) const { | ||||||||
2660 |     assert(VT.isFloatingPoint()); | ||||||||
2661 | return false; | ||||||||
2662 | } | ||||||||
2663 | |||||||||
2664 | /// Return true if an fabs operation is free to the point where it is never | ||||||||
2665 | /// worthwhile to replace it with a bitwise operation. | ||||||||
2666 | virtual bool isFAbsFree(EVT VT) const { | ||||||||
2667 |     assert(VT.isFloatingPoint()); | ||||||||
2668 | return false; | ||||||||
2669 | } | ||||||||
2670 | |||||||||
2671 | /// Return true if an FMA operation is faster than a pair of fmul and fadd | ||||||||
2672 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method | ||||||||
2673 | /// returns true, otherwise fmuladd is expanded to fmul + fadd. | ||||||||
2674 | /// | ||||||||
2675 | /// NOTE: This may be called before legalization on types for which FMAs are | ||||||||
2676 | /// not legal, but should return true if those types will eventually legalize | ||||||||
2677 | /// to types that support FMAs. After legalization, it will only be called on | ||||||||
2678 | /// types that support FMAs (via Legal or Custom actions) | ||||||||
2679 | virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, | ||||||||
2680 | EVT) const { | ||||||||
2681 | return false; | ||||||||
2682 | } | ||||||||
2683 | |||||||||
2684 | /// IR version | ||||||||
2685 | virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const { | ||||||||
2686 | return false; | ||||||||
2687 | } | ||||||||
2688 | |||||||||
2689 |   /// Returns true if \p N may be combined into an ISD::FMAD node. \p N may | ||||||||
2690 |   /// be an ISD::FADD, ISD::FSUB, or an ISD::FMUL which will be distributed | ||||||||
2691 |   /// into an fadd/fsub. | ||||||||
2692 | virtual bool isFMADLegal(const SelectionDAG &DAG, const SDNode *N) const { | ||||||||
2693 |     assert((N->getOpcode() == ISD::FADD || N->getOpcode() == ISD::FSUB || | ||||||||
2694 |             N->getOpcode() == ISD::FMUL) && | ||||||||
2695 |            "unexpected node in FMAD forming combine"); | ||||||||
2696 | return isOperationLegal(ISD::FMAD, N->getValueType(0)); | ||||||||
2697 | } | ||||||||
2698 | |||||||||
2699 | /// Return true if it's profitable to narrow operations of type VT1 to | ||||||||
2700 | /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from | ||||||||
2701 | /// i32 to i16. | ||||||||
2702 | virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { | ||||||||
2703 | return false; | ||||||||
2704 | } | ||||||||
2705 | |||||||||
2706 | /// Return true if it is beneficial to convert a load of a constant to | ||||||||
2707 | /// just the constant itself. | ||||||||
2708 | /// On some targets it might be more efficient to use a combination of | ||||||||
2709 | /// arithmetic instructions to materialize the constant instead of loading it | ||||||||
2710 | /// from a constant pool. | ||||||||
2711 | virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, | ||||||||
2712 | Type *Ty) const { | ||||||||
2713 | return false; | ||||||||
2714 | } | ||||||||
2715 | |||||||||
2716 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type | ||||||||
2717 | /// from this source type with this index. This is needed because | ||||||||
2718 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of | ||||||||
2719 | /// the first element, and only the target knows which lowering is cheap. | ||||||||
2720 | virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, | ||||||||
2721 | unsigned Index) const { | ||||||||
2722 | return false; | ||||||||
2723 | } | ||||||||
2724 | |||||||||
2725 | /// Try to convert an extract element of a vector binary operation into an | ||||||||
2726 | /// extract element followed by a scalar operation. | ||||||||
2727 | virtual bool shouldScalarizeBinop(SDValue VecOp) const { | ||||||||
2728 | return false; | ||||||||
2729 | } | ||||||||
2730 | |||||||||
2731 | /// Return true if extraction of a scalar element from the given vector type | ||||||||
2732 | /// at the given index is cheap. For example, if scalar operations occur on | ||||||||
2733 | /// the same register file as vector operations, then an extract element may | ||||||||
2734 | /// be a sub-register rename rather than an actual instruction. | ||||||||
2735 | virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { | ||||||||
2736 | return false; | ||||||||
2737 | } | ||||||||
2738 | |||||||||
2739 | /// Try to convert math with an overflow comparison into the corresponding DAG | ||||||||
2740 | /// node operation. Targets may want to override this independently of whether | ||||||||
2741 | /// the operation is legal/custom for the given type because it may obscure | ||||||||
2742 | /// matching of other patterns. | ||||||||
2743 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT, | ||||||||
2744 | bool MathUsed) const { | ||||||||
2745 | // TODO: The default logic is inherited from code in CodeGenPrepare. | ||||||||
2746 | // The opcode should not make a difference by default? | ||||||||
2747 | if (Opcode != ISD::UADDO) | ||||||||
2748 | return false; | ||||||||
2749 | |||||||||
2750 | // Allow the transform as long as we have an integer type that is not | ||||||||
2751 | // obviously illegal and unsupported and if the math result is used | ||||||||
2752 | // besides the overflow check. On some targets (e.g. SPARC), it is | ||||||||
2753 |     // not profitable to form an overflow op if the math result has no | ||||||||
2754 | // concrete users. | ||||||||
2755 | if (VT.isVector()) | ||||||||
2756 | return false; | ||||||||
2757 | return MathUsed && (VT.isSimple() || !isOperationExpand(Opcode, VT)); | ||||||||
2758 | } | ||||||||
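  // For illustration, the kind of pattern CodeGenPrepare considers here is an
  // add whose result is compared against an operand to detect unsigned wrap
  // (IR sketch):
  //
  //   %sum = add i32 %x, %y
  //   %ovf = icmp ult i32 %sum, %x   ; unsigned overflow check
  //
  // When this hook returns true for ISD::UADDO, the pair can be rewritten to
  // use @llvm.uadd.with.overflow.i32, yielding both the sum and the flag.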
2759 | |||||||||
2760 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR | ||||||||
2761 | // even if the vector itself has multiple uses. | ||||||||
2762 | virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { | ||||||||
2763 | return false; | ||||||||
2764 | } | ||||||||
2765 | |||||||||
2766 | // Return true if CodeGenPrepare should consider splitting large offset of a | ||||||||
2767 | // GEP to make the GEP fit into the addressing mode and can be sunk into the | ||||||||
2768 | // same blocks of its users. | ||||||||
2769 | virtual bool shouldConsiderGEPOffsetSplit() const { return false; } | ||||||||
2770 | |||||||||
2771 | /// Return true if creating a shift of the type by the given | ||||||||
2772 | /// amount is not profitable. | ||||||||
2773 | virtual bool shouldAvoidTransformToShift(EVT VT, unsigned Amount) const { | ||||||||
2774 | return false; | ||||||||
2775 | } | ||||||||
2776 | |||||||||
2777 | //===--------------------------------------------------------------------===// | ||||||||
2778 | // Runtime Library hooks | ||||||||
2779 | // | ||||||||
2780 | |||||||||
2781 | /// Rename the default libcall routine name for the specified libcall. | ||||||||
2782 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { | ||||||||
2783 | LibcallRoutineNames[Call] = Name; | ||||||||
2784 | } | ||||||||
2785 | |||||||||
2786 | /// Get the libcall routine name for the specified libcall. | ||||||||
2787 | const char *getLibcallName(RTLIB::Libcall Call) const { | ||||||||
2788 | return LibcallRoutineNames[Call]; | ||||||||
2789 | } | ||||||||
2790 | |||||||||
2791 | /// Override the default CondCode to be used to test the result of the | ||||||||
2792 | /// comparison libcall against zero. | ||||||||
2793 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { | ||||||||
2794 | CmpLibcallCCs[Call] = CC; | ||||||||
2795 | } | ||||||||
2796 | |||||||||
2797 | /// Get the CondCode that's to be used to test the result of the comparison | ||||||||
2798 | /// libcall against zero. | ||||||||
2799 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { | ||||||||
2800 | return CmpLibcallCCs[Call]; | ||||||||
2801 | } | ||||||||
2802 | |||||||||
2803 | /// Set the CallingConv that should be used for the specified libcall. | ||||||||
2804 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { | ||||||||
2805 | LibcallCallingConvs[Call] = CC; | ||||||||
2806 | } | ||||||||
2807 | |||||||||
2808 | /// Get the CallingConv that should be used for the specified libcall. | ||||||||
2809 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { | ||||||||
2810 | return LibcallCallingConvs[Call]; | ||||||||
2811 | } | ||||||||
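  // A minimal usage sketch (hypothetical target, hypothetical symbol name):
  // a target constructor can retarget a libcall and adjust its calling
  // convention and the condition used to test comparison results, e.g.
  //
  //   setLibcallName(RTLIB::MEMCPY, "__my_memcpy");
  //   setLibcallCallingConv(RTLIB::MEMCPY, CallingConv::C);
  //   setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETEQ);
  //
  // Libcalls left untouched keep the defaults installed by InitLibcalls().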
2812 | |||||||||
2813 | /// Execute target specific actions to finalize target lowering. | ||||||||
2814 | /// This is used to set extra flags in MachineFrameInformation and freezing | ||||||||
2815 | /// the set of reserved registers. | ||||||||
2816 | /// The default implementation just freezes the set of reserved registers. | ||||||||
2817 | virtual void finalizeLowering(MachineFunction &MF) const; | ||||||||
2818 | |||||||||
2819 | //===----------------------------------------------------------------------===// | ||||||||
2820 | // GlobalISel Hooks | ||||||||
2821 | //===----------------------------------------------------------------------===// | ||||||||
2822 | /// Check whether or not \p MI needs to be moved close to its uses. | ||||||||
2823 | virtual bool shouldLocalize(const MachineInstr &MI, const TargetTransformInfo *TTI) const; | ||||||||
2824 | |||||||||
2825 | |||||||||
2826 | private: | ||||||||
2827 | const TargetMachine &TM; | ||||||||
2828 | |||||||||
2829 | /// Tells the code generator that the target has multiple (allocatable) | ||||||||
2830 | /// condition registers that can be used to store the results of comparisons | ||||||||
2831 | /// for use by selects and conditional branches. With multiple condition | ||||||||
2832 | /// registers, the code generator will not aggressively sink comparisons into | ||||||||
2833 | /// the blocks of their users. | ||||||||
2834 | bool HasMultipleConditionRegisters; | ||||||||
2835 | |||||||||
2836 | /// Tells the code generator that the target has BitExtract instructions. | ||||||||
2837 | /// The code generator will aggressively sink "shift"s into the blocks of | ||||||||
2838 | /// their users if the users will generate "and" instructions which can be | ||||||||
2839 | /// combined with "shift" to BitExtract instructions. | ||||||||
2840 | bool HasExtractBitsInsn; | ||||||||
2841 | |||||||||
2842 | /// Tells the code generator to bypass slow divide or remainder | ||||||||
2843 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code | ||||||||
2844 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer | ||||||||
2845 | /// div/rem when the operands are positive and less than 256. | ||||||||
2846 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; | ||||||||
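  // For illustration: a target whose 32-bit integer divide is slow but whose
  // 8-bit divide is fast would populate this map from its constructor via the
  // protected helper declared earlier in this class, e.g.
  //
  //   addBypassSlowDiv(32, 8);
  //
  // CodeGenPrepare's BypassSlowDivision transform then emits a runtime check
  // and uses the narrow div/rem when both operands fit in the smaller width.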
2847 | |||||||||
2848 | /// Tells the code generator that it shouldn't generate extra flow control | ||||||||
2849 | /// instructions and should attempt to combine flow control instructions via | ||||||||
2850 | /// predication. | ||||||||
2851 | bool JumpIsExpensive; | ||||||||
2852 | |||||||||
2853 | /// Information about the contents of the high-bits in boolean values held in | ||||||||
2854 | /// a type wider than i1. See getBooleanContents. | ||||||||
2855 | BooleanContent BooleanContents; | ||||||||
2856 | |||||||||
2857 | /// Information about the contents of the high-bits in boolean values held in | ||||||||
2858 | /// a type wider than i1. See getBooleanContents. | ||||||||
2859 | BooleanContent BooleanFloatContents; | ||||||||
2860 | |||||||||
2861 | /// Information about the contents of the high-bits in boolean vector values | ||||||||
2862 | /// when the element type is wider than i1. See getBooleanContents. | ||||||||
2863 | BooleanContent BooleanVectorContents; | ||||||||
2864 | |||||||||
2865 | /// The target scheduling preference: shortest possible total cycles or lowest | ||||||||
2866 | /// register usage. | ||||||||
2867 | Sched::Preference SchedPreferenceInfo; | ||||||||
2868 | |||||||||
2869 | /// The minimum alignment that any argument on the stack needs to have. | ||||||||
2870 | Align MinStackArgumentAlignment; | ||||||||
2871 | |||||||||
2872 | /// The minimum function alignment (used when optimizing for size, and to | ||||||||
2873 | /// prevent explicitly provided alignment from leading to incorrect code). | ||||||||
2874 | Align MinFunctionAlignment; | ||||||||
2875 | |||||||||
2876 | /// The preferred function alignment (used when alignment unspecified and | ||||||||
2877 | /// optimizing for speed). | ||||||||
2878 | Align PrefFunctionAlignment; | ||||||||
2879 | |||||||||
2880 |   /// The preferred loop alignment (in log2, not in bytes). | ||||||||
2881 | Align PrefLoopAlignment; | ||||||||
2882 | |||||||||
2883 | /// Size in bits of the maximum atomics size the backend supports. | ||||||||
2884 | /// Accesses larger than this will be expanded by AtomicExpandPass. | ||||||||
2885 | unsigned MaxAtomicSizeInBitsSupported; | ||||||||
2886 | |||||||||
2887 | /// Size in bits of the minimum cmpxchg or ll/sc operation the | ||||||||
2888 | /// backend supports. | ||||||||
2889 | unsigned MinCmpXchgSizeInBits; | ||||||||
2890 | |||||||||
2891 | /// This indicates if the target supports unaligned atomic operations. | ||||||||
2892 | bool SupportsUnalignedAtomics; | ||||||||
2893 | |||||||||
2894 | /// If set to a physical register, this specifies the register that | ||||||||
2895 | /// llvm.savestack/llvm.restorestack should save and restore. | ||||||||
2896 | Register StackPointerRegisterToSaveRestore; | ||||||||
2897 | |||||||||
2898 | /// This indicates the default register class to use for each ValueType the | ||||||||
2899 | /// target supports natively. | ||||||||
2900 | const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; | ||||||||
2901 | uint16_t NumRegistersForVT[MVT::LAST_VALUETYPE]; | ||||||||
2902 | MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; | ||||||||
2903 | |||||||||
2904 | /// This indicates the "representative" register class to use for each | ||||||||
2905 | /// ValueType the target supports natively. This information is used by the | ||||||||
2906 | /// scheduler to track register pressure. By default, the representative | ||||||||
2907 | /// register class is the largest legal super-reg register class of the | ||||||||
2908 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's | ||||||||
2909 | /// representative class would be GR32. | ||||||||
2910 | const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; | ||||||||
2911 | |||||||||
2912 | /// This indicates the "cost" of the "representative" register class for each | ||||||||
2913 | /// ValueType. The cost is used by the scheduler to approximate register | ||||||||
2914 | /// pressure. | ||||||||
2915 | uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; | ||||||||
2916 | |||||||||
2917 | /// For any value types we are promoting or expanding, this contains the value | ||||||||
2918 | /// type that we are changing to. For Expanded types, this contains one step | ||||||||
2919 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required | ||||||||
2920 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds | ||||||||
2921 | /// the same type (e.g. i32 -> i32). | ||||||||
2922 | MVT TransformToType[MVT::LAST_VALUETYPE]; | ||||||||
2923 | |||||||||
2924 | /// For each operation and each value type, keep a LegalizeAction that | ||||||||
2925 | /// indicates how instruction selection should deal with the operation. Most | ||||||||
2926 | /// operations are Legal (aka, supported natively by the target), but | ||||||||
2927 | /// operations that are not should be described. Note that operations on | ||||||||
2928 | /// non-legal value types are not described here. | ||||||||
2929 | LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; | ||||||||
2930 | |||||||||
2931 | /// For each load extension type and each value type, keep a LegalizeAction | ||||||||
2932 | /// that indicates how instruction selection should deal with a load of a | ||||||||
2933 | /// specific value type and extension type. Uses 4-bits to store the action | ||||||||
2934 | /// for each of the 4 load ext types. | ||||||||
2935 | uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; | ||||||||
2936 | |||||||||
2937 | /// For each value type pair keep a LegalizeAction that indicates whether a | ||||||||
2938 | /// truncating store of a specific value type and truncating type is legal. | ||||||||
2939 | LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; | ||||||||
2940 | |||||||||
2941 | /// For each indexed mode and each value type, keep a quad of LegalizeAction | ||||||||
2942 | /// that indicates how instruction selection should deal with the load / | ||||||||
2943 | /// store / maskedload / maskedstore. | ||||||||
2944 | /// | ||||||||
2945 | /// The first dimension is the value_type for the reference. The second | ||||||||
2946 | /// dimension represents the various modes for load store. | ||||||||
2947 | uint16_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; | ||||||||
2948 | |||||||||
2949 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that | ||||||||
2950 | /// indicates how instruction selection should deal with the condition code. | ||||||||
2951 | /// | ||||||||
2952 | /// Because each CC action takes up 4 bits, we need to have the array size be | ||||||||
2953 | /// large enough to fit all of the value types. This can be done by rounding | ||||||||
2954 | /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. | ||||||||
2955 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; | ||||||||
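  // For illustration (a sketch of the packing described above): each 32-bit
  // entry holds eight 4-bit actions, so the action for condition code CC and
  // simple value type VT is recovered roughly as
  //
  //   uint32_t Shift = 4 * ((unsigned)VT.SimpleTy & 0x7);
  //   uint32_t Value = CondCodeActions[CC][(unsigned)VT.SimpleTy >> 3];
  //   LegalizeAction Action = (LegalizeAction)((Value >> Shift) & 0xF);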
2956 | |||||||||
2957 | ValueTypeActionImpl ValueTypeActions; | ||||||||
2958 | |||||||||
2959 | private: | ||||||||
2960 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; | ||||||||
2961 | |||||||||
2962 | /// Targets can specify ISD nodes that they would like PerformDAGCombine | ||||||||
2963 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this | ||||||||
2964 | /// array. | ||||||||
2965 | unsigned char | ||||||||
2966 |       TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; | ||||||||
2967 | |||||||||
2968 | /// For operations that must be promoted to a specific type, this holds the | ||||||||
2969 | /// destination type. This map should be sparse, so don't hold it as an | ||||||||
2970 | /// array. | ||||||||
2971 | /// | ||||||||
2972 | /// Targets add entries to this map with AddPromotedToType(..), clients access | ||||||||
2973 | /// this with getTypeToPromoteTo(..). | ||||||||
2974 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> | ||||||||
2975 | PromoteToType; | ||||||||
2976 | |||||||||
2977 |   /// Stores the name of each libcall. | ||||||||
2978 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; | ||||||||
2979 | |||||||||
2980 | /// The ISD::CondCode that should be used to test the result of each of the | ||||||||
2981 | /// comparison libcall against zero. | ||||||||
2982 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; | ||||||||
2983 | |||||||||
2984 | /// Stores the CallingConv that should be used for each libcall. | ||||||||
2985 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; | ||||||||
2986 | |||||||||
2987 | /// Set default libcall names and calling conventions. | ||||||||
2988 | void InitLibcalls(const Triple &TT); | ||||||||
2989 | |||||||||
2990 | /// The bits of IndexedModeActions used to store the legalisation actions | ||||||||
2991 | /// We store the data as | ML | MS | L | S | each taking 4 bits. | ||||||||
2992 | enum IndexedModeActionsBits { | ||||||||
2993 | IMAB_Store = 0, | ||||||||
2994 | IMAB_Load = 4, | ||||||||
2995 | IMAB_MaskedStore = 8, | ||||||||
2996 | IMAB_MaskedLoad = 12 | ||||||||
2997 | }; | ||||||||
2998 | |||||||||
2999 | void setIndexedModeAction(unsigned IdxMode, MVT VT, unsigned Shift, | ||||||||
3000 | LegalizeAction Action) { | ||||||||
3001 |     assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && | ||||||||
3002 |            (unsigned)Action < 0xf && "Table isn't big enough!"); | ||||||||
3003 | unsigned Ty = (unsigned)VT.SimpleTy; | ||||||||
3004 | IndexedModeActions[Ty][IdxMode] &= ~(0xf << Shift); | ||||||||
3005 | IndexedModeActions[Ty][IdxMode] |= ((uint16_t)Action) << Shift; | ||||||||
3006 | } | ||||||||
3007 | |||||||||
3008 | LegalizeAction getIndexedModeAction(unsigned IdxMode, MVT VT, | ||||||||
3009 | unsigned Shift) const { | ||||||||
3010 |     assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && | ||||||||
3011 |            "Table isn't big enough!"); | ||||||||
3012 | unsigned Ty = (unsigned)VT.SimpleTy; | ||||||||
3013 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] >> Shift) & 0xf); | ||||||||
3014 | } | ||||||||
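  // Worked example (a sketch): with the | ML | MS | L | S | layout above, each
  // 16-bit IndexedModeActions entry packs four 4-bit actions. Marking
  // pre-increment loads of v4i32 as Legal and reading the setting back goes
  // through these helpers roughly as
  //
  //   setIndexedModeAction(ISD::PRE_INC, MVT::v4i32, IMAB_Load, Legal);
  //   LegalizeAction A = getIndexedModeAction(ISD::PRE_INC, MVT::v4i32, IMAB_Load);
  //
  // The setIndexedLoadAction/getIndexedLoadAction wrappers declared earlier in
  // this class forward the appropriate IMAB_* shift on behalf of callers.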
3015 | |||||||||
3016 | protected: | ||||||||
3017 | /// Return true if the extension represented by \p I is free. | ||||||||
3018 | /// \pre \p I is a sign, zero, or fp extension and | ||||||||
3019 | /// is[Z|FP]ExtFree of the related types is not true. | ||||||||
3020 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } | ||||||||
3021 | |||||||||
3022 |   /// Depth that GatherAllAliases should continue looking for chain | ||||||||
3023 | /// dependencies when trying to find a more preferable chain. As an | ||||||||
3024 | /// approximation, this should be more than the number of consecutive stores | ||||||||
3025 | /// expected to be merged. | ||||||||
3026 | unsigned GatherAllAliasesMaxDepth; | ||||||||
3027 | |||||||||
3028 | /// \brief Specify maximum number of store instructions per memset call. | ||||||||
3029 | /// | ||||||||
3030 | /// When lowering \@llvm.memset this field specifies the maximum number of | ||||||||
3031 | /// store operations that may be substituted for the call to memset. Targets | ||||||||
3032 | /// must set this value based on the cost threshold for that target. Targets | ||||||||
3033 | /// should assume that the memset will be done using as many of the largest | ||||||||
3034 | /// store operations first, followed by smaller ones, if necessary, per | ||||||||
3035 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine | ||||||||
3036 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte | ||||||||
3037 | /// store. This only applies to setting a constant array of a constant size. | ||||||||
3038 | unsigned MaxStoresPerMemset; | ||||||||
3039 | /// Likewise for functions with the OptSize attribute. | ||||||||
3040 | unsigned MaxStoresPerMemsetOptSize; | ||||||||
3041 | |||||||||
3042 | /// \brief Specify maximum number of store instructions per memcpy call. | ||||||||
3043 | /// | ||||||||
3044 | /// When lowering \@llvm.memcpy this field specifies the maximum number of | ||||||||
3045 | /// store operations that may be substituted for a call to memcpy. Targets | ||||||||
3046 | /// must set this value based on the cost threshold for that target. Targets | ||||||||
3047 | /// should assume that the memcpy will be done using as many of the largest | ||||||||
3048 | /// store operations first, followed by smaller ones, if necessary, per | ||||||||
3049 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine | ||||||||
3050 |   /// with 32-bit alignment would result in one 4-byte store, one 2-byte store | ||||||||
3051 | /// and one 1-byte store. This only applies to copying a constant array of | ||||||||
3052 | /// constant size. | ||||||||
3053 | unsigned MaxStoresPerMemcpy; | ||||||||
3054 | /// Likewise for functions with the OptSize attribute. | ||||||||
3055 | unsigned MaxStoresPerMemcpyOptSize; | ||||||||
3056 | /// \brief Specify max number of store instructions to glue in inlined memcpy. | ||||||||
3057 | /// | ||||||||
3058 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number | ||||||||
3059 | /// of store instructions to keep together. This helps in pairing and | ||||||||
3060 |   /// vectorization later on. | ||||||||
3061 | unsigned MaxGluedStoresPerMemcpy = 0; | ||||||||
3062 | |||||||||
3063 | /// \brief Specify maximum number of load instructions per memcmp call. | ||||||||
3064 | /// | ||||||||
3065 | /// When lowering \@llvm.memcmp this field specifies the maximum number of | ||||||||
3066 | /// pairs of load operations that may be substituted for a call to memcmp. | ||||||||
3067 | /// Targets must set this value based on the cost threshold for that target. | ||||||||
3068 | /// Targets should assume that the memcmp will be done using as many of the | ||||||||
3069 | /// largest load operations first, followed by smaller ones, if necessary, per | ||||||||
3070 | /// alignment restrictions. For example, loading 7 bytes on a 32-bit machine | ||||||||
3071 |   /// with 32-bit alignment would result in one 4-byte load, one 2-byte load | ||||||||
3072 |   /// and one 1-byte load. This only applies to comparing a constant array of | ||||||||
3073 | /// constant size. | ||||||||
3074 | unsigned MaxLoadsPerMemcmp; | ||||||||
3075 | /// Likewise for functions with the OptSize attribute. | ||||||||
3076 | unsigned MaxLoadsPerMemcmpOptSize; | ||||||||
3077 | |||||||||
3078 | /// \brief Specify maximum number of store instructions per memmove call. | ||||||||
3079 | /// | ||||||||
3080 | /// When lowering \@llvm.memmove this field specifies the maximum number of | ||||||||
3081 | /// store instructions that may be substituted for a call to memmove. Targets | ||||||||
3082 | /// must set this value based on the cost threshold for that target. Targets | ||||||||
3083 | /// should assume that the memmove will be done using as many of the largest | ||||||||
3084 | /// store operations first, followed by smaller ones, if necessary, per | ||||||||
3085 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine | ||||||||
3086 | /// with 8-bit alignment would result in nine 1-byte stores. This only | ||||||||
3087 | /// applies to copying a constant array of constant size. | ||||||||
3088 | unsigned MaxStoresPerMemmove; | ||||||||
3089 | /// Likewise for functions with the OptSize attribute. | ||||||||
3090 | unsigned MaxStoresPerMemmoveOptSize; | ||||||||
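  // For illustration (hypothetical values): a target tunes the thresholds above
  // in its TargetLowering constructor, typically with tighter limits when the
  // function is marked OptSize:
  //
  //   MaxStoresPerMemset  = 8;   MaxStoresPerMemsetOptSize  = 4;
  //   MaxStoresPerMemcpy  = 4;   MaxStoresPerMemcpyOptSize  = 2;
  //   MaxStoresPerMemmove = 4;   MaxStoresPerMemmoveOptSize = 2;
  //   MaxLoadsPerMemcmp   = 4;   MaxLoadsPerMemcmpOptSize   = 2;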
3091 | |||||||||
3092 | /// Tells the code generator that select is more expensive than a branch if | ||||||||
3093 | /// the branch is usually predicted right. | ||||||||
3094 | bool PredictableSelectIsExpensive; | ||||||||
3095 | |||||||||
3096 | /// \see enableExtLdPromotion. | ||||||||
3097 | bool EnableExtLdPromotion; | ||||||||
3098 | |||||||||
3099 | /// Return true if the value types that can be represented by the specified | ||||||||
3100 | /// register class are all legal. | ||||||||
3101 | bool isLegalRC(const TargetRegisterInfo &TRI, | ||||||||
3102 | const TargetRegisterClass &RC) const; | ||||||||
3103 | |||||||||
3104 |   /// Replace/modify any TargetFrameIndex operands with a target-dependent | ||||||||
3105 | /// sequence of memory operands that is recognized by PrologEpilogInserter. | ||||||||
3106 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, | ||||||||
3107 | MachineBasicBlock *MBB) const; | ||||||||
3108 | |||||||||
3109 | /// Replace/modify the XRay custom event operands with target-dependent | ||||||||
3110 | /// details. | ||||||||
3111 | MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, | ||||||||
3112 | MachineBasicBlock *MBB) const; | ||||||||
3113 | |||||||||
3114 | /// Replace/modify the XRay typed event operands with target-dependent | ||||||||
3115 | /// details. | ||||||||
3116 | MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, | ||||||||
3117 | MachineBasicBlock *MBB) const; | ||||||||
3118 | |||||||||
3119 | bool IsStrictFPEnabled; | ||||||||
3120 | }; | ||||||||
3121 | |||||||||
3122 | /// This class defines information used to lower LLVM code to legal SelectionDAG | ||||||||
3123 | /// operators that the target instruction selector can accept natively. | ||||||||
3124 | /// | ||||||||
3125 | /// This class also defines callbacks that targets must implement to lower | ||||||||
3126 | /// target-specific constructs to SelectionDAG operators. | ||||||||
3127 | class TargetLowering : public TargetLoweringBase { | ||||||||
3128 | public: | ||||||||
3129 | struct DAGCombinerInfo; | ||||||||
3130 | struct MakeLibCallOptions; | ||||||||
3131 | |||||||||
3132 | TargetLowering(const TargetLowering &) = delete; | ||||||||
3133 | TargetLowering &operator=(const TargetLowering &) = delete; | ||||||||
3134 | |||||||||
3135 | explicit TargetLowering(const TargetMachine &TM); | ||||||||
3136 | |||||||||
3137 | bool isPositionIndependent() const; | ||||||||
3138 | |||||||||
3139 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, | ||||||||
3140 | FunctionLoweringInfo *FLI, | ||||||||
3141 | LegacyDivergenceAnalysis *DA) const { | ||||||||
3142 | return false; | ||||||||
3143 | } | ||||||||
3144 | |||||||||
3145 | virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { | ||||||||
3146 | return false; | ||||||||
3147 | } | ||||||||
3148 | |||||||||
3149 | /// Returns true by value, base pointer and offset pointer and addressing mode | ||||||||
3150 | /// by reference if the node's address can be legally represented as | ||||||||
3151 | /// pre-indexed load / store address. | ||||||||
3152 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, | ||||||||
3153 | SDValue &/*Offset*/, | ||||||||
3154 | ISD::MemIndexedMode &/*AM*/, | ||||||||
3155 | SelectionDAG &/*DAG*/) const { | ||||||||
3156 | return false; | ||||||||
3157 | } | ||||||||
3158 | |||||||||
3159 | /// Returns true by value, base pointer and offset pointer and addressing mode | ||||||||
3160 | /// by reference if this node can be combined with a load / store to form a | ||||||||
3161 | /// post-indexed load / store. | ||||||||
3162 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, | ||||||||
3163 | SDValue &/*Base*/, | ||||||||
3164 | SDValue &/*Offset*/, | ||||||||
3165 | ISD::MemIndexedMode &/*AM*/, | ||||||||
3166 | SelectionDAG &/*DAG*/) const { | ||||||||
3167 | return false; | ||||||||
3168 | } | ||||||||
3169 | |||||||||
3170 | /// Returns true if the specified base+offset is a legal indexed addressing | ||||||||
3171 | /// mode for this target. \p MI is the load or store instruction that is being | ||||||||
3172 | /// considered for transformation. | ||||||||
3173 | virtual bool isIndexingLegal(MachineInstr &MI, Register Base, Register Offset, | ||||||||
3174 | bool IsPre, MachineRegisterInfo &MRI) const { | ||||||||
3175 | return false; | ||||||||
3176 | } | ||||||||
3177 | |||||||||
3178 | /// Return the entry encoding for a jump table in the current function. The | ||||||||
3179 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. | ||||||||
3180 | virtual unsigned getJumpTableEncoding() const; | ||||||||
3181 | |||||||||
3182 | virtual const MCExpr * | ||||||||
3183 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, | ||||||||
3184 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, | ||||||||
3185 | MCContext &/*Ctx*/) const { | ||||||||
3186 |     llvm_unreachable("Need to implement this hook if target has custom JTIs"); | ||||||||
3187 | } | ||||||||
3188 | |||||||||
3189 | /// Returns relocation base for the given PIC jumptable. | ||||||||
3190 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, | ||||||||
3191 | SelectionDAG &DAG) const; | ||||||||
3192 | |||||||||
3193 | /// This returns the relocation base for the given PIC jumptable, the same as | ||||||||
3194 | /// getPICJumpTableRelocBase, but as an MCExpr. | ||||||||
3195 | virtual const MCExpr * | ||||||||
3196 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, | ||||||||
3197 | unsigned JTI, MCContext &Ctx) const; | ||||||||
3198 | |||||||||
3199 | /// Return true if folding a constant offset with the given GlobalAddress is | ||||||||
3200 | /// legal. It is frequently not legal in PIC relocation models. | ||||||||
3201 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; | ||||||||
3202 | |||||||||
3203 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, | ||||||||
3204 | SDValue &Chain) const; | ||||||||
3205 | |||||||||
3206 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, | ||||||||
3207 | SDValue &NewRHS, ISD::CondCode &CCCode, | ||||||||
3208 | const SDLoc &DL, const SDValue OldLHS, | ||||||||
3209 | const SDValue OldRHS) const; | ||||||||
3210 | |||||||||
3211 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, | ||||||||
3212 | SDValue &NewRHS, ISD::CondCode &CCCode, | ||||||||
3213 | const SDLoc &DL, const SDValue OldLHS, | ||||||||
3214 | const SDValue OldRHS, SDValue &Chain, | ||||||||
3215 | bool IsSignaling = false) const; | ||||||||
3216 | |||||||||
3217 | /// Returns a pair of (return value, chain). | ||||||||
3218 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. | ||||||||
3219 | std::pair<SDValue, SDValue> makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, | ||||||||
3220 | EVT RetVT, ArrayRef<SDValue> Ops, | ||||||||
3221 | MakeLibCallOptions CallOptions, | ||||||||
3222 | const SDLoc &dl, | ||||||||
3223 | SDValue Chain = SDValue()) const; | ||||||||
3224 | |||||||||
3225 | /// Check whether parameters to a call that are passed in callee saved | ||||||||
3226 | /// registers are the same as from the calling function. This needs to be | ||||||||
3227 | /// checked for tail call eligibility. | ||||||||
3228 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, | ||||||||
3229 | const uint32_t *CallerPreservedMask, | ||||||||
3230 | const SmallVectorImpl<CCValAssign> &ArgLocs, | ||||||||
3231 | const SmallVectorImpl<SDValue> &OutVals) const; | ||||||||
3232 | |||||||||
3233 | //===--------------------------------------------------------------------===// | ||||||||
3234 | // TargetLowering Optimization Methods | ||||||||
3235 | // | ||||||||
3236 | |||||||||
3237 | /// A convenience struct that encapsulates a DAG, and two SDValues for | ||||||||
3238 | /// returning information from TargetLowering to its clients that want to | ||||||||
3239 | /// combine. | ||||||||
3240 | struct TargetLoweringOpt { | ||||||||
3241 | SelectionDAG &DAG; | ||||||||
3242 | bool LegalTys; | ||||||||
3243 | bool LegalOps; | ||||||||
3244 | SDValue Old; | ||||||||
3245 | SDValue New; | ||||||||
3246 | |||||||||
3247 | explicit TargetLoweringOpt(SelectionDAG &InDAG, | ||||||||
3248 | bool LT, bool LO) : | ||||||||
3249 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} | ||||||||
3250 | |||||||||
3251 | bool LegalTypes() const { return LegalTys; } | ||||||||
3252 | bool LegalOperations() const { return LegalOps; } | ||||||||
3253 | |||||||||
3254 | bool CombineTo(SDValue O, SDValue N) { | ||||||||
3255 | Old = O; | ||||||||
3256 | New = N; | ||||||||
3257 | return true; | ||||||||
3258 | } | ||||||||
3259 | }; | ||||||||
3260 | |||||||||
3261 | /// Determines the optimal series of memory ops to replace the memset / memcpy. | ||||||||
3262 | /// Return true if the number of memory ops is below the threshold (Limit). | ||||||||
3263 | /// It returns the types of the sequence of memory ops to perform | ||||||||
3264 | /// memset / memcpy by reference. | ||||||||
3265 | bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, unsigned Limit, | ||||||||
3266 | const MemOp &Op, unsigned DstAS, unsigned SrcAS, | ||||||||
3267 | const AttributeList &FuncAttributes) const; | ||||||||
3268 | |||||||||
3269 | /// Check to see if the specified operand of the specified instruction is a | ||||||||
3270 | /// constant integer. If so, check to see if there are any bits set in the | ||||||||
3271 | /// constant that are not demanded. If so, shrink the constant and return | ||||||||
3272 | /// true. | ||||||||
3273 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, | ||||||||
3274 | const APInt &DemandedElts, | ||||||||
3275 | TargetLoweringOpt &TLO) const; | ||||||||
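  // For illustration (a sketch of the effect): if only the low 16 bits of
  //
  //   (and X, 0xFFFF00FF)
  //
  // are demanded, the constant can be shrunk to 0x00FF, which is often cheaper
  // to materialize or matches a narrower instruction form.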
3276 | |||||||||
3277 | /// Helper wrapper around ShrinkDemandedConstant, demanding all elements. | ||||||||
3278 | bool ShrinkDemandedConstant(SDValue Op, const APInt &DemandedBits, | ||||||||
3279 | TargetLoweringOpt &TLO) const; | ||||||||
3280 | |||||||||
3281 | // Target hook to do target-specific const optimization, which is called by | ||||||||
3282 | // ShrinkDemandedConstant. This function should return true if the target | ||||||||
3283 | // doesn't want ShrinkDemandedConstant to further optimize the constant. | ||||||||
3284 | virtual bool targetShrinkDemandedConstant(SDValue Op, | ||||||||
3285 | const APInt &DemandedBits, | ||||||||
3286 | const APInt &DemandedElts, | ||||||||
3287 | TargetLoweringOpt &TLO) const { | ||||||||
3288 | return false; | ||||||||
3289 | } | ||||||||
3290 | |||||||||
3291 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This | ||||||||
3292 | /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be | ||||||||
3293 | /// generalized for targets with other types of implicit widening casts. | ||||||||
3294 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, | ||||||||
3295 | TargetLoweringOpt &TLO) const; | ||||||||
3296 | |||||||||
3297 | /// Look at Op. At this point, we know that only the DemandedBits bits of the | ||||||||
3298 | /// result of Op are ever used downstream. If we can use this information to | ||||||||
3299 | /// simplify Op, create a new simplified DAG node and return true, returning | ||||||||
3300 | /// the original and new nodes in Old and New. Otherwise, analyze the | ||||||||
3301 | /// expression and return a mask of KnownOne and KnownZero bits for the | ||||||||
3302 | /// expression (used to simplify the caller). The KnownZero/One bits may only | ||||||||
3303 | /// be accurate for those bits in the Demanded masks. | ||||||||
3304 | /// \p AssumeSingleUse When this parameter is true, this function will | ||||||||
3305 | /// attempt to simplify \p Op even if there are multiple uses. | ||||||||
3306 | /// Callers are responsible for correctly updating the DAG based on the | ||||||||
3307 |   /// results of this function, because simply replacing TLO.Old | ||||||||
3308 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old | ||||||||
3309 | /// has multiple uses. | ||||||||
3310 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, | ||||||||
3311 | const APInt &DemandedElts, KnownBits &Known, | ||||||||
3312 | TargetLoweringOpt &TLO, unsigned Depth = 0, | ||||||||
3313 | bool AssumeSingleUse = false) const; | ||||||||
3314 | |||||||||
3315 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. | ||||||||
3316 | /// Adds Op back to the worklist upon success. | ||||||||
3317 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, | ||||||||
3318 | KnownBits &Known, TargetLoweringOpt &TLO, | ||||||||
3319 | unsigned Depth = 0, | ||||||||
3320 | bool AssumeSingleUse = false) const; | ||||||||
3321 | |||||||||
3322 | /// Helper wrapper around SimplifyDemandedBits. | ||||||||
3323 | /// Adds Op back to the worklist upon success. | ||||||||
3324 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, | ||||||||
3325 | DAGCombinerInfo &DCI) const; | ||||||||
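  // A minimal usage sketch (hypothetical target combine): when a combine in a
  // TargetLowering subclass knows only some result bits of an operand matter,
  // it can call the DCI overload and return once the DAG has been updated, e.g.
  //
  //   APInt DemandedBits = APInt::getLowBitsSet(32, 16); // only low 16 bits used
  //   if (SimplifyDemandedBits(Op, DemandedBits, DCI))
  //     return SDValue(N, 0);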
3326 | |||||||||
3327 | /// More limited version of SimplifyDemandedBits that can be used to "look | ||||||||
3328 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - | ||||||||
3329 | /// bitwise ops etc. | ||||||||
3330 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, | ||||||||
3331 | const APInt &DemandedElts, | ||||||||
3332 | SelectionDAG &DAG, | ||||||||
3333 | unsigned Depth) const; | ||||||||
3334 | |||||||||
3335 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all | ||||||||
3336 | /// elements. | ||||||||
3337 | SDValue SimplifyMultipleUseDemandedBits(SDValue Op, const APInt &DemandedBits, | ||||||||
3338 | SelectionDAG &DAG, | ||||||||
3339 | unsigned Depth = 0) const; | ||||||||
3340 | |||||||||
3341 | /// Helper wrapper around SimplifyMultipleUseDemandedBits, demanding all | ||||||||
3342 | /// bits from only some vector elements. | ||||||||
3343 | SDValue SimplifyMultipleUseDemandedVectorElts(SDValue Op, | ||||||||
3344 | const APInt &DemandedElts, | ||||||||
3345 | SelectionDAG &DAG, | ||||||||
3346 | unsigned Depth = 0) const; | ||||||||
3347 | |||||||||
3348 | /// Look at Vector Op. At this point, we know that only the DemandedElts | ||||||||
3349 | /// elements of the result of Op are ever used downstream. If we can use | ||||||||
3350 | /// this information to simplify Op, create a new simplified DAG node and | ||||||||
3351 | /// return true, storing the original and new nodes in TLO. | ||||||||
3352 | /// Otherwise, analyze the expression and return a mask of KnownUndef and | ||||||||
3353 | /// KnownZero elements for the expression (used to simplify the caller). | ||||||||
3354 | /// The KnownUndef/Zero elements may only be accurate for those bits | ||||||||
3355 | /// in the DemandedMask. | ||||||||
3356 | /// \p AssumeSingleUse When this parameter is true, this function will | ||||||||
3357 | /// attempt to simplify \p Op even if there are multiple uses. | ||||||||
3358 | /// Callers are responsible for correctly updating the DAG based on the | ||||||||
3359 | /// results of this function, because simply replacing TLO.Old | ||||||||
3360 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old | ||||||||
3361 | /// has multiple uses. | ||||||||
3362 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, | ||||||||
3363 | APInt &KnownUndef, APInt &KnownZero, | ||||||||
3364 | TargetLoweringOpt &TLO, unsigned Depth = 0, | ||||||||
3365 | bool AssumeSingleUse = false) const; | ||||||||
3366 | |||||||||
3367 | /// Helper wrapper around SimplifyDemandedVectorElts. | ||||||||
3368 | /// Adds Op back to the worklist upon success. | ||||||||
3369 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, | ||||||||
3370 | APInt &KnownUndef, APInt &KnownZero, | ||||||||
3371 | DAGCombinerInfo &DCI) const; | ||||||||
3372 | |||||||||
3373 | /// Determine which of the bits specified in Mask are known to be either zero | ||||||||
3374 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts | ||||||||
3375 | /// argument allows us to only collect the known bits that are shared by the | ||||||||
3376 | /// requested vector elements. | ||||||||
3377 | virtual void computeKnownBitsForTargetNode(const SDValue Op, | ||||||||
3378 | KnownBits &Known, | ||||||||
3379 | const APInt &DemandedElts, | ||||||||
3380 | const SelectionDAG &DAG, | ||||||||
3381 | unsigned Depth = 0) const; | ||||||||
3382 | |||||||||
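// A minimal sketch, not part of this header, of a computeKnownBitsForTargetNode
// override for an assumed zero-extending bitfield-extract node
// (MYTGTISD::UBFX, with the field width in operand 2): every bit above the
// extracted field is known to be zero.
void MyTargetLowering::computeKnownBitsForTargetNode(
    const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
    const SelectionDAG &DAG, unsigned Depth) const {
  if (Op.getOpcode() != MYTGTISD::UBFX)
    return;
  unsigned Width = Op.getConstantOperandVal(2); // assumed operand layout
  Known.Zero.setBitsFrom(Width);
}
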
3383 | /// Determine which of the bits specified in Mask are known to be either zero | ||||||||
3384 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts | ||||||||
3385 | /// argument allows us to only collect the known bits that are shared by the | ||||||||
3386 | /// requested vector elements. This is for GISel. | ||||||||
3387 | virtual void computeKnownBitsForTargetInstr(GISelKnownBits &Analysis, | ||||||||
3388 | Register R, KnownBits &Known, | ||||||||
3389 | const APInt &DemandedElts, | ||||||||
3390 | const MachineRegisterInfo &MRI, | ||||||||
3391 | unsigned Depth = 0) const; | ||||||||
3392 | |||||||||
3393 | /// Determine the known alignment for the pointer value \p R. This can | ||||||||
3394 | /// typically be inferred from the number of low known 0 bits. However, for a | ||||||||
3395 | /// pointer with a non-integral address space, the alignment value may be | ||||||||
3396 | /// independent from the known low bits. | ||||||||
3397 | virtual Align computeKnownAlignForTargetInstr(GISelKnownBits &Analysis, | ||||||||
3398 | Register R, | ||||||||
3399 | const MachineRegisterInfo &MRI, | ||||||||
3400 | unsigned Depth = 0) const; | ||||||||
3401 | |||||||||
3402 | /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. | ||||||||
3403 | /// Default implementation computes low bits based on alignment | ||||||||
3404 | /// information. This should preserve known bits passed into it. | ||||||||
3405 | virtual void computeKnownBitsForFrameIndex(int FIOp, | ||||||||
3406 | KnownBits &Known, | ||||||||
3407 | const MachineFunction &MF) const; | ||||||||
3408 | |||||||||
3409 | /// This method can be implemented by targets that want to expose additional | ||||||||
3410 | /// information about sign bits to the DAG Combiner. The DemandedElts | ||||||||
3411 | /// argument allows us to only collect the minimum sign bits that are shared | ||||||||
3412 | /// by the requested vector elements. | ||||||||
3413 | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, | ||||||||
3414 | const APInt &DemandedElts, | ||||||||
3415 | const SelectionDAG &DAG, | ||||||||
3416 | unsigned Depth = 0) const; | ||||||||
3417 | |||||||||
3418 | /// This method can be implemented by targets that want to expose additional | ||||||||
3419 | /// information about sign bits to GlobalISel combiners. The DemandedElts | ||||||||
3420 | /// argument allows us to only collect the minimum sign bits that are shared | ||||||||
3421 | /// by the requested vector elements. | ||||||||
3422 | virtual unsigned computeNumSignBitsForTargetInstr(GISelKnownBits &Analysis, | ||||||||
3423 | Register R, | ||||||||
3424 | const APInt &DemandedElts, | ||||||||
3425 | const MachineRegisterInfo &MRI, | ||||||||
3426 | unsigned Depth = 0) const; | ||||||||
3427 | |||||||||
3428 | /// Attempt to simplify any target nodes based on the demanded vector | ||||||||
3429 | /// elements, returning true on success. Otherwise, analyze the expression and | ||||||||
3430 | /// return a mask of KnownUndef and KnownZero elements for the expression | ||||||||
3431 | /// (used to simplify the caller). The KnownUndef/Zero elements may only be | ||||||||
3432 | /// accurate for those bits in the DemandedMask. | ||||||||
3433 | virtual bool SimplifyDemandedVectorEltsForTargetNode( | ||||||||
3434 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, | ||||||||
3435 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; | ||||||||
3436 | |||||||||
3437 | /// Attempt to simplify any target nodes based on the demanded bits/elts, | ||||||||
3438 | /// returning true on success. Otherwise, analyze the | ||||||||
3439 | /// expression and return a mask of KnownOne and KnownZero bits for the | ||||||||
3440 | /// expression (used to simplify the caller). The KnownZero/One bits may only | ||||||||
3441 | /// be accurate for those bits in the Demanded masks. | ||||||||
3442 | virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, | ||||||||
3443 | const APInt &DemandedBits, | ||||||||
3444 | const APInt &DemandedElts, | ||||||||
3445 | KnownBits &Known, | ||||||||
3446 | TargetLoweringOpt &TLO, | ||||||||
3447 | unsigned Depth = 0) const; | ||||||||
3448 | |||||||||
3449 | /// More limited version of SimplifyDemandedBits that can be used to "look | ||||||||
3450 | /// through" ops that don't contribute to the DemandedBits/DemandedElts - | ||||||||
3451 | /// bitwise ops etc. | ||||||||
3452 | virtual SDValue SimplifyMultipleUseDemandedBitsForTargetNode( | ||||||||
3453 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, | ||||||||
3454 | SelectionDAG &DAG, unsigned Depth) const; | ||||||||
3455 | |||||||||
3456 | /// Tries to build a legal vector shuffle using the provided parameters | ||||||||
3457 | /// or equivalent variations. The Mask argument may be modified as the | ||||||||
3458 | /// function tries different variations. | ||||||||
3459 | /// Returns an empty SDValue if the operation fails. | ||||||||
3460 | SDValue buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0, | ||||||||
3461 | SDValue N1, MutableArrayRef<int> Mask, | ||||||||
3462 | SelectionDAG &DAG) const; | ||||||||
3463 | |||||||||
3464 | /// This method returns the constant pool value that will be loaded by LD. | ||||||||
3465 | /// NOTE: You must check for implicit extensions of the constant by LD. | ||||||||
3466 | virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; | ||||||||
3467 | |||||||||
3468 | /// If \p SNaN is false, \returns true if \p Op is known to never be any | ||||||||
3469 | /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling | ||||||||
3470 | /// NaN. | ||||||||
3471 | virtual bool isKnownNeverNaNForTargetNode(SDValue Op, | ||||||||
3472 | const SelectionDAG &DAG, | ||||||||
3473 | bool SNaN = false, | ||||||||
3474 | unsigned Depth = 0) const; | ||||||||
3475 | struct DAGCombinerInfo { | ||||||||
3476 | void *DC; // The DAG Combiner object. | ||||||||
3477 | CombineLevel Level; | ||||||||
3478 | bool CalledByLegalizer; | ||||||||
3479 | |||||||||
3480 | public: | ||||||||
3481 | SelectionDAG &DAG; | ||||||||
3482 | |||||||||
3483 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) | ||||||||
3484 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} | ||||||||
3485 | |||||||||
3486 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } | ||||||||
3487 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } | ||||||||
3488 | bool isAfterLegalizeDAG() const { return Level >= AfterLegalizeDAG; } | ||||||||
3489 | CombineLevel getDAGCombineLevel() { return Level; } | ||||||||
3490 | bool isCalledByLegalizer() const { return CalledByLegalizer; } | ||||||||
3491 | |||||||||
3492 | void AddToWorklist(SDNode *N); | ||||||||
3493 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); | ||||||||
3494 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); | ||||||||
3495 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); | ||||||||
3496 | |||||||||
3497 | bool recursivelyDeleteUnusedNodes(SDNode *N); | ||||||||
3498 | |||||||||
3499 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); | ||||||||
3500 | }; | ||||||||
3501 | |||||||||
3502 | /// Return true if N is a constant or constant vector equal to the true value | ||||||||
3503 | /// from getBooleanContents(). | ||||||||
3504 | bool isConstTrueVal(const SDNode *N) const; | ||||||||
3505 | |||||||||
3506 | /// Return true if N is a constant or constant vector equal to the false value | ||||||||
3507 | /// from getBooleanContents(). | ||||||||
3508 | bool isConstFalseVal(const SDNode *N) const; | ||||||||
3509 | |||||||||
3510 | /// Return if \p N is a True value when extended to \p VT. | ||||||||
3511 | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; | ||||||||
3512 | |||||||||
3513 | /// Try to simplify a setcc built with the specified operands and cc. If it is | ||||||||
3514 | /// unable to simplify it, return a null SDValue. | ||||||||
3515 | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, | ||||||||
3516 | bool foldBooleans, DAGCombinerInfo &DCI, | ||||||||
3517 | const SDLoc &dl) const; | ||||||||
3518 | |||||||||
3519 | // For targets which wrap addresses, unwrap for analysis. | ||||||||
3520 | virtual SDValue unwrapAddress(SDValue N) const { return N; } | ||||||||
3521 | |||||||||
3522 | /// Returns true (and the GlobalValue and the offset) if the node is a | ||||||||
3523 | /// GlobalAddress + offset. | ||||||||
3524 | virtual bool | ||||||||
3525 | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; | ||||||||
3526 | |||||||||
3527 | /// This method will be invoked for all target nodes and for any | ||||||||
3528 | /// target-independent nodes that the target has registered to be invoked | ||||||||
3529 | /// for. | ||||||||
3530 | /// | ||||||||
3531 | /// The semantics are as follows: | ||||||||
3532 | /// Return Value: | ||||||||
3533 | /// SDValue.Val == 0 - No change was made | ||||||||
3534 | /// SDValue.Val == N - N was replaced, is dead, and is already handled. | ||||||||
3535 | /// otherwise - N should be replaced by the returned Operand. | ||||||||
3536 | /// | ||||||||
3537 | /// In addition, methods provided by DAGCombinerInfo may be used to perform | ||||||||
3538 | /// more complex transformations. | ||||||||
3539 | /// | ||||||||
3540 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; | ||||||||
3541 | |||||||||
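// A minimal sketch, not part of this header, wiring the return-value contract
// above into a hypothetical PerformDAGCombine override.
SDValue MyTargetLowering::PerformDAGCombine(SDNode *N,
                                            DAGCombinerInfo &DCI) const {
  switch (N->getOpcode()) {
  case ISD::ADD:
    // Fold (add x, 0) -> x: "N should be replaced by the returned Operand".
    if (isNullConstant(N->getOperand(1)))
      return N->getOperand(0);
    break;
  default:
    break;
  }
  return SDValue(); // "No change was made".
}
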
3542 | /// Return true if it is profitable to move this shift by a constant amount | ||||||||
3543 | /// through its operand, adjusting any immediate operands as necessary to | ||||||||
3544 | /// preserve semantics. This transformation may not be desirable if it | ||||||||
3545 | /// disrupts a particularly auspicious target-specific tree (e.g. bitfield | ||||||||
3546 | /// extraction in AArch64). By default, it returns true. | ||||||||
3547 | /// | ||||||||
3548 | /// @param N the shift node | ||||||||
3549 | /// @param Level the current DAGCombine legalization level. | ||||||||
3550 | virtual bool isDesirableToCommuteWithShift(const SDNode *N, | ||||||||
3551 | CombineLevel Level) const { | ||||||||
3552 | return true; | ||||||||
3553 | } | ||||||||
3554 | |||||||||
3555 | /// Return true if the target has native support for the specified value type | ||||||||
3556 | /// and it is 'desirable' to use the type for the given node type. e.g. On x86 | ||||||||
3557 | /// i16 is legal, but undesirable since i16 instruction encodings are longer | ||||||||
3558 | /// and some i16 instructions are slow. | ||||||||
3559 | virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { | ||||||||
3560 | // By default, assume all legal types are desirable. | ||||||||
3561 | return isTypeLegal(VT); | ||||||||
3562 | } | ||||||||
3563 | |||||||||
3564 | /// Return true if it is profitable for dag combiner to transform a floating | ||||||||
3565 | /// point op of the specified opcode to an equivalent op of an integer | ||||||||
3566 | /// type. e.g. f32 load -> i32 load can be profitable on ARM. | ||||||||
3567 | virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, | ||||||||
3568 | EVT /*VT*/) const { | ||||||||
3569 | return false; | ||||||||
3570 | } | ||||||||
3571 | |||||||||
3572 | /// This method queries the target whether it is beneficial for the dag combiner to | ||||||||
3573 | /// promote the specified node. If true, it should return the desired | ||||||||
3574 | /// promotion type by reference. | ||||||||
3575 | virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { | ||||||||
3576 | return false; | ||||||||
3577 | } | ||||||||
3578 | |||||||||
3579 | /// Return true if the target supports the swifterror attribute. It optimizes | ||||||||
3580 | /// loads and stores to reads and writes of a specific register. | ||||||||
3581 | virtual bool supportSwiftError() const { | ||||||||
3582 | return false; | ||||||||
3583 | } | ||||||||
3584 | |||||||||
3585 | /// Return true if the target supports that a subset of CSRs for the given | ||||||||
3586 | /// machine function is handled explicitly via copies. | ||||||||
3587 | virtual bool supportSplitCSR(MachineFunction *MF) const { | ||||||||
3588 | return false; | ||||||||
3589 | } | ||||||||
3590 | |||||||||
3591 | /// Perform necessary initialization to handle a subset of CSRs explicitly | ||||||||
3592 | /// via copies. This function is called at the beginning of instruction | ||||||||
3593 | /// selection. | ||||||||
3594 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { | ||||||||
3595 | llvm_unreachable("Not Implemented"); | ||||||||
3596 | } | ||||||||
3597 | |||||||||
3598 | /// Insert explicit copies in entry and exit blocks. We copy a subset of | ||||||||
3599 | /// CSRs to virtual registers in the entry block, and copy them back to | ||||||||
3600 | /// physical registers in the exit blocks. This function is called at the end | ||||||||
3601 | /// of instruction selection. | ||||||||
3602 | virtual void insertCopiesSplitCSR( | ||||||||
3603 | MachineBasicBlock *Entry, | ||||||||
3604 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { | ||||||||
3605 | llvm_unreachable("Not Implemented"); | ||||||||
3606 | } | ||||||||
3607 | |||||||||
3608 | /// Return the newly negated expression if the cost is not expensive and | ||||||||
3609 | /// set the cost in \p Cost to indicate that if it is cheaper or neutral to | ||||||||
3610 | /// do the negation. | ||||||||
3611 | virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, | ||||||||
3612 | bool LegalOps, bool OptForSize, | ||||||||
3613 | NegatibleCost &Cost, | ||||||||
3614 | unsigned Depth = 0) const; | ||||||||
3615 | |||||||||
3616 | /// This is the helper function to return the newly negated expression only | ||||||||
3617 | /// when the cost is cheaper. | ||||||||
3618 | SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, | ||||||||
3619 | bool LegalOps, bool OptForSize, | ||||||||
3620 | unsigned Depth = 0) const { | ||||||||
3621 | NegatibleCost Cost = NegatibleCost::Expensive; | ||||||||
3622 | SDValue Neg = | ||||||||
3623 | getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); | ||||||||
3624 | if (Neg && Cost == NegatibleCost::Cheaper) | ||||||||
3625 | return Neg; | ||||||||
3626 | // Remove the newly created node to avoid side effects on the DAG. | ||||||||
3627 | if (Neg && Neg.getNode()->use_empty()) | ||||||||
3628 | DAG.RemoveDeadNode(Neg.getNode()); | ||||||||
3629 | return SDValue(); | ||||||||
3630 | } | ||||||||
3631 | |||||||||
3632 | /// This is the helper function to return the newly negated expression if | ||||||||
3633 | /// the cost is not expensive. | ||||||||
3634 | SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, | ||||||||
3635 | bool OptForSize, unsigned Depth = 0) const { | ||||||||
3636 | NegatibleCost Cost = NegatibleCost::Expensive; | ||||||||
3637 | return getNegatedExpression(Op, DAG, LegalOps, OptForSize, Cost, Depth); | ||||||||
3638 | } | ||||||||
3639 | |||||||||
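// A minimal sketch, not part of this header, of using the cheaper-only helper
// above in a hypothetical combine (tryFormNegatedFMA is an assumed name):
// fold fsub x, (fmul a, b) into fma(-a, b, x) only when negating the first
// multiplicand is strictly cheaper, since x - a*b == (-a)*b + x.
SDValue tryFormNegatedFMA(SDValue X, SDValue Mul, const TargetLowering &TLI,
                          SelectionDAG &DAG, const SDLoc &DL, EVT VT,
                          bool LegalOperations) {
  if (Mul.getOpcode() != ISD::FMUL)
    return SDValue();
  bool OptForSize = DAG.shouldOptForSize();
  if (SDValue NegA = TLI.getCheaperNegatedExpression(
          Mul.getOperand(0), DAG, LegalOperations, OptForSize))
    return DAG.getNode(ISD::FMA, DL, VT, NegA, Mul.getOperand(1), X);
  return SDValue();
}
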
3640 | //===--------------------------------------------------------------------===// | ||||||||
3641 | // Lowering methods - These methods must be implemented by targets so that | ||||||||
3642 | // the SelectionDAGBuilder code knows how to lower these. | ||||||||
3643 | // | ||||||||
3644 | |||||||||
3645 | /// Target-specific splitting of values into parts that fit a register | ||||||||
3646 | /// storing a legal type | ||||||||
3647 | virtual bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, | ||||||||
3648 | SDValue Val, SDValue *Parts, | ||||||||
3649 | unsigned NumParts, MVT PartVT, | ||||||||
3650 | Optional<CallingConv::ID> CC) const { | ||||||||
3651 | return false; | ||||||||
3652 | } | ||||||||
3653 | |||||||||
3654 | /// Target-specific combining of register parts into its original value | ||||||||
3655 | virtual SDValue | ||||||||
3656 | joinRegisterPartsIntoValue(SelectionDAG &DAG, const SDLoc &DL, | ||||||||
3657 | const SDValue *Parts, unsigned NumParts, | ||||||||
3658 | MVT PartVT, EVT ValueVT, | ||||||||
3659 | Optional<CallingConv::ID> CC) const { | ||||||||
3660 | return SDValue(); | ||||||||
3661 | } | ||||||||
3662 | |||||||||
3663 | /// This hook must be implemented to lower the incoming (formal) arguments, | ||||||||
3664 | /// described by the Ins array, into the specified DAG. The implementation | ||||||||
3665 | /// should fill in the InVals array with legal-type argument values, and | ||||||||
3666 | /// return the resulting token chain value. | ||||||||
3667 | virtual SDValue LowerFormalArguments( | ||||||||
3668 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, | ||||||||
3669 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, | ||||||||
3670 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { | ||||||||
3671 | llvm_unreachable("Not Implemented"); | ||||||||
3672 | } | ||||||||
3673 | |||||||||
3674 | /// This structure contains all information that is necessary for lowering | ||||||||
3675 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder | ||||||||
3676 | /// needs to lower a call, and targets will see this struct in their LowerCall | ||||||||
3677 | /// implementation. | ||||||||
3678 | struct CallLoweringInfo { | ||||||||
3679 | SDValue Chain; | ||||||||
3680 | Type *RetTy = nullptr; | ||||||||
3681 | bool RetSExt : 1; | ||||||||
3682 | bool RetZExt : 1; | ||||||||
3683 | bool IsVarArg : 1; | ||||||||
3684 | bool IsInReg : 1; | ||||||||
3685 | bool DoesNotReturn : 1; | ||||||||
3686 | bool IsReturnValueUsed : 1; | ||||||||
3687 | bool IsConvergent : 1; | ||||||||
3688 | bool IsPatchPoint : 1; | ||||||||
3689 | bool IsPreallocated : 1; | ||||||||
3690 | bool NoMerge : 1; | ||||||||
3691 | |||||||||
3692 | // IsTailCall should be modified by implementations of | ||||||||
3693 | // TargetLowering::LowerCall that perform tail call conversions. | ||||||||
3694 | bool IsTailCall = false; | ||||||||
3695 | |||||||||
3696 | // Is Call lowering done post SelectionDAG type legalization. | ||||||||
3697 | bool IsPostTypeLegalization = false; | ||||||||
3698 | |||||||||
3699 | unsigned NumFixedArgs = -1; | ||||||||
3700 | CallingConv::ID CallConv = CallingConv::C; | ||||||||
3701 | SDValue Callee; | ||||||||
3702 | ArgListTy Args; | ||||||||
3703 | SelectionDAG &DAG; | ||||||||
3704 | SDLoc DL; | ||||||||
3705 | const CallBase *CB = nullptr; | ||||||||
3706 | SmallVector<ISD::OutputArg, 32> Outs; | ||||||||
3707 | SmallVector<SDValue, 32> OutVals; | ||||||||
3708 | SmallVector<ISD::InputArg, 32> Ins; | ||||||||
3709 | SmallVector<SDValue, 4> InVals; | ||||||||
3710 | |||||||||
3711 | CallLoweringInfo(SelectionDAG &DAG) | ||||||||
3712 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), | ||||||||
3713 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), | ||||||||
3714 | IsPatchPoint(false), IsPreallocated(false), NoMerge(false), | ||||||||
3715 | DAG(DAG) {} | ||||||||
3716 | |||||||||
3717 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { | ||||||||
3718 | DL = dl; | ||||||||
3719 | return *this; | ||||||||
3720 | } | ||||||||
3721 | |||||||||
3722 | CallLoweringInfo &setChain(SDValue InChain) { | ||||||||
3723 | Chain = InChain; | ||||||||
3724 | return *this; | ||||||||
3725 | } | ||||||||
3726 | |||||||||
3727 | // setCallee with target/module-specific attributes | ||||||||
3728 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, | ||||||||
3729 | SDValue Target, ArgListTy &&ArgsList) { | ||||||||
3730 | RetTy = ResultType; | ||||||||
3731 | Callee = Target; | ||||||||
3732 | CallConv = CC; | ||||||||
3733 | NumFixedArgs = ArgsList.size(); | ||||||||
3734 | Args = std::move(ArgsList); | ||||||||
3735 | |||||||||
3736 | DAG.getTargetLoweringInfo().markLibCallAttributes( | ||||||||
3737 | &(DAG.getMachineFunction()), CC, Args); | ||||||||
3738 | return *this; | ||||||||
3739 | } | ||||||||
3740 | |||||||||
3741 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, | ||||||||
3742 | SDValue Target, ArgListTy &&ArgsList) { | ||||||||
3743 | RetTy = ResultType; | ||||||||
3744 | Callee = Target; | ||||||||
3745 | CallConv = CC; | ||||||||
3746 | NumFixedArgs = ArgsList.size(); | ||||||||
3747 | Args = std::move(ArgsList); | ||||||||
3748 | return *this; | ||||||||
3749 | } | ||||||||
3750 | |||||||||
3751 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, | ||||||||
3752 | SDValue Target, ArgListTy &&ArgsList, | ||||||||
3753 | const CallBase &Call) { | ||||||||
3754 | RetTy = ResultType; | ||||||||
3755 | |||||||||
3756 | IsInReg = Call.hasRetAttr(Attribute::InReg); | ||||||||
3757 | DoesNotReturn = | ||||||||
3758 | Call.doesNotReturn() || | ||||||||
3759 | (!isa<InvokeInst>(Call) && isa<UnreachableInst>(Call.getNextNode())); | ||||||||
3760 | IsVarArg = FTy->isVarArg(); | ||||||||
3761 | IsReturnValueUsed = !Call.use_empty(); | ||||||||
3762 | RetSExt = Call.hasRetAttr(Attribute::SExt); | ||||||||
3763 | RetZExt = Call.hasRetAttr(Attribute::ZExt); | ||||||||
3764 | NoMerge = Call.hasFnAttr(Attribute::NoMerge); | ||||||||
3765 | |||||||||
3766 | Callee = Target; | ||||||||
3767 | |||||||||
3768 | CallConv = Call.getCallingConv(); | ||||||||
3769 | NumFixedArgs = FTy->getNumParams(); | ||||||||
3770 | Args = std::move(ArgsList); | ||||||||
3771 | |||||||||
3772 | CB = &Call; | ||||||||
3773 | |||||||||
3774 | return *this; | ||||||||
3775 | } | ||||||||
3776 | |||||||||
3777 | CallLoweringInfo &setInRegister(bool Value = true) { | ||||||||
3778 | IsInReg = Value; | ||||||||
3779 | return *this; | ||||||||
3780 | } | ||||||||
3781 | |||||||||
3782 | CallLoweringInfo &setNoReturn(bool Value = true) { | ||||||||
3783 | DoesNotReturn = Value; | ||||||||
3784 | return *this; | ||||||||
3785 | } | ||||||||
3786 | |||||||||
3787 | CallLoweringInfo &setVarArg(bool Value = true) { | ||||||||
3788 | IsVarArg = Value; | ||||||||
3789 | return *this; | ||||||||
3790 | } | ||||||||
3791 | |||||||||
3792 | CallLoweringInfo &setTailCall(bool Value = true) { | ||||||||
3793 | IsTailCall = Value; | ||||||||
3794 | return *this; | ||||||||
3795 | } | ||||||||
3796 | |||||||||
3797 | CallLoweringInfo &setDiscardResult(bool Value = true) { | ||||||||
3798 | IsReturnValueUsed = !Value; | ||||||||
3799 | return *this; | ||||||||
3800 | } | ||||||||
3801 | |||||||||
3802 | CallLoweringInfo &setConvergent(bool Value = true) { | ||||||||
3803 | IsConvergent = Value; | ||||||||
3804 | return *this; | ||||||||
3805 | } | ||||||||
3806 | |||||||||
3807 | CallLoweringInfo &setSExtResult(bool Value = true) { | ||||||||
3808 | RetSExt = Value; | ||||||||
3809 | return *this; | ||||||||
3810 | } | ||||||||
3811 | |||||||||
3812 | CallLoweringInfo &setZExtResult(bool Value = true) { | ||||||||
3813 | RetZExt = Value; | ||||||||
3814 | return *this; | ||||||||
3815 | } | ||||||||
3816 | |||||||||
3817 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { | ||||||||
3818 | IsPatchPoint = Value; | ||||||||
3819 | return *this; | ||||||||
3820 | } | ||||||||
3821 | |||||||||
3822 | CallLoweringInfo &setIsPreallocated(bool Value = true) { | ||||||||
3823 | IsPreallocated = Value; | ||||||||
3824 | return *this; | ||||||||
3825 | } | ||||||||
3826 | |||||||||
3827 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { | ||||||||
3828 | IsPostTypeLegalization = Value; | ||||||||
3829 | return *this; | ||||||||
3830 | } | ||||||||
3831 | |||||||||
3832 | ArgListTy &getArgs() { | ||||||||
3833 | return Args; | ||||||||
3834 | } | ||||||||
3835 | }; | ||||||||
3836 | |||||||||
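// A minimal sketch, not part of this header, of the intended builder-style use
// of CallLoweringInfo when target code emits a runtime call; the symbol name
// "__my_copy_impl" and the helper name are assumptions, not a real API.
std::pair<SDValue, SDValue>
emitCopyRuntimeCall(const TargetLowering &TLI, SelectionDAG &DAG,
                    const SDLoc &DL, SDValue Chain, SDValue Dst, SDValue Src,
                    SDValue Size, Type *SizeTy) {
  TargetLowering::ArgListTy Args;
  for (SDValue Op : {Dst, Src, Size}) {
    TargetLowering::ArgListEntry Entry;
    Entry.Node = Op;
    Entry.Ty = Op == Size ? SizeTy : Type::getInt8PtrTy(*DAG.getContext());
    Args.push_back(Entry);
  }
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(Chain)
      .setLibCallee(
          CallingConv::C, Type::getVoidTy(*DAG.getContext()),
          DAG.getExternalSymbol("__my_copy_impl",
                                TLI.getPointerTy(DAG.getDataLayout())),
          std::move(Args));
  return TLI.LowerCallTo(CLI); // {return value, outgoing token chain}
}
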
3837 | /// This structure is used to pass arguments to the makeLibCall function. | ||||||||
3838 | struct MakeLibCallOptions { | ||||||||
3839 | // By passing type list before soften to makeLibCall, the target hook | ||||||||
3840 | // shouldExtendTypeInLibCall can get the original type before soften. | ||||||||
3841 | ArrayRef<EVT> OpsVTBeforeSoften; | ||||||||
3842 | EVT RetVTBeforeSoften; | ||||||||
3843 | bool IsSExt : 1; | ||||||||
3844 | bool DoesNotReturn : 1; | ||||||||
3845 | bool IsReturnValueUsed : 1; | ||||||||
3846 | bool IsPostTypeLegalization : 1; | ||||||||
3847 | bool IsSoften : 1; | ||||||||
3848 | |||||||||
3849 | MakeLibCallOptions() | ||||||||
3850 | : IsSExt(false), DoesNotReturn(false), IsReturnValueUsed(true), | ||||||||
3851 | IsPostTypeLegalization(false), IsSoften(false) {} | ||||||||
3852 | |||||||||
3853 | MakeLibCallOptions &setSExt(bool Value = true) { | ||||||||
3854 | IsSExt = Value; | ||||||||
3855 | return *this; | ||||||||
3856 | } | ||||||||
3857 | |||||||||
3858 | MakeLibCallOptions &setNoReturn(bool Value = true) { | ||||||||
3859 | DoesNotReturn = Value; | ||||||||
3860 | return *this; | ||||||||
3861 | } | ||||||||
3862 | |||||||||
3863 | MakeLibCallOptions &setDiscardResult(bool Value = true) { | ||||||||
3864 | IsReturnValueUsed = !Value; | ||||||||
3865 | return *this; | ||||||||
3866 | } | ||||||||
3867 | |||||||||
3868 | MakeLibCallOptions &setIsPostTypeLegalization(bool Value = true) { | ||||||||
3869 | IsPostTypeLegalization = Value; | ||||||||
3870 | return *this; | ||||||||
3871 | } | ||||||||
3872 | |||||||||
3873 | MakeLibCallOptions &setTypeListBeforeSoften(ArrayRef<EVT> OpsVT, EVT RetVT, | ||||||||
3874 | bool Value = true) { | ||||||||
3875 | OpsVTBeforeSoften = OpsVT; | ||||||||
3876 | RetVTBeforeSoften = RetVT; | ||||||||
3877 | IsSoften = Value; | ||||||||
3878 | return *this; | ||||||||
3879 | } | ||||||||
3880 | }; | ||||||||
3881 | |||||||||
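// A minimal sketch, not part of this header, of how softening code typically
// fills MakeLibCallOptions before calling makeLibCall (declared elsewhere in
// this class): record the pre-soften types so shouldExtendTypeInLibCall can
// still see them. The helper name is an assumption.
SDValue softenFAddToLibcall(const TargetLowering &TLI, SelectionDAG &DAG,
                            const SDLoc &DL, SDValue LHS, SDValue RHS) {
  EVT OrigVT = MVT::f32; // type before softening
  EVT SoftVT = MVT::i32; // integer carrier type after softening
  EVT OpsVT[2] = {OrigVT, OrigVT};
  TargetLowering::MakeLibCallOptions CallOptions;
  CallOptions.setSExt(false).setTypeListBeforeSoften(OpsVT, OrigVT);
  SDValue Ops[2] = {DAG.getBitcast(SoftVT, LHS), DAG.getBitcast(SoftVT, RHS)};
  return TLI.makeLibCall(DAG, RTLIB::ADD_F32, SoftVT, Ops, CallOptions, DL)
      .first;
}
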
3882 | /// This function lowers an abstract call to a function into an actual call. | ||||||||
3883 | /// This returns a pair of operands. The first element is the return value | ||||||||
3884 | /// for the function (if RetTy is not VoidTy). The second element is the | ||||||||
3885 | /// outgoing token chain. It calls LowerCall to do the actual lowering. | ||||||||
3886 | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; | ||||||||
3887 | |||||||||
3888 | /// This hook must be implemented to lower calls into the specified | ||||||||
3889 | /// DAG. The outgoing arguments to the call are described by the Outs array, | ||||||||
3890 | /// and the values to be returned by the call are described by the Ins | ||||||||
3891 | /// array. The implementation should fill in the InVals array with legal-type | ||||||||
3892 | /// return values from the call, and return the resulting token chain value. | ||||||||
3893 | virtual SDValue | ||||||||
3894 | LowerCall(CallLoweringInfo &/*CLI*/, | ||||||||
3895 | SmallVectorImpl<SDValue> &/*InVals*/) const { | ||||||||
3896 | llvm_unreachable("Not Implemented"); | ||||||||
3897 | } | ||||||||
3898 | |||||||||
3899 | /// Target-specific cleanup for formal ByVal parameters. | ||||||||
3900 | virtual void HandleByVal(CCState *, unsigned &, Align) const {} | ||||||||
3901 | |||||||||
3902 | /// This hook should be implemented to check whether the return values | ||||||||
3903 | /// described by the Outs array can fit into the return registers. If false | ||||||||
3904 | /// is returned, an sret-demotion is performed. | ||||||||
3905 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, | ||||||||
3906 | MachineFunction &/*MF*/, bool /*isVarArg*/, | ||||||||
3907 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, | ||||||||
3908 | LLVMContext &/*Context*/) const | ||||||||
3909 | { | ||||||||
3910 | // Return true by default to get preexisting behavior. | ||||||||
3911 | return true; | ||||||||
3912 | } | ||||||||
3913 | |||||||||
3914 | /// This hook must be implemented to lower outgoing return values, described | ||||||||
3915 | /// by the Outs array, into the specified DAG. The implementation should | ||||||||
3916 | /// return the resulting token chain value. | ||||||||
3917 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, | ||||||||
3918 | bool /*isVarArg*/, | ||||||||
3919 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, | ||||||||
3920 | const SmallVectorImpl<SDValue> & /*OutVals*/, | ||||||||
3921 | const SDLoc & /*dl*/, | ||||||||
3922 | SelectionDAG & /*DAG*/) const { | ||||||||
3923 | llvm_unreachable("Not Implemented"); | ||||||||
3924 | } | ||||||||
3925 | |||||||||
3926 | /// Return true if the result of the specified node is used by a return node | ||||||||
3927 | /// only. It also computes and returns the input chain for the tail call. | ||||||||
3928 | /// | ||||||||
3929 | /// This is used to determine whether it is possible to codegen a libcall as | ||||||||
3930 | /// tail call at legalization time. | ||||||||
3931 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { | ||||||||
3932 | return false; | ||||||||
3933 | } | ||||||||
3934 | |||||||||
3935 | /// Return true if the target may be able to emit the call instruction as a tail | ||||||||
3936 | /// call. This is used by optimization passes to determine if it's profitable | ||||||||
3937 | /// to duplicate return instructions to enable tailcall optimization. | ||||||||
3938 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { | ||||||||
3939 | return false; | ||||||||
3940 | } | ||||||||
3941 | |||||||||
3942 | /// Return the builtin name for the __builtin___clear_cache intrinsic | ||||||||
3943 | /// Default is to invoke the clear cache library call | ||||||||
3944 | virtual const char * getClearCacheBuiltinName() const { | ||||||||
3945 | return "__clear_cache"; | ||||||||
3946 | } | ||||||||
3947 | |||||||||
3948 | /// Return the register ID of the name passed in. Used by named register | ||||||||
3949 | /// global variables extension. There is no target-independent behaviour | ||||||||
3950 | /// so the default action is to bail. | ||||||||
3951 | virtual Register getRegisterByName(const char* RegName, LLT Ty, | ||||||||
3952 | const MachineFunction &MF) const { | ||||||||
3953 | report_fatal_error("Named registers not implemented for this target"); | ||||||||
3954 | } | ||||||||
3955 | |||||||||
3956 | /// Return the type that should be used to zero or sign extend a | ||||||||
3957 | /// zeroext/signext integer return value. FIXME: Some C calling conventions | ||||||||
3958 | /// require the return type to be promoted, but this is not true all the time, | ||||||||
3959 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling | ||||||||
3960 | /// conventions. The frontend should handle this and include all of the | ||||||||
3961 | /// necessary information. | ||||||||
3962 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, | ||||||||
3963 | ISD::NodeType /*ExtendKind*/) const { | ||||||||
3964 | EVT MinVT = getRegisterType(Context, MVT::i32); | ||||||||
3965 | return VT.bitsLT(MinVT) ? MinVT : VT; | ||||||||
3966 | } | ||||||||
3967 | |||||||||
3968 | /// For some targets, an LLVM struct type must be broken down into multiple | ||||||||
3969 | /// simple types, but the calling convention specifies that the entire struct | ||||||||
3970 | /// must be passed in a block of consecutive registers. | ||||||||
3971 | virtual bool | ||||||||
3972 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, | ||||||||
3973 | bool isVarArg) const { | ||||||||
3974 | return false; | ||||||||
3975 | } | ||||||||
3976 | |||||||||
3977 | /// For most targets, an LLVM type must be broken down into multiple | ||||||||
3978 | /// smaller types. Usually the halves are ordered according to the endianness | ||||||||
3979 | /// but for some platforms that would break. So this method will default to | ||||||||
3980 | /// matching the endianness but can be overridden. | ||||||||
3981 | virtual bool | ||||||||
3982 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { | ||||||||
3983 | return DL.isLittleEndian(); | ||||||||
3984 | } | ||||||||
3985 | |||||||||
3986 | /// Returns a 0 terminated array of registers that can be safely used as | ||||||||
3987 | /// scratch registers. | ||||||||
3988 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { | ||||||||
3989 | return nullptr; | ||||||||
3990 | } | ||||||||
3991 | |||||||||
3992 | /// This callback is used to prepare for a volatile or atomic load. | ||||||||
3993 | /// It takes a chain node as input and returns the chain for the load itself. | ||||||||
3994 | /// | ||||||||
3995 | /// Having a callback like this is necessary for targets like SystemZ, | ||||||||
3996 | /// which allows a CPU to reuse the result of a previous load indefinitely, | ||||||||
3997 | /// even if a cache-coherent store is performed by another CPU. The default | ||||||||
3998 | /// implementation does nothing. | ||||||||
3999 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, | ||||||||
4000 | SelectionDAG &DAG) const { | ||||||||
4001 | return Chain; | ||||||||
4002 | } | ||||||||
4003 | |||||||||
4004 | /// Should SelectionDAG lower an atomic store of the given kind as a normal | ||||||||
4005 | /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to | ||||||||
4006 | /// eventually migrate all targets to using StoreSDNodes, but porting is | ||||||||
4007 | /// being done target at a time. | ||||||||
4008 | virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { | ||||||||
4009 | assert(SI.isAtomic() && "violated precondition"); | ||||||||
4010 | return false; | ||||||||
4011 | } | ||||||||
4012 | |||||||||
4013 | /// Should SelectionDAG lower an atomic load of the given kind as a normal | ||||||||
4014 | /// LoadSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to | ||||||||
4015 | /// eventually migrate all targets to using LoadSDNodes, but porting is | ||||||||
4016 | /// being done target at a time. | ||||||||
4017 | virtual bool lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { | ||||||||
4018 | assert(LI.isAtomic() && "violated precondition"); | ||||||||
4019 | return false; | ||||||||
4020 | } | ||||||||
4021 | |||||||||
4022 | |||||||||
4023 | /// This callback is invoked by the type legalizer to legalize nodes with an | ||||||||
4024 | /// illegal operand type but legal result types. It replaces the | ||||||||
4025 | /// LowerOperation callback in the type Legalizer. The reason we can not do | ||||||||
4026 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to | ||||||||
4027 | /// use this callback. | ||||||||
4028 | /// | ||||||||
4029 | /// TODO: Consider merging with ReplaceNodeResults. | ||||||||
4030 | /// | ||||||||
4031 | /// The target places new result values for the node in Results (their number | ||||||||
4032 | /// and types must exactly match those of the original return values of | ||||||||
4033 | /// the node), or leaves Results empty, which indicates that the node is not | ||||||||
4034 | /// to be custom lowered after all. | ||||||||
4035 | /// The default implementation calls LowerOperation. | ||||||||
4036 | virtual void LowerOperationWrapper(SDNode *N, | ||||||||
4037 | SmallVectorImpl<SDValue> &Results, | ||||||||
4038 | SelectionDAG &DAG) const; | ||||||||
4039 | |||||||||
4040 | /// This callback is invoked for operations that are unsupported by the | ||||||||
4041 | /// target, which are registered to use 'custom' lowering, and whose defined | ||||||||
4042 | /// values are all legal. If the target has no operations that require custom | ||||||||
4043 | /// lowering, it need not implement this. The default implementation of this | ||||||||
4044 | /// aborts. | ||||||||
4045 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; | ||||||||
4046 | |||||||||
4047 | /// This callback is invoked when a node result type is illegal for the | ||||||||
4048 | /// target, and the operation was registered to use 'custom' lowering for that | ||||||||
4049 | /// result type. The target places new result values for the node in Results | ||||||||
4050 | /// (their number and types must exactly match those of the original return | ||||||||
4051 | /// values of the node), or leaves Results empty, which indicates that the | ||||||||
4052 | /// node is not to be custom lowered after all. | ||||||||
4053 | /// | ||||||||
4054 | /// If the target has no operations that require custom lowering, it need not | ||||||||
4055 | /// implement this. The default implementation aborts. | ||||||||
4056 | virtual void ReplaceNodeResults(SDNode * /*N*/, | ||||||||
4057 | SmallVectorImpl<SDValue> &/*Results*/, | ||||||||
4058 | SelectionDAG &/*DAG*/) const { | ||||||||
4059 | llvm_unreachable("ReplaceNodeResults not implemented for this target!"); | ||||||||
4060 | } | ||||||||
4061 | |||||||||
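// A minimal sketch, not part of this header, of a hypothetical
// ReplaceNodeResults override: widen an illegal i8 CTPOP to i32 and truncate,
// pushing exactly one value per original result into Results.
void MyTargetLowering::ReplaceNodeResults(SDNode *N,
                                          SmallVectorImpl<SDValue> &Results,
                                          SelectionDAG &DAG) const {
  if (N->getOpcode() != ISD::CTPOP || N->getValueType(0) != MVT::i8)
    return; // Leave Results empty: not custom lowered after all.
  SDLoc DL(N);
  SDValue Wide = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, N->getOperand(0));
  SDValue Pop = DAG.getNode(ISD::CTPOP, DL, MVT::i32, Wide);
  Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Pop));
}
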
4062 | /// This method returns the name of a target specific DAG node. | ||||||||
4063 | virtual const char *getTargetNodeName(unsigned Opcode) const; | ||||||||
4064 | |||||||||
4065 | /// This method returns a target specific FastISel object, or null if the | ||||||||
4066 | /// target does not support "fast" ISel. | ||||||||
4067 | virtual FastISel *createFastISel(FunctionLoweringInfo &, | ||||||||
4068 | const TargetLibraryInfo *) const { | ||||||||
4069 | return nullptr; | ||||||||
4070 | } | ||||||||
4071 | |||||||||
4072 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, | ||||||||
4073 | SelectionDAG &DAG) const; | ||||||||
4074 | |||||||||
4075 | //===--------------------------------------------------------------------===// | ||||||||
4076 | // Inline Asm Support hooks | ||||||||
4077 | // | ||||||||
4078 | |||||||||
4079 | /// This hook allows the target to expand an inline asm call to be explicit | ||||||||
4080 | /// llvm code if it wants to. This is useful for turning simple inline asms | ||||||||
4081 | /// into LLVM intrinsics, which gives the compiler more information about the | ||||||||
4082 | /// behavior of the code. | ||||||||
4083 | virtual bool ExpandInlineAsm(CallInst *) const { | ||||||||
4084 | return false; | ||||||||
4085 | } | ||||||||
4086 | |||||||||
4087 | enum ConstraintType { | ||||||||
4088 | C_Register, // Constraint represents specific register(s). | ||||||||
4089 | C_RegisterClass, // Constraint represents any of register(s) in class. | ||||||||
4090 | C_Memory, // Memory constraint. | ||||||||
4091 | C_Immediate, // Requires an immediate. | ||||||||
4092 | C_Other, // Something else. | ||||||||
4093 | C_Unknown // Unsupported constraint. | ||||||||
4094 | }; | ||||||||
4095 | |||||||||
4096 | enum ConstraintWeight { | ||||||||
4097 | // Generic weights. | ||||||||
4098 | CW_Invalid = -1, // No match. | ||||||||
4099 | CW_Okay = 0, // Acceptable. | ||||||||
4100 | CW_Good = 1, // Good weight. | ||||||||
4101 | CW_Better = 2, // Better weight. | ||||||||
4102 | CW_Best = 3, // Best weight. | ||||||||
4103 | |||||||||
4104 | // Well-known weights. | ||||||||
4105 | CW_SpecificReg = CW_Okay, // Specific register operands. | ||||||||
4106 | CW_Register = CW_Good, // Register operands. | ||||||||
4107 | CW_Memory = CW_Better, // Memory operands. | ||||||||
4108 | CW_Constant = CW_Best, // Constant operand. | ||||||||
4109 | CW_Default = CW_Okay // Default or don't know type. | ||||||||
4110 | }; | ||||||||
4111 | |||||||||
4112 | /// This contains information for each constraint that we are lowering. | ||||||||
4113 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo { | ||||||||
4114 | /// This contains the actual string for the code, like "m". TargetLowering | ||||||||
4115 | /// picks the 'best' code from ConstraintInfo::Codes that most closely | ||||||||
4116 | /// matches the operand. | ||||||||
4117 | std::string ConstraintCode; | ||||||||
4118 | |||||||||
4119 | /// Information about the constraint code, e.g. Register, RegisterClass, | ||||||||
4120 | /// Memory, Other, Unknown. | ||||||||
4121 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; | ||||||||
4122 | |||||||||
4123 | /// If this is the result output operand or a clobber, this is null, | ||||||||
4124 | /// otherwise it is the incoming operand to the CallInst. This gets | ||||||||
4125 | /// modified as the asm is processed. | ||||||||
4126 | Value *CallOperandVal = nullptr; | ||||||||
4127 | |||||||||
4128 | /// The ValueType for the operand value. | ||||||||
4129 | MVT ConstraintVT = MVT::Other; | ||||||||
4130 | |||||||||
4131 | /// Copy constructor for copying from a ConstraintInfo. | ||||||||
4132 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) | ||||||||
4133 | : InlineAsm::ConstraintInfo(std::move(Info)) {} | ||||||||
4134 | |||||||||
4135 | /// Return true if this is an input operand that is a matching constraint | ||||||||
4136 | /// like "4". | ||||||||
4137 | bool isMatchingInputConstraint() const; | ||||||||
4138 | |||||||||
4139 | /// If this is an input matching constraint, this method returns the output | ||||||||
4140 | /// operand it matches. | ||||||||
4141 | unsigned getMatchedOperand() const; | ||||||||
4142 | }; | ||||||||
4143 | |||||||||
4144 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; | ||||||||
4145 | |||||||||
4146 | /// Split up the constraint string from the inline assembly value into the | ||||||||
4147 | /// specific constraints and their prefixes, and also tie in the associated | ||||||||
4148 | /// operand values. If this returns an empty vector, and if the constraint | ||||||||
4149 | /// string itself isn't empty, there was an error parsing. | ||||||||
4150 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, | ||||||||
4151 | const TargetRegisterInfo *TRI, | ||||||||
4152 | const CallBase &Call) const; | ||||||||
4153 | |||||||||
4154 | /// Examine constraint type and operand type and determine a weight value. | ||||||||
4155 | /// The operand object must already have been set up with the operand type. | ||||||||
4156 | virtual ConstraintWeight getMultipleConstraintMatchWeight( | ||||||||
4157 | AsmOperandInfo &info, int maIndex) const; | ||||||||
4158 | |||||||||
4159 | /// Examine constraint string and operand type and determine a weight value. | ||||||||
4160 | /// The operand object must already have been set up with the operand type. | ||||||||
4161 | virtual ConstraintWeight getSingleConstraintMatchWeight( | ||||||||
4162 | AsmOperandInfo &info, const char *constraint) const; | ||||||||
4163 | |||||||||
4164 | /// Determines the constraint code and constraint type to use for the specific | ||||||||
4165 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. | ||||||||
4166 | /// If the actual operand being passed in is available, it can be passed in as | ||||||||
4167 | /// Op, otherwise an empty SDValue can be passed. | ||||||||
4168 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, | ||||||||
4169 | SDValue Op, | ||||||||
4170 | SelectionDAG *DAG = nullptr) const; | ||||||||
4171 | |||||||||
4172 | /// Given a constraint, return the type of constraint it is for this target. | ||||||||
4173 | virtual ConstraintType getConstraintType(StringRef Constraint) const; | ||||||||
4174 | |||||||||
4175 | /// Given a physical register constraint (e.g. {edx}), return the register | ||||||||
4176 | /// number and the register class for the register. | ||||||||
4177 | /// | ||||||||
4178 | /// Given a register class constraint, like 'r', if this corresponds directly | ||||||||
4179 | /// to an LLVM register class, return a register of 0 and the register class | ||||||||
4180 | /// pointer. | ||||||||
4181 | /// | ||||||||
4182 | /// This should only be used for C_Register constraints. On error, this | ||||||||
4183 | /// returns a register number of 0 and a null register class pointer. | ||||||||
4184 | virtual std::pair<unsigned, const TargetRegisterClass *> | ||||||||
4185 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, | ||||||||
4186 | StringRef Constraint, MVT VT) const; | ||||||||
4187 | |||||||||
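// A minimal sketch, not part of this header, of the usual
// getRegForInlineAsmConstraint pattern in a hypothetical backend: map the
// single-letter class 'r' to a target register class (MyTgt::GPR32RegClass is
// an assumed name) and defer everything else to the base implementation.
std::pair<unsigned, const TargetRegisterClass *>
MyTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                               StringRef Constraint,
                                               MVT VT) const {
  if (Constraint.size() == 1 && Constraint[0] == 'r')
    return std::make_pair(0U, &MyTgt::GPR32RegClass);
  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}
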
4188 | virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { | ||||||||
4189 | if (ConstraintCode == "m") | ||||||||
4190 | return InlineAsm::Constraint_m; | ||||||||
4191 | return InlineAsm::Constraint_Unknown; | ||||||||
4192 | } | ||||||||
4193 | |||||||||
4194 | /// Try to replace an X constraint, which matches anything, with another that | ||||||||
4195 | /// has more specific requirements based on the type of the corresponding | ||||||||
4196 | /// operand. This returns null if there is no replacement to make. | ||||||||
4197 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; | ||||||||
4198 | |||||||||
4199 | /// Lower the specified operand into the Ops vector. If it is invalid, don't | ||||||||
4200 | /// add anything to Ops. | ||||||||
4201 | virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, | ||||||||
4202 | std::vector<SDValue> &Ops, | ||||||||
4203 | SelectionDAG &DAG) const; | ||||||||
4204 | |||||||||
4205 | // Lower custom output constraints. If invalid, return SDValue(). | ||||||||
4206 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, | ||||||||
4207 | const SDLoc &DL, | ||||||||
4208 | const AsmOperandInfo &OpInfo, | ||||||||
4209 | SelectionDAG &DAG) const; | ||||||||
4210 | |||||||||
4211 | //===--------------------------------------------------------------------===// | ||||||||
4212 | // Div utility functions | ||||||||
4213 | // | ||||||||
4214 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, | ||||||||
4215 | SmallVectorImpl<SDNode *> &Created) const; | ||||||||
4216 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, | ||||||||
4217 | SmallVectorImpl<SDNode *> &Created) const; | ||||||||
4218 | |||||||||
4219 | /// Targets may override this function to provide custom SDIV lowering for | ||||||||
4220 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM | ||||||||
4221 | /// assumes SDIV is expensive and replaces it with a series of other integer | ||||||||
4222 | /// operations. | ||||||||
4223 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, | ||||||||
4224 | SelectionDAG &DAG, | ||||||||
4225 | SmallVectorImpl<SDNode *> &Created) const; | ||||||||
4226 | |||||||||
4227 | /// Indicate whether this target prefers to combine FDIVs with the same | ||||||||
4228 | /// divisor. If the transform should never be done, return zero. If the | ||||||||
4229 | /// transform should be done, return the minimum number of divisor uses | ||||||||
4230 | /// that must exist. | ||||||||
4231 | virtual unsigned combineRepeatedFPDivisors() const { | ||||||||
4232 | return 0; | ||||||||
4233 | } | ||||||||
4234 | |||||||||
4235 | /// Hooks for building estimates in place of slower divisions and square | ||||||||
4236 | /// roots. | ||||||||
4237 | |||||||||
4238 | /// Return either a square root or its reciprocal estimate value for the input | ||||||||
4239 | /// operand. | ||||||||
4240 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or | ||||||||
4241 | /// 'Enabled' as set by a potential default override attribute. | ||||||||
4242 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson | ||||||||
4243 | /// refinement iterations required to generate a sufficient (though not | ||||||||
4244 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. | ||||||||
4245 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson | ||||||||
4246 | /// algorithm implementation that uses either one or two constants. | ||||||||
4247 | /// The boolean Reciprocal is used to select whether the estimate is for the | ||||||||
4248 | /// square root of the input operand or the reciprocal of its square root. | ||||||||
4249 | /// A target may choose to implement its own refinement within this function. | ||||||||
4250 | /// If that's true, then return '0' as the number of RefinementSteps to avoid | ||||||||
4251 | /// any further refinement of the estimate. | ||||||||
4252 | /// An empty SDValue return means no estimate sequence can be created. | ||||||||
4253 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, | ||||||||
4254 | int Enabled, int &RefinementSteps, | ||||||||
4255 | bool &UseOneConstNR, bool Reciprocal) const { | ||||||||
4256 | return SDValue(); | ||||||||
4257 | } | ||||||||
4258 | |||||||||
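// A minimal sketch, not part of this header, of a hypothetical getSqrtEstimate
// override: map the request onto an assumed MYTGTISD::FRSQRTE node producing
// 1/sqrt(x) and request one Newton-Raphson refinement step; the generic code
// multiplies by the operand when Reciprocal is false.
SDValue MyTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
                                          int Enabled, int &RefinementSteps,
                                          bool &UseOneConstNR,
                                          bool Reciprocal) const {
  EVT VT = Operand.getValueType();
  if (VT != MVT::f32 && VT != MVT::v4f32)
    return SDValue();
  if (RefinementSteps == ReciprocalEstimate::Unspecified)
    RefinementSteps = 1;
  UseOneConstNR = true;
  return DAG.getNode(MYTGTISD::FRSQRTE, SDLoc(Operand), VT, Operand);
}
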
4259 | /// Return a reciprocal estimate value for the input operand. | ||||||||
4260 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or | ||||||||
4261 | /// 'Enabled' as set by a potential default override attribute. | ||||||||
4262 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson | ||||||||
4263 | /// refinement iterations required to generate a sufficient (though not | ||||||||
4264 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. | ||||||||
4265 | /// A target may choose to implement its own refinement within this function. | ||||||||
4266 | /// If that's true, then return '0' as the number of RefinementSteps to avoid | ||||||||
4267 | /// any further refinement of the estimate. | ||||||||
4268 | /// An empty SDValue return means no estimate sequence can be created. | ||||||||
4269 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, | ||||||||
4270 | int Enabled, int &RefinementSteps) const { | ||||||||
4271 | return SDValue(); | ||||||||
4272 | } | ||||||||
4273 | |||||||||
4274 | //===--------------------------------------------------------------------===// | ||||||||
4275 | // Legalization utility functions | ||||||||
4276 | // | ||||||||
4277 | |||||||||
4278 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, | ||||||||
4279 | /// respectively, each computing an n/2-bit part of the result. | ||||||||
4280 | /// \param Result A vector that will be filled with the parts of the result | ||||||||
4281 | /// in little-endian order. | ||||||||
4282 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter | ||||||||
4283 | /// if you want to control how low bits are extracted from the LHS. | ||||||||
4284 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. | ||||||||
4285 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning | ||||||||
4286 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. | ||||||||
4287 | /// \returns true if the node has been expanded, false if it has not | ||||||||
4288 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, const SDLoc &dl, SDValue LHS, | ||||||||
4289 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, | ||||||||
4290 | SelectionDAG &DAG, MulExpansionKind Kind, | ||||||||
4291 | SDValue LL = SDValue(), SDValue LH = SDValue(), | ||||||||
4292 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; | ||||||||
4293 | |||||||||
4294 | /// Expand a MUL into two nodes. One that computes the high bits of | ||||||||
4295 | /// the result and one that computes the low bits. | ||||||||
4296 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. | ||||||||
4297 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter | ||||||||
4298 | /// if you want to control how low bits are extracted from the LHS. | ||||||||
4299 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. | ||||||||
4300 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning | ||||||||
4301 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. | ||||||||
4302 | /// \returns true if the node has been expanded, false if it has not | ||||||||
4303 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, | ||||||||
4304 | SelectionDAG &DAG, MulExpansionKind Kind, | ||||||||
4305 | SDValue LL = SDValue(), SDValue LH = SDValue(), | ||||||||
4306 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; | ||||||||

  /// Expand funnel shift.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
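
  // Worked example (illustrative only): for i8 operands, ISD::FSHL
  // concatenates the operands and keeps the high half of the shifted value:
  //
  //   fshl(a, b, c) == hi8((a:b) << (c % 8))
  //   fshl(0x81, 0x01, 1) == hi8(0x8101 << 1) == hi8(0x0202) == 0x02
  //
  // One way to expand this without an out-of-range shift when c % 8 == 0 is
  //
  //   or(shl(a, c & 7), srl(srl(b, 1), ~c & 7))
  //
  // which also yields 'a' unchanged for c == 0.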

  /// Expand rotations.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand float to SINT conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand float to UINT conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \param Chain output chain after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SDValue &Chain,
                        SelectionDAG &DAG) const;

  /// Expand UINT to FP conversion.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \param Chain output chain after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SDValue &Chain,
                        SelectionDAG &DAG) const;

  /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs.
  SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const;

  /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;

  /// Expand ABS nodes. Expands vector/scalar ABS nodes;
  /// vector nodes can only succeed if all operations are legal/custom.
  /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1))
  /// \param N Node to expand
  /// \param Result output after conversion
  /// \returns true if the expansion was successful, false otherwise
  bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const;
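
  // Worked example (illustrative only) of the expansion above for an i8 value
  // x = -5 (0xFB):
  //   s = SRA(x, 7)  = 0xFF (-1)
  //   t = ADD(x, s)  = 0xFA (-6)
  //   XOR(t, s)      = 0x05 (5)
  // For non-negative x, s is 0 and the expansion returns x unchanged.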

  /// Turn load of vector type into a load of the individual elements.
  /// \param LD load to expand
  /// \returns BUILD_VECTOR and TokenFactor nodes.
  std::pair<SDValue, SDValue> scalarizeVectorLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Turn a store of a vector type into stores of the individual elements.
  /// \param ST Store with a vector value type
  /// \returns TokenFactor of the individual store chains.
  SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Expands an unaligned load to 2 half-size loads for an integer, and
  /// possibly more for vectors.
  std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD,
                                                  SelectionDAG &DAG) const;

  /// Expands an unaligned store to 2 half-size stores for integer values, and
  /// possibly more for vectors.
  SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const;

  /// Increments memory address \p Addr according to the type of the value
  /// \p DataVT that should be stored. If the data is stored in compressed
  /// form, the memory address should be incremented according to the number of
  /// the stored elements. This number is equal to the number of '1' bits
  /// in the \p Mask.
  /// \p DataVT is a vector type. \p Mask is a vector value.
  /// \p DataVT and \p Mask have the same number of vector elements.
  SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL,
                                 EVT DataVT, SelectionDAG &DAG,
                                 bool IsCompressedMemory) const;
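
  // Worked example (illustrative only): for a compressing store with
  // DataVT = v8i32 and a Mask with three lanes set, the address is advanced
  // by popcount(Mask) * sizeof(i32) = 3 * 4 = 12 bytes. With
  // IsCompressedMemory == false it is advanced by the full vector size,
  // 8 * 4 = 32 bytes, independent of the mask.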

  /// Get a pointer to vector element \p Idx located in memory for a vector of
  /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of
  /// bounds the returned pointer is unspecified, but will be within the vector
  /// bounds.
  SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT,
                                  SDValue Index) const;

  /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This
  /// method accepts integers as its arguments.
  SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const;
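
  // Worked example (illustrative only): saturating arithmetic clamps instead
  // of wrapping. For i8 operands:
  //   uadd.sat(200, 100) == 255  (modular addition would give 44)
  //   sadd.sat(100, 100) == 127  (modular addition would give -56)
  // One common scalar expansion of ISD::UADDSAT is
  //   Sum    = ADD(LHS, RHS)
  //   Result = SELECT(SETCC(Sum, LHS, SETULT), AllOnes, Sum)
  // i.e. if the unsigned sum wrapped around, substitute the all-ones value.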

  /// Method for building the DAG expansion of ISD::[US]SHLSAT. This
  /// method accepts integers as its arguments.
  SDValue expandShlSat(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[U|S]MULFIX[SAT]. This
  /// method accepts integers as its arguments.
  SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]DIVFIX[SAT]. This
  /// method accepts integers as its arguments.
  /// Note: This method may fail if the division could not be performed
  /// within the type. Clients must retry with a wider type if this happens.
  SDValue expandFixedPointDiv(unsigned Opcode, const SDLoc &dl,
                              SDValue LHS, SDValue RHS,
                              unsigned Scale, SelectionDAG &DAG) const;
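
  // Worked example (illustrative only): for unsigned Q4 values in i8
  // (value == bits / 16.0) and Scale = 4, the quotient is conceptually
  //
  //   (LHS << Scale) / RHS
  //
  // e.g. 0.75 / 0.25: LHS = 12, RHS = 4, (12 << 4) / 4 = 48, i.e. 3.0.
  // For LHS = 16 (1.0), 16 << 4 = 256 no longer fits in i8, which is the
  // failure mode mentioned above that requires retrying in a wider type.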

  /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion
  /// always succeeds and populates the Result and Overflow arguments.
  void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                      SelectionDAG &DAG) const;

  /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether
  /// expansion was successful and populates the Result and Overflow arguments.
  bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow,
                  SelectionDAG &DAG) const;
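
  // Illustrative sketch (not part of this header): one way to expand
  // ISD::UMULO on i32 when a double-width multiply is available is
  //
  //   Wide     = MUL i64 (zext LHS), (zext RHS)
  //   Result   = trunc Wide to i32
  //   Overflow = SETCC(SRL(Wide, 32), 0, SETNE)
  //
  // e.g. 0x10000 * 0x10000 = 0x100000000: Result is 0 and the high half is 1,
  // so Overflow is true.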

  /// Expand a VECREDUCE_* into an explicit calculation.
  SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const;
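
  // Worked example (illustrative only): VECREDUCE_ADD of a v4i32 value
  // <a, b, c, d> produces a scalar equal to a + b + c + d; the generic
  // expansion typically halves the vector with shuffles, e.g. <a+c, b+d>
  // and then (a+c) + (b+d), before extracting the final element.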

  /// Expand an SREM or UREM using SDIV/UDIV or SDIVREM/UDIVREM, if legal.
  /// Returns true if the expansion was successful.
  bool expandREM(SDNode *Node, SDValue &Result, SelectionDAG &DAG) const;
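
  // Worked example (illustrative only): when only division is legal, the
  // remainder is recovered as X - (X / Y) * Y, e.g.
  //   srem(7, -3) == 7 - (7 / -3) * -3 == 7 - 6 == 1.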

  //===--------------------------------------------------------------------===//
  // Instruction Emitting Hooks
  //

  /// This method should be implemented by targets that mark instructions with
  /// the 'usesCustomInserter' flag. These instructions are special in various
  /// ways, which require special support to insert. The specified MachineInstr
  /// is created but not inserted into any basic blocks, and this method is
  /// called to expand it into a sequence of instructions, potentially also
  /// creating new basic blocks and control flow.
  /// As long as the returned basic block is different (i.e., we created a new
  /// one), the custom inserter is free to modify the rest of \p MBB.
  virtual MachineBasicBlock *
  EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const;

  /// This method should be implemented by targets that mark instructions with
  /// the 'hasPostISelHook' flag. These instructions must be adjusted after
  /// instruction selection by target hooks, e.g., to fill in optional defs for
  /// ARM 's' setting instructions.
  virtual void AdjustInstrPostInstrSelection(MachineInstr &MI,
                                             SDNode *Node) const;

  /// If this function returns true, SelectionDAGBuilder emits a
  /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector.
  virtual bool useLoadStackGuardNode() const {
    return false;
  }

  virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
                                      const SDLoc &DL) const {
    llvm_unreachable("not implemented for this target");
  }

  /// Lower TLS global address SDNode for target-independent emulated TLS model.
  virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
                                          SelectionDAG &DAG) const;

  /// Expands target specific indirect branch for the case of JumpTable
  /// expansion.
  virtual SDValue expandIndirectJTBranch(const SDLoc &dl, SDValue Value,
                                         SDValue Addr,
                                         SelectionDAG &DAG) const {
    return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr);
  }

  // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits)))
  // If we're comparing for equality to zero and isCtlzFast is true, expose the
  // fact that this can be implemented as a ctlz/srl pair, so that the dag
  // combiner can fold the new nodes.
  SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const;
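
  // Worked example (illustrative only): for an i32 value, log2(#bits) == 5,
  // so the transform computes ctlz(x) >> 5:
  //   x == 0: ctlz(0) == 32 and 32 >> 5 == 1, so seteq(x, 0) is true.
  //   x != 0: ctlz(x) <= 31 and 31 >> 5 == 0, so seteq(x, 0) is false.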

private:
  SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                           const SDLoc &DL, DAGCombinerInfo &DCI) const;
  SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                             const SDLoc &DL, DAGCombinerInfo &DCI) const;

  SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0,
                                               SDValue N1, ISD::CondCode Cond,
                                               DAGCombinerInfo &DCI,
                                               const SDLoc &DL) const;

  // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
  SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift(
      EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
      DAGCombinerInfo &DCI, const SDLoc &DL) const;

  SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;

  SDValue prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
                            SDValue CompTargetNode, ISD::CondCode Cond,
                            DAGCombinerInfo &DCI, const SDLoc &DL,
                            SmallVectorImpl<SDNode *> &Created) const;
  SDValue buildSREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode,
                          ISD::CondCode Cond, DAGCombinerInfo &DCI,
                          const SDLoc &DL) const;
};

/// Given an LLVM IR type and return type attributes, compute the return value
/// EVTs and flags, and optionally also the offsets, if the return value is
/// being lowered to memory.
void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr,
                   SmallVectorImpl<ISD::OutputArg> &Outs,
                   const TargetLowering &TLI, const DataLayout &DL);

} // end namespace llvm

#endif // LLVM_CODEGEN_TARGETLOWERING_H