clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ARMTargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/ARM -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/ARM -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
1 | //===- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "ARMTargetTransformInfo.h" |
10 | #include "ARMSubtarget.h" |
11 | #include "MCTargetDesc/ARMAddressingModes.h" |
12 | #include "llvm/ADT/APInt.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include "llvm/Analysis/LoopInfo.h" |
15 | #include "llvm/CodeGen/CostTable.h" |
16 | #include "llvm/CodeGen/ISDOpcodes.h" |
17 | #include "llvm/CodeGen/ValueTypes.h" |
18 | #include "llvm/IR/BasicBlock.h" |
19 | #include "llvm/IR/DataLayout.h" |
20 | #include "llvm/IR/DerivedTypes.h" |
21 | #include "llvm/IR/Instruction.h" |
22 | #include "llvm/IR/Instructions.h" |
23 | #include "llvm/IR/Intrinsics.h" |
24 | #include "llvm/IR/IntrinsicInst.h" |
25 | #include "llvm/IR/IntrinsicsARM.h" |
26 | #include "llvm/IR/PatternMatch.h" |
27 | #include "llvm/IR/Type.h" |
28 | #include "llvm/MC/SubtargetFeature.h" |
29 | #include "llvm/Support/Casting.h" |
30 | #include "llvm/Support/KnownBits.h" |
31 | #include "llvm/Support/MachineValueType.h" |
32 | #include "llvm/Target/TargetMachine.h" |
33 | #include "llvm/Transforms/InstCombine/InstCombiner.h" |
34 | #include "llvm/Transforms/Utils/Local.h" |
35 | #include "llvm/Transforms/Utils/LoopUtils.h" |
36 | #include <algorithm> |
37 | #include <cassert> |
38 | #include <cstdint> |
39 | #include <utility> |
40 | |
41 | using namespace llvm; |
42 | |
43 | #define DEBUG_TYPE "armtti" |
44 | |
45 | static cl::opt<bool> EnableMaskedLoadStores( |
46 | "enable-arm-maskedldst", cl::Hidden, cl::init(true), |
47 | cl::desc("Enable the generation of masked loads and stores")); |
48 | |
49 | static cl::opt<bool> DisableLowOverheadLoops( |
50 | "disable-arm-loloops", cl::Hidden, cl::init(false), |
51 | cl::desc("Disable the generation of low-overhead loops")); |
52 | |
53 | static cl::opt<bool> |
54 | AllowWLSLoops("allow-arm-wlsloops", cl::Hidden, cl::init(true), |
55 | cl::desc("Enable the generation of WLS loops")); |
56 | |
57 | extern cl::opt<TailPredication::Mode> EnableTailPredication; |
58 | |
59 | extern cl::opt<bool> EnableMaskedGatherScatters; |
60 | |
61 | extern cl::opt<unsigned> MVEMaxSupportedInterleaveFactor; |
62 | |
63 | /// Convert a vector load intrinsic into a simple llvm load instruction. |
64 | /// This is beneficial when the underlying object being addressed comes |
65 | /// from a constant, since we get constant-folding for free. |
66 | static Value *simplifyNeonVld1(const IntrinsicInst &II, unsigned MemAlign, |
67 | InstCombiner::BuilderTy &Builder) { |
68 | auto *IntrAlign = dyn_cast<ConstantInt>(II.getArgOperand(1)); |
69 | |
70 | if (!IntrAlign) |
71 | return nullptr; |
72 | |
73 | unsigned Alignment = IntrAlign->getLimitedValue() < MemAlign |
74 | ? MemAlign |
75 | : IntrAlign->getLimitedValue(); |
76 | |
77 | if (!isPowerOf2_32(Alignment)) |
78 | return nullptr; |
79 | |
80 | auto *BCastInst = Builder.CreateBitCast(II.getArgOperand(0), |
81 | PointerType::get(II.getType(), 0)); |
82 | return Builder.CreateAlignedLoad(II.getType(), BCastInst, Align(Alignment)); |
83 | } |
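     | // Illustrative effect (not from this file): with a pointer known to be |
     | // 16-byte aligned, "vld1(%p, align 1)" becomes "load <4 x i32>, align 16", |
     | // which constant folding can then see through. |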
84 | |
85 | bool ARMTTIImpl::areInlineCompatible(const Function *Caller, |
86 | const Function *Callee) const { |
87 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
88 | const FeatureBitset &CallerBits = |
89 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); |
90 | const FeatureBitset &CalleeBits = |
91 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); |
92 | |
93 |   // To inline a callee, all features not in the allowed list must match exactly. |
94 | bool MatchExact = (CallerBits & ~InlineFeaturesAllowed) == |
95 | (CalleeBits & ~InlineFeaturesAllowed); |
96 | |
97 |   // For features in the allowed list, the callee's features must be a subset of the caller's. |
98 | bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeaturesAllowed) == |
99 | (CalleeBits & InlineFeaturesAllowed); |
100 | return MatchExact && MatchSubset; |
101 | } |
102 | |
103 | TTI::AddressingModeKind |
104 | ARMTTIImpl::getPreferredAddressingMode(const Loop *L, |
105 | ScalarEvolution *SE) const { |
106 | if (ST->hasMVEIntegerOps()) |
107 | return TTI::AMK_PostIndexed; |
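     | // MVE loads and stores have cheap post-incrementing forms, e.g. |
     | // "vldrw.u32 q0, [r0], #16", so post-indexed addressing suits MVE loops. |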
108 | |
109 | if (L->getHeader()->getParent()->hasOptSize()) |
110 | return TTI::AMK_None; |
111 | |
112 | if (ST->isMClass() && ST->isThumb2() && |
113 | L->getNumBlocks() == 1) |
114 | return TTI::AMK_PreIndexed; |
115 | |
116 | return TTI::AMK_None; |
117 | } |
118 | |
119 | Optional<Instruction *> |
120 | ARMTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { |
121 | using namespace PatternMatch; |
122 | Intrinsic::ID IID = II.getIntrinsicID(); |
123 | switch (IID) { |
124 | default: |
125 | break; |
126 | case Intrinsic::arm_neon_vld1: { |
127 | Align MemAlign = |
128 | getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, |
129 | &IC.getAssumptionCache(), &IC.getDominatorTree()); |
130 | if (Value *V = simplifyNeonVld1(II, MemAlign.value(), IC.Builder)) { |
131 | return IC.replaceInstUsesWith(II, V); |
132 | } |
133 | break; |
134 | } |
135 | |
136 | case Intrinsic::arm_neon_vld2: |
137 | case Intrinsic::arm_neon_vld3: |
138 | case Intrinsic::arm_neon_vld4: |
139 | case Intrinsic::arm_neon_vld2lane: |
140 | case Intrinsic::arm_neon_vld3lane: |
141 | case Intrinsic::arm_neon_vld4lane: |
142 | case Intrinsic::arm_neon_vst1: |
143 | case Intrinsic::arm_neon_vst2: |
144 | case Intrinsic::arm_neon_vst3: |
145 | case Intrinsic::arm_neon_vst4: |
146 | case Intrinsic::arm_neon_vst2lane: |
147 | case Intrinsic::arm_neon_vst3lane: |
148 | case Intrinsic::arm_neon_vst4lane: { |
149 | Align MemAlign = |
150 | getKnownAlignment(II.getArgOperand(0), IC.getDataLayout(), &II, |
151 | &IC.getAssumptionCache(), &IC.getDominatorTree()); |
152 | unsigned AlignArg = II.getNumArgOperands() - 1; |
153 | Value *AlignArgOp = II.getArgOperand(AlignArg); |
154 | MaybeAlign Align = cast<ConstantInt>(AlignArgOp)->getMaybeAlignValue(); |
155 | if (Align && *Align < MemAlign) { |
156 | return IC.replaceOperand( |
157 | II, AlignArg, |
158 | ConstantInt::get(Type::getInt32Ty(II.getContext()), MemAlign.value(), |
159 | false)); |
160 | } |
161 | break; |
162 | } |
163 | |
164 | case Intrinsic::arm_mve_pred_i2v: { |
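     | // pred_i2v(pred_v2i(x)) is the identity, so the pair folds away. An |
     | // intervening xor-with-all-ones folds through as a vector NOT, and only |
     | // the low 16 bits of the scalar predicate are ever demanded. |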
165 | Value *Arg = II.getArgOperand(0); |
166 | Value *ArgArg; |
167 | if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( |
168 | PatternMatch::m_Value(ArgArg))) && |
169 | II.getType() == ArgArg->getType()) { |
170 | return IC.replaceInstUsesWith(II, ArgArg); |
171 | } |
172 | Constant *XorMask; |
173 | if (match(Arg, m_Xor(PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_v2i>( |
174 | PatternMatch::m_Value(ArgArg)), |
175 | PatternMatch::m_Constant(XorMask))) && |
176 | II.getType() == ArgArg->getType()) { |
177 | if (auto *CI = dyn_cast<ConstantInt>(XorMask)) { |
178 | if (CI->getValue().trunc(16).isAllOnesValue()) { |
179 | auto TrueVector = IC.Builder.CreateVectorSplat( |
180 | cast<FixedVectorType>(II.getType())->getNumElements(), |
181 | IC.Builder.getTrue()); |
182 | return BinaryOperator::Create(Instruction::Xor, ArgArg, TrueVector); |
183 | } |
184 | } |
185 | } |
186 | KnownBits ScalarKnown(32); |
187 | if (IC.SimplifyDemandedBits(&II, 0, APInt::getLowBitsSet(32, 16), |
188 | ScalarKnown, 0)) { |
189 | return &II; |
190 | } |
191 | break; |
192 | } |
193 | case Intrinsic::arm_mve_pred_v2i: { |
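     | // The inverse of the fold above; additionally attach !range [0, 0x10000) |
     | // metadata, since a packed MVE predicate fits in 16 bits. |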
194 | Value *Arg = II.getArgOperand(0); |
195 | Value *ArgArg; |
196 | if (match(Arg, PatternMatch::m_Intrinsic<Intrinsic::arm_mve_pred_i2v>( |
197 | PatternMatch::m_Value(ArgArg)))) { |
198 | return IC.replaceInstUsesWith(II, ArgArg); |
199 | } |
200 | if (!II.getMetadata(LLVMContext::MD_range)) { |
201 | Type *IntTy32 = Type::getInt32Ty(II.getContext()); |
202 | Metadata *M[] = { |
203 | ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0)), |
204 | ConstantAsMetadata::get(ConstantInt::get(IntTy32, 0x10000))}; |
205 | II.setMetadata(LLVMContext::MD_range, MDNode::get(II.getContext(), M)); |
206 | return &II; |
207 | } |
208 | break; |
209 | } |
210 | case Intrinsic::arm_mve_vadc: |
211 | case Intrinsic::arm_mve_vadc_predicated: { |
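     | // Only bit 29 of the carry operand (the position of the carry flag in |
     | // FPSCR) is ever read, so all other bits can be simplified away. |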
212 | unsigned CarryOp = |
213 | (II.getIntrinsicID() == Intrinsic::arm_mve_vadc_predicated) ? 3 : 2; |
214 | assert(II.getArgOperand(CarryOp)->getType()->getScalarSizeInBits() == 32 && |
215 | "Bad type for intrinsic!"); |
216 | |
217 | KnownBits CarryKnown(32); |
218 | if (IC.SimplifyDemandedBits(&II, CarryOp, APInt::getOneBitSet(32, 29), |
219 | CarryKnown)) { |
220 | return &II; |
221 | } |
222 | break; |
223 | } |
224 | case Intrinsic::arm_mve_vmldava: { |
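     | // Fold "add(vmldava(acc = 0, x, y), z)" into "vmldava(acc = z, x, y)", |
     | // absorbing the scalar add into the accumulating reduction. |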
225 | Instruction *I = cast<Instruction>(&II); |
226 | if (I->hasOneUse()) { |
227 | auto *User = cast<Instruction>(*I->user_begin()); |
228 | Value *OpZ; |
229 | if (match(User, m_c_Add(m_Specific(I), m_Value(OpZ))) && |
230 | match(I->getOperand(3), m_Zero())) { |
231 | Value *OpX = I->getOperand(4); |
232 | Value *OpY = I->getOperand(5); |
233 | Type *OpTy = OpX->getType(); |
234 | |
235 | IC.Builder.SetInsertPoint(User); |
236 | Value *V = |
237 | IC.Builder.CreateIntrinsic(Intrinsic::arm_mve_vmldava, {OpTy}, |
238 | {I->getOperand(0), I->getOperand(1), |
239 | I->getOperand(2), OpZ, OpX, OpY}); |
240 | |
241 | IC.replaceInstUsesWith(*User, V); |
242 | return IC.eraseInstFromFunction(*User); |
243 | } |
244 | } |
245 | return None; |
246 | } |
247 | } |
248 | return None; |
249 | } |
250 | |
251 | InstructionCost ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, |
252 | TTI::TargetCostKind CostKind) { |
253 | assert(Ty->isIntegerTy()); |
254 | |
255 | unsigned Bits = Ty->getPrimitiveSizeInBits(); |
256 | if (Bits == 0 || Imm.getActiveBits() >= 64) |
257 | return 4; |
258 | |
259 | int64_t SImmVal = Imm.getSExtValue(); |
260 | uint64_t ZImmVal = Imm.getZExtValue(); |
261 | if (!ST->isThumb()) { |
262 | if ((SImmVal >= 0 && SImmVal < 65536) || |
263 | (ARM_AM::getSOImmVal(ZImmVal) != -1) || |
264 | (ARM_AM::getSOImmVal(~ZImmVal) != -1)) |
265 | return 1; |
266 | return ST->hasV6T2Ops() ? 2 : 3; |
267 | } |
268 | if (ST->isThumb2()) { |
269 | if ((SImmVal >= 0 && SImmVal < 65536) || |
270 | (ARM_AM::getT2SOImmVal(ZImmVal) != -1) || |
271 | (ARM_AM::getT2SOImmVal(~ZImmVal) != -1)) |
272 | return 1; |
273 | return ST->hasV6T2Ops() ? 2 : 3; |
274 | } |
275 | |
276 | if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256)) |
277 | return 1; |
278 | if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal)) |
279 | return 2; |
280 | |
281 | return 3; |
282 | } |
283 | |
284 | // Constants smaller than 256 fit in the immediate field of |
285 | // Thumb1 instructions so we return a zero cost and 1 otherwise. |
286 | InstructionCost ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, |
287 | const APInt &Imm, Type *Ty) { |
288 | if (Imm.isNonNegative() && Imm.getLimitedValue() < 256) |
289 | return 0; |
290 | |
291 | return 1; |
292 | } |
293 | |
294 | // Checks whether Inst is part of a min(max()) or max(min()) pattern |
295 | // that will match to an SSAT instruction. |
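     | // For example, smin(smax(x, -128), 127) clamps x to [-128, 127] and can |
     | // be selected as a single "ssat r0, #8, r1". |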
296 | static bool isSSATMinMaxPattern(Instruction *Inst, const APInt &Imm) { |
297 | Value *LHS, *RHS; |
298 | ConstantInt *C; |
299 | SelectPatternFlavor InstSPF = matchSelectPattern(Inst, LHS, RHS).Flavor; |
300 | |
301 | if (InstSPF == SPF_SMAX && |
302 | PatternMatch::match(RHS, PatternMatch::m_ConstantInt(C)) && |
303 | C->getValue() == Imm && Imm.isNegative() && (-Imm).isPowerOf2()) { |
304 | |
305 | auto isSSatMin = [&](Value *MinInst) { |
306 | if (isa<SelectInst>(MinInst)) { |
307 | Value *MinLHS, *MinRHS; |
308 | ConstantInt *MinC; |
309 | SelectPatternFlavor MinSPF = |
310 | matchSelectPattern(MinInst, MinLHS, MinRHS).Flavor; |
311 | if (MinSPF == SPF_SMIN && |
312 | PatternMatch::match(MinRHS, PatternMatch::m_ConstantInt(MinC)) && |
313 | MinC->getValue() == ((-Imm) - 1)) |
314 | return true; |
315 | } |
316 | return false; |
317 | }; |
318 | |
319 | if (isSSatMin(Inst->getOperand(1)) || |
320 | (Inst->hasNUses(2) && (isSSatMin(*Inst->user_begin()) || |
321 | isSSatMin(*(++Inst->user_begin()))))) |
322 | return true; |
323 | } |
324 | return false; |
325 | } |
326 | |
327 | InstructionCost ARMTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, |
328 | const APInt &Imm, Type *Ty, |
329 | TTI::TargetCostKind CostKind, |
330 | Instruction *Inst) { |
331 | |
332 |   // Division by a constant can be turned into multiplication, but only if we |
333 |   // know it's constant. So it's not so much that the immediate is cheap (it's |
334 |   // not), but that the alternative is worse. |
335 | if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv || |
336 | Opcode == Instruction::SRem || Opcode == Instruction::URem) && |
337 | Idx == 1) |
338 | return 0; |
339 | |
340 |   // Leave GEP offsets alone: immediate offsets fold into the address |
341 |   // computation, so they are effectively free. |
342 | if (Opcode == Instruction::GetElementPtr && Idx != 0) |
343 | return 0; |
344 | |
345 | if (Opcode == Instruction::And) { |
346 |     // UXTB/UXTH: these zero-extension masks are free. |
347 | if (Imm == 255 || Imm == 65535) |
348 | return 0; |
349 |     // Conversion to BIC is free, and means we can use ~Imm instead. |
350 | return std::min(getIntImmCost(Imm, Ty, CostKind), |
351 | getIntImmCost(~Imm, Ty, CostKind)); |
352 | } |
353 | |
354 | if (Opcode == Instruction::Add) |
355 |     // Conversion to SUB is free, and means we can use -Imm instead. |
356 | return std::min(getIntImmCost(Imm, Ty, CostKind), |
357 | getIntImmCost(-Imm, Ty, CostKind)); |
358 | |
359 | if (Opcode == Instruction::ICmp && Imm.isNegative() && |
360 | Ty->getIntegerBitWidth() == 32) { |
361 | int64_t NegImm = -Imm.getSExtValue(); |
362 | if (ST->isThumb2() && NegImm < 1<<12) |
363 |       // icmp X, #-C -> cmn X, #C |
364 | return 0; |
365 | if (ST->isThumb() && NegImm < 1<<8) |
366 |       // icmp X, #-C -> adds X, #C |
367 | return 0; |
368 | } |
369 | |
370 |   // xor a, -1 can always be folded to MVN. |
371 | if (Opcode == Instruction::Xor && Imm.isAllOnesValue()) |
372 | return 0; |
373 | |
374 |   // Ensures negative constants of min(max()) or max(min()) patterns that |
375 |   // match to SSAT instructions don't get hoisted. |
376 | if (Inst && ((ST->hasV6Ops() && !ST->isThumb()) || ST->isThumb2()) && |
377 | Ty->getIntegerBitWidth() <= 32) { |
378 | if (isSSATMinMaxPattern(Inst, Imm) || |
379 | (isa<ICmpInst>(Inst) && Inst->hasOneUse() && |
380 | isSSATMinMaxPattern(cast<Instruction>(*Inst->user_begin()), Imm))) |
381 | return 0; |
382 | } |
383 | |
384 | return getIntImmCost(Imm, Ty, CostKind); |
385 | } |
386 | |
387 | InstructionCost ARMTTIImpl::getCFInstrCost(unsigned Opcode, |
388 | TTI::TargetCostKind CostKind, |
389 | const Instruction *I) { |
390 | if (CostKind == TTI::TCK_RecipThroughput && |
391 | (ST->hasNEON() || ST->hasMVEIntegerOps())) { |
392 |     // FIXME: The vectorizer is highly sensitive to the cost of these |
393 |     // instructions, which suggests that it may be using the costs incorrectly. |
394 |     // But, for now, just make them free to avoid performance regressions for |
395 |     // vector targets. |
396 | return 0; |
397 | } |
398 | return BaseT::getCFInstrCost(Opcode, CostKind, I); |
399 | } |
400 | |
401 | InstructionCost ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, |
402 | Type *Src, |
403 | TTI::CastContextHint CCH, |
404 | TTI::TargetCostKind CostKind, |
405 | const Instruction *I) { |
406 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
407 | assert(ISD && "Invalid opcode"); |
408 | |
409 |   // TODO: Allow non-throughput costs that aren't binary. |
410 | auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost { |
411 | if (CostKind != TTI::TCK_RecipThroughput) |
412 | return Cost == 0 ? 0 : 1; |
413 | return Cost; |
414 | }; |
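     | // i.e. for code-size or latency queries every cast costs either zero or |
     | // one; only throughput queries use the detailed tables below. |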
415 | auto IsLegalFPType = [this](EVT VT) { |
416 | EVT EltVT = VT.getScalarType(); |
417 | return (EltVT == MVT::f32 && ST->hasVFP2Base()) || |
418 | (EltVT == MVT::f64 && ST->hasFP64()) || |
419 | (EltVT == MVT::f16 && ST->hasFullFP16()); |
420 | }; |
421 | |
422 | EVT SrcTy = TLI->getValueType(DL, Src); |
423 | EVT DstTy = TLI->getValueType(DL, Dst); |
424 | |
425 | if (!SrcTy.isSimple() || !DstTy.isSimple()) |
426 | return AdjustCost( |
427 | BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); |
428 | |
429 | |
430 |   // Extending masked loads or truncating masked stores past 128 bits is |
431 |   // expensive because the predicate mask must be recreated at the new width. |
432 | if ((ST->hasMVEIntegerOps() && |
433 | (Opcode == Instruction::Trunc || Opcode == Instruction::ZExt || |
434 | Opcode == Instruction::SExt)) || |
435 | (ST->hasMVEFloatOps() && |
436 | (Opcode == Instruction::FPExt || Opcode == Instruction::FPTrunc) && |
437 | IsLegalFPType(SrcTy) && IsLegalFPType(DstTy))) |
438 | if (CCH == TTI::CastContextHint::Masked && DstTy.getSizeInBits() > 128) |
439 | return 2 * DstTy.getVectorNumElements() * |
440 | ST->getMVEVectorCostFactor(CostKind); |
441 | |
442 |   // The extend of a load is free. |
443 | if (CCH == TTI::CastContextHint::Normal || |
444 | CCH == TTI::CastContextHint::Masked) { |
445 | static const TypeConversionCostTblEntry LoadConversionTbl[] = { |
446 | {ISD::SIGN_EXTEND, MVT::i32, MVT::i16, 0}, |
447 | {ISD::ZERO_EXTEND, MVT::i32, MVT::i16, 0}, |
448 | {ISD::SIGN_EXTEND, MVT::i32, MVT::i8, 0}, |
449 | {ISD::ZERO_EXTEND, MVT::i32, MVT::i8, 0}, |
450 | {ISD::SIGN_EXTEND, MVT::i16, MVT::i8, 0}, |
451 | {ISD::ZERO_EXTEND, MVT::i16, MVT::i8, 0}, |
452 | {ISD::SIGN_EXTEND, MVT::i64, MVT::i32, 1}, |
453 | {ISD::ZERO_EXTEND, MVT::i64, MVT::i32, 1}, |
454 | {ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 1}, |
455 | {ISD::ZERO_EXTEND, MVT::i64, MVT::i16, 1}, |
456 | {ISD::SIGN_EXTEND, MVT::i64, MVT::i8, 1}, |
457 | {ISD::ZERO_EXTEND, MVT::i64, MVT::i8, 1}, |
458 | }; |
459 | if (const auto *Entry = ConvertCostTableLookup( |
460 | LoadConversionTbl, ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT())) |
461 | return AdjustCost(Entry->Cost); |
462 | |
463 | static const TypeConversionCostTblEntry MVELoadConversionTbl[] = { |
464 | {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0}, |
465 | {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0}, |
466 | {ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 0}, |
467 | {ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 0}, |
468 | {ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 0}, |
469 | {ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 0}, |
470 | |
471 |         // The following extend from a legal type to an illegal type, so the |
472 |         // operation needs to be split up. |
473 | {ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1}, |
474 | {ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1}, |
475 | {ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 3}, |
476 | {ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 3}, |
477 | {ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1}, |
478 | {ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1}, |
479 | }; |
480 | if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { |
481 | if (const auto *Entry = |
482 | ConvertCostTableLookup(MVELoadConversionTbl, ISD, |
483 | DstTy.getSimpleVT(), SrcTy.getSimpleVT())) |
484 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind); |
485 | } |
486 | |
487 | static const TypeConversionCostTblEntry MVEFLoadConversionTbl[] = { |
488 | |
489 | {ISD::FP_EXTEND, MVT::v4f32, MVT::v4f16, 1}, |
490 | {ISD::FP_EXTEND, MVT::v8f32, MVT::v8f16, 3}, |
491 | }; |
492 | if (SrcTy.isVector() && ST->hasMVEFloatOps()) { |
493 | if (const auto *Entry = |
494 | ConvertCostTableLookup(MVEFLoadConversionTbl, ISD, |
495 | DstTy.getSimpleVT(), SrcTy.getSimpleVT())) |
496 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind); |
497 | } |
498 | |
499 | |
500 | static const TypeConversionCostTblEntry MVEStoreConversionTbl[] = { |
501 | {ISD::TRUNCATE, MVT::v4i32, MVT::v4i16, 0}, |
502 | {ISD::TRUNCATE, MVT::v4i32, MVT::v4i8, 0}, |
503 | {ISD::TRUNCATE, MVT::v8i16, MVT::v8i8, 0}, |
504 | {ISD::TRUNCATE, MVT::v8i32, MVT::v8i16, 1}, |
505 | {ISD::TRUNCATE, MVT::v8i32, MVT::v8i8, 1}, |
506 | {ISD::TRUNCATE, MVT::v16i32, MVT::v16i8, 3}, |
507 | {ISD::TRUNCATE, MVT::v16i16, MVT::v16i8, 1}, |
508 | }; |
509 | if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { |
510 | if (const auto *Entry = |
511 | ConvertCostTableLookup(MVEStoreConversionTbl, ISD, |
512 | SrcTy.getSimpleVT(), DstTy.getSimpleVT())) |
513 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind); |
514 | } |
515 | |
516 | static const TypeConversionCostTblEntry MVEFStoreConversionTbl[] = { |
517 | {ISD::FP_ROUND, MVT::v4f32, MVT::v4f16, 1}, |
518 | {ISD::FP_ROUND, MVT::v8f32, MVT::v8f16, 3}, |
519 | }; |
520 | if (SrcTy.isVector() && ST->hasMVEFloatOps()) { |
521 | if (const auto *Entry = |
522 | ConvertCostTableLookup(MVEFStoreConversionTbl, ISD, |
523 | SrcTy.getSimpleVT(), DstTy.getSimpleVT())) |
524 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind); |
525 | } |
526 | } |
527 | |
528 |   // NEON widening ops (vaddl, vsubl, vmull, vshll) make these extends free. |
529 | if ((ISD == ISD::SIGN_EXTEND || ISD == ISD::ZERO_EXTEND) && |
530 | I && I->hasOneUse() && ST->hasNEON() && SrcTy.isVector()) { |
531 | static const TypeConversionCostTblEntry NEONDoubleWidthTbl[] = { |
532 | |
533 | { ISD::ADD, MVT::v4i32, MVT::v4i16, 0 }, |
534 | { ISD::ADD, MVT::v8i16, MVT::v8i8, 0 }, |
535 | |
536 | { ISD::SUB, MVT::v4i32, MVT::v4i16, 0 }, |
537 | { ISD::SUB, MVT::v8i16, MVT::v8i8, 0 }, |
538 | |
539 | { ISD::MUL, MVT::v4i32, MVT::v4i16, 0 }, |
540 | { ISD::MUL, MVT::v8i16, MVT::v8i8, 0 }, |
541 | |
542 | { ISD::SHL, MVT::v4i32, MVT::v4i16, 0 }, |
543 | { ISD::SHL, MVT::v8i16, MVT::v8i8, 0 }, |
544 | }; |
545 | |
546 | auto *User = cast<Instruction>(*I->user_begin()); |
547 | int UserISD = TLI->InstructionOpcodeToISD(User->getOpcode()); |
548 | if (auto *Entry = ConvertCostTableLookup(NEONDoubleWidthTbl, UserISD, |
549 | DstTy.getSimpleVT(), |
550 | SrcTy.getSimpleVT())) { |
551 | return AdjustCost(Entry->Cost); |
552 | } |
553 | } |
554 | |
555 |   // Single to/from double precision conversions. |
556 | if (Src->isVectorTy() && ST->hasNEON() && |
557 | ((ISD == ISD::FP_ROUND && SrcTy.getScalarType() == MVT::f64 && |
558 | DstTy.getScalarType() == MVT::f32) || |
559 | (ISD == ISD::FP_EXTEND && SrcTy.getScalarType() == MVT::f32 && |
560 | DstTy.getScalarType() == MVT::f64))) { |
561 | static const CostTblEntry NEONFltDblTbl[] = { |
562 | |
563 | {ISD::FP_ROUND, MVT::v2f64, 2}, |
564 | {ISD::FP_EXTEND, MVT::v2f32, 2}, |
565 | {ISD::FP_EXTEND, MVT::v4f32, 4}}; |
566 | |
567 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); |
568 | if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second)) |
569 | return AdjustCost(LT.first * Entry->Cost); |
570 | } |
571 | |
572 | |
573 | |
574 | |
575 | static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = { |
576 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, |
577 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, |
578 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, |
579 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 }, |
580 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 0 }, |
581 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, |
582 | |
583 | |
584 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, |
585 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, |
586 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, |
587 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, |
588 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 3 }, |
589 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 3 }, |
590 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, |
591 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, |
592 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, |
593 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 }, |
594 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, |
595 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 }, |
596 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, |
597 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 }, |
598 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, |
599 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 }, |
600 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, |
601 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 }, |
602 | |
603 | |
604 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 6 }, |
605 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, |
606 | |
607 | |
608 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
609 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
610 | |
611 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, |
612 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i8, 3 }, |
613 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, |
614 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i16, 2 }, |
615 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, |
616 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, |
617 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, |
618 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, |
619 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, |
620 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 3 }, |
621 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, |
622 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i16, 2 }, |
623 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
624 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
625 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
626 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
627 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, |
628 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 8 }, |
629 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, |
630 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 4 }, |
631 | |
632 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, |
633 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, |
634 | { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 3 }, |
635 | { ISD::FP_TO_UINT, MVT::v4i8, MVT::v4f32, 3 }, |
636 | { ISD::FP_TO_SINT, MVT::v4i16, MVT::v4f32, 2 }, |
637 | { ISD::FP_TO_UINT, MVT::v4i16, MVT::v4f32, 2 }, |
638 | |
639 | |
640 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, |
641 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, |
642 | |
643 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, |
644 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i8, 4 }, |
645 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, |
646 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i16, 3 }, |
647 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, |
648 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 2 }, |
649 | |
650 | { ISD::FP_TO_SINT, MVT::v2i32, MVT::v2f64, 2 }, |
651 | { ISD::FP_TO_UINT, MVT::v2i32, MVT::v2f64, 2 }, |
652 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 4 }, |
653 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 4 }, |
654 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f32, 8 }, |
655 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 8 } |
656 | }; |
657 | |
658 | if (SrcTy.isVector() && ST->hasNEON()) { |
659 | if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD, |
660 | DstTy.getSimpleVT(), |
661 | SrcTy.getSimpleVT())) |
662 | return AdjustCost(Entry->Cost); |
663 | } |
664 | |
665 | |
666 | static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = { |
667 | { ISD::FP_TO_SINT, MVT::i1, MVT::f32, 2 }, |
668 | { ISD::FP_TO_UINT, MVT::i1, MVT::f32, 2 }, |
669 | { ISD::FP_TO_SINT, MVT::i1, MVT::f64, 2 }, |
670 | { ISD::FP_TO_UINT, MVT::i1, MVT::f64, 2 }, |
671 | { ISD::FP_TO_SINT, MVT::i8, MVT::f32, 2 }, |
672 | { ISD::FP_TO_UINT, MVT::i8, MVT::f32, 2 }, |
673 | { ISD::FP_TO_SINT, MVT::i8, MVT::f64, 2 }, |
674 | { ISD::FP_TO_UINT, MVT::i8, MVT::f64, 2 }, |
675 | { ISD::FP_TO_SINT, MVT::i16, MVT::f32, 2 }, |
676 | { ISD::FP_TO_UINT, MVT::i16, MVT::f32, 2 }, |
677 | { ISD::FP_TO_SINT, MVT::i16, MVT::f64, 2 }, |
678 | { ISD::FP_TO_UINT, MVT::i16, MVT::f64, 2 }, |
679 | { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 2 }, |
680 | { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 2 }, |
681 | { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 2 }, |
682 | { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 2 }, |
683 | { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 10 }, |
684 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 10 }, |
685 | { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 10 }, |
686 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 10 } |
687 | }; |
688 | if (SrcTy.isFloatingPoint() && ST->hasNEON()) { |
689 | if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD, |
690 | DstTy.getSimpleVT(), |
691 | SrcTy.getSimpleVT())) |
692 | return AdjustCost(Entry->Cost); |
693 | } |
694 | |
695 | |
696 | static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = { |
697 | { ISD::SINT_TO_FP, MVT::f32, MVT::i1, 2 }, |
698 | { ISD::UINT_TO_FP, MVT::f32, MVT::i1, 2 }, |
699 | { ISD::SINT_TO_FP, MVT::f64, MVT::i1, 2 }, |
700 | { ISD::UINT_TO_FP, MVT::f64, MVT::i1, 2 }, |
701 | { ISD::SINT_TO_FP, MVT::f32, MVT::i8, 2 }, |
702 | { ISD::UINT_TO_FP, MVT::f32, MVT::i8, 2 }, |
703 | { ISD::SINT_TO_FP, MVT::f64, MVT::i8, 2 }, |
704 | { ISD::UINT_TO_FP, MVT::f64, MVT::i8, 2 }, |
705 | { ISD::SINT_TO_FP, MVT::f32, MVT::i16, 2 }, |
706 | { ISD::UINT_TO_FP, MVT::f32, MVT::i16, 2 }, |
707 | { ISD::SINT_TO_FP, MVT::f64, MVT::i16, 2 }, |
708 | { ISD::UINT_TO_FP, MVT::f64, MVT::i16, 2 }, |
709 | { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 2 }, |
710 | { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 2 }, |
711 | { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 2 }, |
712 | { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 2 }, |
713 | { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 10 }, |
714 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 10 }, |
715 | { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 10 }, |
716 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 10 } |
717 | }; |
718 | |
719 | if (SrcTy.isInteger() && ST->hasNEON()) { |
720 | if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl, |
721 | ISD, DstTy.getSimpleVT(), |
722 | SrcTy.getSimpleVT())) |
723 | return AdjustCost(Entry->Cost); |
724 | } |
725 | |
726 | |
727 | |
728 | |
729 | static const TypeConversionCostTblEntry MVEVectorConversionTbl[] = { |
730 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, |
731 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, |
732 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, |
733 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, |
734 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i8, 10 }, |
735 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i8, 2 }, |
736 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, |
737 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, |
738 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i16, 10 }, |
739 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i16, 2 }, |
740 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 8 }, |
741 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 2 }, |
742 | }; |
743 | |
744 | if (SrcTy.isVector() && ST->hasMVEIntegerOps()) { |
745 | if (const auto *Entry = ConvertCostTableLookup(MVEVectorConversionTbl, |
746 | ISD, DstTy.getSimpleVT(), |
747 | SrcTy.getSimpleVT())) |
748 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind); |
749 | } |
750 | |
751 | if (ISD == ISD::FP_ROUND || ISD == ISD::FP_EXTEND) { |
752 |     // As a general rule, fp converts that were not matched above are scalarized |
753 |     // and cost 1 vcvt for each lane, so long as the instruction is available. |
754 |     // If not it will become a series of function calls. |
755 | const InstructionCost CallCost = |
756 | getCallInstrCost(nullptr, Dst, {Src}, CostKind); |
757 | int Lanes = 1; |
758 | if (SrcTy.isFixedLengthVector()) |
759 | Lanes = SrcTy.getVectorNumElements(); |
760 | |
761 | if (IsLegalFPType(SrcTy) && IsLegalFPType(DstTy)) |
762 | return Lanes; |
763 | else |
764 | return Lanes * CallCost; |
765 | } |
766 | |
767 | if (ISD == ISD::TRUNCATE && ST->hasMVEIntegerOps() && |
768 | SrcTy.isFixedLengthVector()) { |
769 |     // Treat a truncate with larger than legal source (128 bits for MVE) as |
770 |     // expensive, 2 instructions per lane. |
771 | if ((SrcTy.getScalarType() == MVT::i8 || |
772 | SrcTy.getScalarType() == MVT::i16 || |
773 | SrcTy.getScalarType() == MVT::i32) && |
774 | SrcTy.getSizeInBits() > 128 && |
775 | SrcTy.getSizeInBits() > DstTy.getSizeInBits()) |
776 | return SrcTy.getVectorNumElements() * 2; |
777 | } |
778 | |
779 | |
780 | static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = { |
781 |     // i16 -> i64 requires two dependent operations. |
782 | { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 }, |
783 | |
784 |     // Truncates on i64 are assumed to be free. |
785 | { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 }, |
786 | { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 }, |
787 | { ISD::TRUNCATE, MVT::i8, MVT::i64, 0 }, |
788 | { ISD::TRUNCATE, MVT::i1, MVT::i64, 0 } |
789 | }; |
790 | |
791 | if (SrcTy.isInteger()) { |
792 | if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD, |
793 | DstTy.getSimpleVT(), |
794 | SrcTy.getSimpleVT())) |
795 | return AdjustCost(Entry->Cost); |
796 | } |
797 | |
798 | int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() |
799 | ? ST->getMVEVectorCostFactor(CostKind) |
800 | : 1; |
801 | return AdjustCost( |
802 | BaseCost * BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); |
803 | } |
804 | |
805 | InstructionCost ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy, |
806 | unsigned Index) { |
807 |   // Penalize inserting into a D-subregister. We end up with a three times |
808 |   // lower estimated throughput on swift. |
809 | if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement && |
810 | ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32) |
811 | return 3; |
812 | |
813 | if (ST->hasNEON() && (Opcode == Instruction::InsertElement || |
814 | Opcode == Instruction::ExtractElement)) { |
815 |     // Cross-class copies are expensive on many microarchitectures, |
816 |     // so assume they are expensive by default. |
817 | if (cast<VectorType>(ValTy)->getElementType()->isIntegerTy()) |
818 | return 3; |
819 | |
820 |     // Even if it's not a cross class copy, this likely leads to mixing |
821 |     // of NEON and VFP code and should therefore be penalized. |
822 | if (ValTy->isVectorTy() && |
823 | ValTy->getScalarSizeInBits() <= 32) |
824 | return std::max<InstructionCost>( |
825 | BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U); |
826 | } |
827 | |
828 | if (ST->hasMVEIntegerOps() && (Opcode == Instruction::InsertElement || |
829 | Opcode == Instruction::ExtractElement)) { |
830 |     // We say MVE moves cost at least the MVEVectorCostFactor, even though |
831 |     // they are scalar instructions. This helps prevent mixing scalar and |
832 |     // vector code, which would otherwise just end up being scalarised. |
833 | std::pair<InstructionCost, MVT> LT = |
834 | getTLI()->getTypeLegalizationCost(DL, ValTy->getScalarType()); |
835 | return LT.first * (ValTy->getScalarType()->isIntegerTy() ? 4 : 1); |
836 | } |
837 | |
838 | return BaseT::getVectorInstrCost(Opcode, ValTy, Index); |
839 | } |
840 | |
841 | InstructionCost ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
842 | Type *CondTy, |
843 | CmpInst::Predicate VecPred, |
844 | TTI::TargetCostKind CostKind, |
845 | const Instruction *I) { |
846 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
847 | |
848 | |
849 | if (CostKind == TTI::TCK_CodeSize && ISD == ISD::SELECT && |
| 40 | Assuming 'CostKind' is not equal to TCK_CodeSize |
850 | ST->isThumb() && !ValTy->isVectorTy()) { |
851 |     // Assume expensive structs. |
852 | if (TLI->getValueType(DL, ValTy, true) == MVT::Other) |
853 | return TTI::TCC_Expensive; |
854 | |
855 |     // Select costs can vary because they: |
856 |     // - may require one or more conditional mov (or an ANDcc/ORRcc). |
857 |     // - can't operate directly on immediates. |
858 |     // - require live flags, which we can't copy around easily. |
859 | InstructionCost Cost = TLI->getTypeLegalizationCost(DL, ValTy).first; |
860 | |
861 |     // Possible cmp mov sel sequence. |
862 | ++Cost; |
863 | |
864 | |
865 | |
866 | if (ValTy->isIntegerTy(1)) |
867 | ++Cost; |
868 | |
869 | return Cost; |
870 | } |
871 | |
872 |   // If the cmp + select forms a min/max/abs idiom, cost it as the matching |
873 |   // vector intrinsic instead, since that is how it will be lowered after |
874 |   // the patterns are matched. |
875 | const Instruction *Sel = I; |
876 | if ((Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && Sel && |
877 | Sel->hasOneUse()) |
878 | Sel = cast<Instruction>(Sel->user_back()); |
879 | if (Sel && ValTy->isVectorTy() && |
880 | (ValTy->isIntOrIntVectorTy() || ValTy->isFPOrFPVectorTy())) { |
881 | const Value *LHS, *RHS; |
882 | SelectPatternFlavor SPF = matchSelectPattern(Sel, LHS, RHS).Flavor; |
883 | unsigned IID = 0; |
884 | switch (SPF) { |
885 | case SPF_ABS: |
886 | IID = Intrinsic::abs; |
887 | break; |
888 | case SPF_SMIN: |
889 | IID = Intrinsic::smin; |
890 | break; |
891 | case SPF_SMAX: |
892 | IID = Intrinsic::smax; |
893 | break; |
894 | case SPF_UMIN: |
895 | IID = Intrinsic::umin; |
896 | break; |
897 | case SPF_UMAX: |
898 | IID = Intrinsic::umax; |
899 | break; |
900 | case SPF_FMINNUM: |
901 | IID = Intrinsic::minnum; |
902 | break; |
903 | case SPF_FMAXNUM: |
904 | IID = Intrinsic::maxnum; |
905 | break; |
906 | default: |
907 | break; |
908 | } |
909 | if (IID) { |
910 |       // The ICmp is free, the select gets the cost of the min/max/etc. |
911 | if (Sel != I) |
912 | return 0; |
913 | IntrinsicCostAttributes CostAttrs(IID, ValTy, {ValTy, ValTy}); |
914 | return getIntrinsicInstrCost(CostAttrs, CostKind); |
915 | } |
916 | } |
917 | |
918 |   // On NEON a vector select gets lowered to vbsl. |
919 | if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT && CondTy) { |
| 41 | Assuming the condition is false |
920 |     // Lowering of some vector selects is currently far from perfect. |
921 | static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = { |
922 | { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, |
923 | { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, |
924 | { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } |
925 | }; |
926 | |
927 | EVT SelCondTy = TLI->getValueType(DL, CondTy); |
928 | EVT SelValTy = TLI->getValueType(DL, ValTy); |
929 | if (SelCondTy.isSimple() && SelValTy.isSimple()) { |
930 | if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD, |
931 | SelCondTy.getSimpleVT(), |
932 | SelValTy.getSimpleVT())) |
933 | return Entry->Cost; |
934 | } |
935 | |
936 | std::pair<InstructionCost, MVT> LT = |
937 | TLI->getTypeLegalizationCost(DL, ValTy); |
938 | return LT.first; |
939 | } |
940 | |
941 | if (ST->hasMVEIntegerOps() && ValTy->isVectorTy() && |
| 42 | Assuming the condition is true |
| 43 | Calling 'Type::isVectorTy' |
| 47 | Returning from 'Type::isVectorTy' |
942 | (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp) && |
943 | cast<FixedVectorType>(ValTy)->getNumElements() > 1) { |
| 48 | 'ValTy' is a 'FixedVectorType' |
| 49 | Assuming the condition is true |
944 | FixedVectorType *VecValTy = cast<FixedVectorType>(ValTy); |
| 51 | 'ValTy' is a 'FixedVectorType' |
945 | FixedVectorType *VecCondTy = dyn_cast_or_null<FixedVectorType>(CondTy); |
| 52 | Assuming pointer value is null |
| 53 | Assuming null pointer is passed into cast |
946 | if (!VecCondTy) |
| 54 | Taking true branch |
947 | VecCondTy = cast<FixedVectorType>(CmpInst::makeCmpResultType(VecValTy)); |
| 55 | The object is a 'FixedVectorType' |
948 | |
949 |     // If we don't have mve.fp, any fp operations will need to be scalarized. |
950 | if (Opcode == Instruction::FCmp && !ST->hasMVEFloatOps()) { |
951 |       // One scalarization insert, one scalarization extract and the cost |
952 |       // of the fcmps. |
953 | return BaseT::getScalarizationOverhead(VecValTy, false, true) + |
954 | BaseT::getScalarizationOverhead(VecCondTy, true, false) + |
955 | VecValTy->getNumElements() * |
956 | getCmpSelInstrCost(Opcode, ValTy->getScalarType(), |
957 | VecCondTy->getScalarType(), VecPred, CostKind, |
958 | I); |
959 | } |
960 | |
961 | std::pair<InstructionCost, MVT> LT = |
962 | TLI->getTypeLegalizationCost(DL, ValTy); |
963 | int BaseCost = ST->getMVEVectorCostFactor(CostKind); |
964 |     // There are two types - the input that specifies the type of the compare |
965 |     // and the output vXi1 type. Because we don't know how the output will be |
966 |     // split, we may need an expensive shuffle to get two in sync. This has |
967 |     // the effect of making larger than legal compares (v8i32 for example) |
968 |     // scalarize. |
969 | if (LT.second.getVectorNumElements() > 2) { |
| 56 | Assuming the condition is false |
| 57 | Taking false branch |
970 | if (LT.first > 1) |
971 | return LT.first * BaseCost + |
972 | BaseT::getScalarizationOverhead(VecCondTy, true, false); |
973 | return BaseCost; |
974 | } |
975 | } |
976 | |
977 |   // Default to cheap (throughput/size of 1 instruction) but adjust throughput |
978 |   // for "multiple beats" potentially needed by MVE instructions. |
979 | int BaseCost = 1; |
980 | if (ST->hasMVEIntegerOps() && ValTy->isVectorTy()) |
| 58 | Taking true branch |
981 | BaseCost = ST->getMVEVectorCostFactor(CostKind); |
982 | |
983 | return BaseCost * |
984 | BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
| 59 | Passing null pointer value via 3rd parameter 'CondTy' |
| 60 | Calling 'BasicTTIImplBase::getCmpSelInstrCost' |
985 | } |
986 | |
987 | InstructionCost ARMTTIImpl::getAddressComputationCost(Type *Ty, |
988 | ScalarEvolution *SE, |
989 | const SCEV *Ptr) { |
990 |   // Address computations in vectorized code with non-consecutive addresses will |
991 |   // likely result in more instructions compared to scalar code where the |
992 |   // computation can more often be merged into the index mode. The resulting |
993 |   // extra micro-ops can significantly decrease throughput. |
994 | unsigned NumVectorInstToHideOverhead = 10; |
995 | int MaxMergeDistance = 64; |
996 | |
997 | if (ST->hasNEON()) { |
998 | if (Ty->isVectorTy() && SE && |
999 | !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1)) |
1000 | return NumVectorInstToHideOverhead; |
1001 | |
1002 |     // In many cases the address computation is not merged into the |
1003 |     // instruction addressing mode. |
1004 | return 1; |
1005 | } |
1006 | return BaseT::getAddressComputationCost(Ty, SE, Ptr); |
1007 | } |
1008 | |
1009 | bool ARMTTIImpl::isProfitableLSRChainElement(Instruction *I) { |
1010 | if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { |
1011 |     // If a VCTP is part of a chain, it's already profitable and shouldn't be |
1012 |     // optimized, else LSR may block tail-predication. |
1013 | switch (II->getIntrinsicID()) { |
1014 | case Intrinsic::arm_mve_vctp8: |
1015 | case Intrinsic::arm_mve_vctp16: |
1016 | case Intrinsic::arm_mve_vctp32: |
1017 | case Intrinsic::arm_mve_vctp64: |
1018 | return true; |
1019 | default: |
1020 | break; |
1021 | } |
1022 | } |
1023 | return false; |
1024 | } |
1025 | |
1026 | bool ARMTTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) { |
1027 | if (!EnableMaskedLoadStores || !ST->hasMVEIntegerOps()) |
1028 | return false; |
1029 | |
1030 | if (auto *VecTy = dyn_cast<FixedVectorType>(DataTy)) { |
1031 |     // Don't support v2i1 yet. |
1032 | if (VecTy->getNumElements() == 2) |
1033 | return false; |
1034 | |
1035 |     // We don't support extending fp types. |
1036 | unsigned VecWidth = DataTy->getPrimitiveSizeInBits(); |
1037 | if (VecWidth != 128 && VecTy->getElementType()->isFloatingPointTy()) |
1038 | return false; |
1039 | } |
1040 | |
1041 | unsigned EltWidth = DataTy->getScalarSizeInBits(); |
1042 | return (EltWidth == 32 && Alignment >= 4) || |
1043 | (EltWidth == 16 && Alignment >= 2) || (EltWidth == 8); |
1044 | } |
1045 | |
1046 | bool ARMTTIImpl::isLegalMaskedGather(Type *Ty, Align Alignment) { |
1047 | if (!EnableMaskedGatherScatters || !ST->hasMVEIntegerOps()) |
1048 | return false; |
1049 | |
1050 |   // This method is called in 2 places: |
1051 |   //  - from the vectorizer with a scalar type, in which case we need to get |
1052 |   //    this as good as we can with the limited info we have (and rely on the |
1053 |   //    cost model for the rest). |
1054 |   //  - from the masked intrinsic lowering pass with the actual vector type. |
1055 |   // For MVE, we have a custom lowering pass that will already have custom |
1056 |   // legalised any gathers that we can lower to MVE intrinsics, and want to |
1057 |   // expand all the rest. The pass runs before the masked intrinsic lowering |
1058 |   // pass. |
1059 | if (isa<VectorType>(Ty)) |
1060 | return false; |
1061 | |
1062 | unsigned EltWidth = Ty->getScalarSizeInBits(); |
1063 | return ((EltWidth == 32 && Alignment >= 4) || |
1064 | (EltWidth == 16 && Alignment >= 2) || EltWidth == 8); |
1065 | } |
1066 | |
1067 | /// Given a memcpy/memset/memmove instruction, return the number of memory |
1068 | /// operations performed, via querying findOptimalMemOpLowering. Returns -1 |
1069 | /// if a call is used instead. |
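     | // Behaviour sketch (exact numbers are target- and size-dependent): a |
     | // 16-byte memcpy lowered as four i32 loads and four i32 stores reports |
     | // MemOps.size() * Factor = 4 * 2 = 8 operations. |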
1070 | int ARMTTIImpl::getNumMemOps(const IntrinsicInst *I) const { |
1071 | MemOp MOp; |
1072 | unsigned DstAddrSpace = ~0u; |
1073 | unsigned SrcAddrSpace = ~0u; |
1074 | const Function *F = I->getParent()->getParent(); |
1075 | |
1076 | if (const auto *MC = dyn_cast<MemTransferInst>(I)) { |
1077 | ConstantInt *C = dyn_cast<ConstantInt>(MC->getLength()); |
1078 |     // If 'size' is not a constant, a library call will be generated. |
1079 | if (!C) |
1080 | return -1; |
1081 | |
1082 | const unsigned Size = C->getValue().getZExtValue(); |
1083 | const Align DstAlign = *MC->getDestAlign(); |
1084 | const Align SrcAlign = *MC->getSourceAlign(); |
1085 | |
1086 | MOp = MemOp::Copy(Size, false, DstAlign, SrcAlign, |
1087 | false); |
1088 | DstAddrSpace = MC->getDestAddressSpace(); |
1089 | SrcAddrSpace = MC->getSourceAddressSpace(); |
1090 | } |
1091 | else if (const auto *MS = dyn_cast<MemSetInst>(I)) { |
1092 | ConstantInt *C = dyn_cast<ConstantInt>(MS->getLength()); |
1093 |     // If 'size' is not a constant, a library call will be generated. |
1094 | if (!C) |
1095 | return -1; |
1096 | |
1097 | const unsigned Size = C->getValue().getZExtValue(); |
1098 | const Align DstAlign = *MS->getDestAlign(); |
1099 | |
1100 | MOp = MemOp::Set(Size, false, DstAlign, |
1101 | false, false); |
1102 | DstAddrSpace = MS->getDestAddressSpace(); |
1103 | } |
1104 | else |
1105 | llvm_unreachable("Expected a memcpy/move or memset!"); |
1106 | |
1107 | unsigned Limit, Factor = 2; |
1108 | switch(I->getIntrinsicID()) { |
1109 | case Intrinsic::memcpy: |
1110 | Limit = TLI->getMaxStoresPerMemcpy(F->hasMinSize()); |
1111 | break; |
1112 | case Intrinsic::memmove: |
1113 | Limit = TLI->getMaxStoresPerMemmove(F->hasMinSize()); |
1114 | break; |
1115 | case Intrinsic::memset: |
1116 | Limit = TLI->getMaxStoresPerMemset(F->hasMinSize()); |
1117 | Factor = 1; |
1118 | break; |
1119 | default: |
1120 | llvm_unreachable("Expected a memcpy/move or memset!"); |
1121 | } |
1122 | |
1123 |   // MemOps will be populated with a list of data types that need to be |
1124 |   // loaded and stored. That's why we multiply the number of elements by 2 |
1125 |   // to get the cost for this memcpy. |
1126 | std::vector<EVT> MemOps; |
1127 | if (getTLI()->findOptimalMemOpLowering( |
1128 | MemOps, Limit, MOp, DstAddrSpace, |
1129 | SrcAddrSpace, F->getAttributes())) |
1130 | return MemOps.size() * Factor; |
1131 | |
1132 |   // If we can't find an optimal memop lowering, return the default cost. |
1133 | return -1; |
1134 | } |
1135 | |
1136 | InstructionCost ARMTTIImpl::getMemcpyCost(const Instruction *I) { |
1137 | int NumOps = getNumMemOps(cast<IntrinsicInst>(I)); |
1138 | |
1139 |   // To model the cost of a library call, we assume 1 for the call, and |
1140 |   // 3 for the argument setup. |
1141 | if (NumOps == -1) |
1142 | return 4; |
1143 | return NumOps; |
1144 | } |
1145 | |
1146 | InstructionCost ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, |
1147 | VectorType *Tp, ArrayRef<int> Mask, |
1148 | int Index, VectorType *SubTp) { |
1149 | Kind = improveShuffleKindFromMask(Kind, Mask); |
1150 | if (ST->hasNEON()) { |
1151 | if (Kind == TTI::SK_Broadcast) { |
1152 | static const CostTblEntry NEONDupTbl[] = { |
1153 |           // VDUP handles these cases. |
1154 | {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, |
1155 | {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, |
1156 | {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, |
1157 | {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, |
1158 | {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, |
1159 | {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, |
1160 | |
1161 | {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, |
1162 | {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, |
1163 | {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, |
1164 | {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}}; |
1165 | |
1166 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); |
1167 | if (const auto *Entry = |
1168 | CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE, LT.second)) |
1169 | return LT.first * Entry->Cost; |
1170 | } |
1171 | if (Kind == TTI::SK_Reverse) { |
1172 | static const CostTblEntry NEONShuffleTbl[] = { |
1173 |           // Reverse shuffle cost one instruction if we are shuffling within a |
1174 |           // double word (vrev) or two if we shuffle a quad word (vrev, vext). |
1175 | {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, |
1176 | {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, |
1177 | {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, |
1178 | {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, |
1179 | {ISD::VECTOR_SHUFFLE, MVT::v4i16, 1}, |
1180 | {ISD::VECTOR_SHUFFLE, MVT::v8i8, 1}, |
1181 | |
1182 | {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, |
1183 | {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, |
1184 | {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2}, |
1185 | {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}}; |
1186 | |
1187 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); |
1188 | if (const auto *Entry = |
1189 | CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second)) |
1190 | return LT.first * Entry->Cost; |
1191 | } |
1192 | if (Kind == TTI::SK_Select) { |
1193 | static const CostTblEntry NEONSelShuffleTbl[] = { |
1194 | |
1195 | |
1196 | |
1197 | |
1198 | {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1}, |
1199 | {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, |
1200 | {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, |
1201 | {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1}, |
1202 | |
1203 | {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, |
1204 | {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, |
1205 | {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2}, |
1206 | |
1207 | {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16}, |
1208 | |
1209 | {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}}; |
1210 | |
1211 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); |
1212 | if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl, |
1213 | ISD::VECTOR_SHUFFLE, LT.second)) |
1214 | return LT.first * Entry->Cost; |
1215 | } |
1216 | } |
1217 | if (ST->hasMVEIntegerOps()) { |
1218 | if (Kind == TTI::SK_Broadcast) { |
1219 | static const CostTblEntry MVEDupTbl[] = { |
1220 |           // VDUP handles these cases. |
1221 | {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1}, |
1222 | {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1}, |
1223 | {ISD::VECTOR_SHUFFLE, MVT::v16i8, 1}, |
1224 | {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1}, |
1225 | {ISD::VECTOR_SHUFFLE, MVT::v8f16, 1}}; |
1226 | |
1227 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); |
1228 | if (const auto *Entry = CostTableLookup(MVEDupTbl, ISD::VECTOR_SHUFFLE, |
1229 | LT.second)) |
1230 | return LT.first * Entry->Cost * |
1231 | ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput); |
1232 | } |
1233 | |
1234 | if (!Mask.empty()) { |
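     | // e.g. the mask <1,0,3,2> on v4i32 is a single "vrev64.32" instruction. |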
1235 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp); |
1236 | if (Mask.size() <= LT.second.getVectorNumElements() && |
1237 | (isVREVMask(Mask, LT.second, 16) || isVREVMask(Mask, LT.second, 32) || |
1238 | isVREVMask(Mask, LT.second, 64))) |
1239 | return ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) * LT.first; |
1240 | } |
1241 | } |
1242 | |
1243 | int BaseCost = ST->hasMVEIntegerOps() && Tp->isVectorTy() |
1244 | ? ST->getMVEVectorCostFactor(TTI::TCK_RecipThroughput) |
1245 | : 1; |
1246 | return BaseCost * BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp); |
1247 | } |
1248 | |
1249 | InstructionCost ARMTTIImpl::getArithmeticInstrCost( |
1250 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
1251 | TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, |
1252 | TTI::OperandValueProperties Opd1PropInfo, |
1253 | TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, |
1254 | const Instruction *CxtI) { |
1255 | int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode); |
1256 | if (ST->isThumb() && CostKind == TTI::TCK_CodeSize && Ty->isIntegerTy(1)) { |
1257 |     // Make operations on i1 relatively expensive as this often involves |
1258 |     // combining predicates. AND and XOR should be easier to handle with IT |
1259 |     // blocks. |
1260 | switch (ISDOpcode) { |
1261 | default: |
1262 | break; |
1263 | case ISD::AND: |
1264 | case ISD::XOR: |
1265 | return 2; |
1266 | case ISD::OR: |
1267 | return 3; |
1268 | } |
1269 | } |
1270 | |
1271 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
1272 | |
1273 | if (ST->hasNEON()) { |
1274 | const unsigned FunctionCallDivCost = 20; |
1275 | const unsigned ReciprocalDivCost = 10; |
1276 | static const CostTblEntry CostTbl[] = { |
1277 |         // Division. |
1278 |         // These costs are somewhat random. Choose a cost of 20 to indicate that |
1279 |         // vectorizing division (added function call) is going to be very expensive. |
1280 |         // Double register types. |
1281 | { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost}, |
1282 | { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost}, |
1283 | { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost}, |
1284 | { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost}, |
1285 | { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost}, |
1286 | { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost}, |
1287 | { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost}, |
1288 | { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost}, |
1289 | { ISD::SDIV, MVT::v4i16, ReciprocalDivCost}, |
1290 | { ISD::UDIV, MVT::v4i16, ReciprocalDivCost}, |
1291 | { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost}, |
1292 | { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost}, |
1293 | { ISD::SDIV, MVT::v8i8, ReciprocalDivCost}, |
1294 | { ISD::UDIV, MVT::v8i8, ReciprocalDivCost}, |
1295 | { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost}, |
1296 | { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost}, |
1297 |         // Quad register types. |
1298 | { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost}, |
1299 | { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost}, |
1300 | { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost}, |
1301 | { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost}, |
1302 | { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost}, |
1303 | { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost}, |
1304 | { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost}, |
1305 | { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost}, |
1306 | { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost}, |
1307 | { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost}, |
1308 | { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost}, |
1309 | { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost}, |
1310 | { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost}, |
1311 | { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost}, |
1312 | { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost}, |
1313 | { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost}, |
1314 | |
1315 | }; |
1316 | |
1317 | if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second)) |
1318 | return LT.first * Entry->Cost; |
1319 | |
1320 | InstructionCost Cost = BaseT::getArithmeticInstrCost( |
1321 | Opcode, Ty, CostKind, Op1Info, Op2Info, Opd1PropInfo, Opd2PropInfo); |
1322 | |
1323 |     // This is somewhat of a hack. The problem that we are facing is that SROA |
1324 |     // creates a sequence of shift, and, or instructions to construct values. |
1325 |     // These sequences are recognized by the ISel and have zero-cost. Not so for |
1326 |     // the vectorized code. Because we have support for v2i64 but not i64 those |
1327 |     // sequences look particularly beneficial to vectorize. |
1328 |     // To work around this we increase the cost of v2i64 operations to make them |
1329 |     // seem less beneficial. |
1330 | if (LT.second == MVT::v2i64 && |
1331 | Op2Info == TargetTransformInfo::OK_UniformConstantValue) |
1332 | Cost += 4; |
1333 | |
1334 | return Cost; |
1335 | } |
1336 | |
1337 |   // A constant shift on ARM / Thumb-2 can often be folded into the user |
1338 |   // instruction (e.g. "add r0, r1, r2, lsl #2"), making the shift itself free. |
1339 | auto LooksLikeAFreeShift = [&]() { |
1340 | if (ST->isThumb1Only() || Ty->isVectorTy()) |
1341 | return false; |
1342 | |
1343 | if (!CxtI || !CxtI->hasOneUse() || !CxtI->isShift()) |
1344 | return false; |
1345 | if (Op2Info != TargetTransformInfo::OK_UniformConstantValue) |
1346 | return false; |
1347 | |
1348 | |
1349 | switch (cast<Instruction>(CxtI->user_back())->getOpcode()) { |
1350 | case Instruction::Add: |
1351 | case Instruction::Sub: |
1352 | case Instruction::And: |
1353 | case Instruction::Xor: |
1354 | case Instruction::Or: |
1355 | case Instruction::ICmp: |
1356 | return true; |
1357 | default: |
1358 | return false; |
1359 | } |
1360 | }; |
1361 | if (LooksLikeAFreeShift()) |
1362 | return 0; |
1363 | |
1364 |   // Default to cheap (throughput/size of 1 instruction) but adjust throughput |
1365 |   // for "multiple beats" potentially needed by MVE instructions. |
1366 | int BaseCost = 1; |
1367 | if (ST->hasMVEIntegerOps() && Ty->isVectorTy()) |
1368 | BaseCost = ST->getMVEVectorCostFactor(CostKind); |
1369 | |
1370 |   // The rest of this mostly follows what is done in |
1371 |   // BaseT::getArithmeticInstrCost, without treating floats as more expensive |
1372 |   // than scalars or increasing the costs for custom operations. The results |
1373 |   // is also multiplied by the MVEVectorCostFactor where appropriate. |
1374 | if (TLI->isOperationLegalOrCustomOrPromote(ISDOpcode, LT.second)) |
1375 | return LT.first * BaseCost; |
1376 | |
1377 |   // Else this is expand, assume that we need to scalarize this op. |
1378 | if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) { |
1379 | unsigned Num = VTy->getNumElements(); |
1380 | InstructionCost Cost = |
1381 | getArithmeticInstrCost(Opcode, Ty->getScalarType(), CostKind); |
1382 |     // Return the cost of multiple scalar invocations plus the cost of |
1383 |     // inserting and extracting the values. |
1384 | SmallVector<Type *> Tys(Args.size(), Ty); |
1385 | return BaseT::getScalarizationOverhead(VTy, Args, Tys) + Num * Cost; |
1386 | } |
1387 | |
1388 | return BaseCost; |
1389 | } |
1390 | |
1391 | InstructionCost ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, |
1392 | MaybeAlign Alignment, |
1393 | unsigned AddressSpace, |
1394 | TTI::TargetCostKind CostKind, |
1395 | const Instruction *I) { |
1396 |   // TODO: Handle other cost kinds. |
1397 | if (CostKind != TTI::TCK_RecipThroughput) |
1398 | return 1; |
1399 | |
1400 |   // Type legalization can't handle structs. |
1401 | if (TLI->getValueType(DL, Src, true) == MVT::Other) |
1402 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
1403 | CostKind); |
1404 | |
1405 | if (ST->hasNEON() && Src->isVectorTy() && |
1406 | (Alignment && *Alignment != Align(16)) && |
1407 | cast<VectorType>(Src)->getElementType()->isDoubleTy()) { |
1408 |     // Unaligned loads/stores are extremely inefficient. |
1409 |     // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr. |
1410 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); |
1411 | return LT.first * 4; |
1412 | } |
1413 | |
1414 |   // MVE can fold an fpext of a load (or an fptrunc into a store) into a |
1415 |   // single widening / narrowing vector memory operation. |
1416 | if (ST->hasMVEFloatOps() && isa<FixedVectorType>(Src) && I && |
1417 | ((Opcode == Instruction::Load && I->hasOneUse() && |
1418 | isa<FPExtInst>(*I->user_begin())) || |
1419 | (Opcode == Instruction::Store && isa<FPTruncInst>(I->getOperand(0))))) { |
1420 | FixedVectorType *SrcVTy = cast<FixedVectorType>(Src); |
1421 | Type *DstTy = |
1422 | Opcode == Instruction::Load |
1423 | ? (*I->user_begin())->getType() |
1424 | : cast<Instruction>(I->getOperand(0))->getOperand(0)->getType(); |
1425 | if (SrcVTy->getNumElements() == 4 && SrcVTy->getScalarType()->isHalfTy() && |
1426 | DstTy->getScalarType()->isFloatTy()) |
1427 | return ST->getMVEVectorCostFactor(CostKind); |
1428 | } |
1429 | |
1430 | int BaseCost = ST->hasMVEIntegerOps() && Src->isVectorTy() |
1431 | ? ST->getMVEVectorCostFactor(CostKind) |
1432 | : 1; |
1433 | return BaseCost * BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
1434 | CostKind, I); |
1435 | } |
1436 | |
1437 | InstructionCost |
1438 | ARMTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1439 | unsigned AddressSpace, |
1440 | TTI::TargetCostKind CostKind) { |
1441 | if (ST->hasMVEIntegerOps()) { |
1442 | if (Opcode == Instruction::Load && isLegalMaskedLoad(Src, Alignment)) |
1443 | return ST->getMVEVectorCostFactor(CostKind); |
1444 | if (Opcode == Instruction::Store && isLegalMaskedStore(Src, Alignment)) |
1445 | return ST->getMVEVectorCostFactor(CostKind); |
1446 | } |
1447 | if (!isa<FixedVectorType>(Src)) |
1448 | return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
1449 | CostKind); |
1450 | |
1451 |   // Scalar cost, which is currently very high due to the efficiency of the |
1452 |   // generated code. |
1453 | } |
1454 | |
1455 | InstructionCost ARMTTIImpl::getInterleavedMemoryOpCost( |
1456 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1457 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
1458 | bool UseMaskForCond, bool UseMaskForGaps) { |
1459 | assert(Factor >= 2 && "Invalid interleave factor"); |
1460 | assert(isa<VectorType>(VecTy) && "Expect a vector type"); |
1461 | |
1462 | |
1463 |   // vldN/vstN doesn't support vector types of i64/f64 elements. |
1464 | |
1465 | if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits && |
1466 | !UseMaskForCond && !UseMaskForGaps) { |
1467 | unsigned NumElts = cast<FixedVectorType>(VecTy)->getNumElements(); |
1468 | auto *SubVecTy = |
1469 | FixedVectorType::get(VecTy->getScalarType(), NumElts / Factor); |
1470 | |
1471 | // vldN/vstN only support legal vector types of size 64 or 128 in bits. |
1472 | // Accesses having vector types that are a multiple of 128 bits can be |
1473 | // matched to more than one vldN/vstN instruction. |
1474 | int BaseCost = |
1475 | ST->hasMVEIntegerOps() ? ST->getMVEVectorCostFactor(CostKind) : 1; |
1476 | if (NumElts % Factor == 0 && |
1477 | TLI->isLegalInterleavedAccessType(Factor, SubVecTy, Alignment, DL)) |
1478 | return Factor * BaseCost * TLI->getNumInterleavedAccesses(SubVecTy, DL); |
1479 | |
1480 | // Some smaller than legal interleaved patterns are cheap as we can make |
1481 | // use of the vmovn or vrev patterns to interleave a standard load. This is |
1482 | // true for v4i8, v8i8 and v4i16 at least (but not for v4f16 as it is |
1483 | // promoted differently). The cost of 2 here is then a load and vrev or |
1484 | // vmovn. |
1485 | if (ST->hasMVEIntegerOps() && Factor == 2 && NumElts / Factor > 2 && |
1486 | VecTy->isIntOrIntVectorTy() && |
1487 | DL.getTypeSizeInBits(SubVecTy).getFixedSize() <= 64) |
1488 | return 2 * BaseCost; |
1489 | } |
1490 | |
1491 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
1492 | Alignment, AddressSpace, CostKind, |
1493 | UseMaskForCond, UseMaskForGaps); |
1494 | } |
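// Illustration (editorial sketch, NEON so BaseCost == 1): a factor-2
// interleaved load of <8 x i16> checks SubVecTy = <4 x i16> (64 bits).
// Assuming that is one legal vld2.16 access, the returned cost is
// Factor(2) * BaseCost(1) * NumInterleavedAccesses(1) == 2.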
1495 | |
1496 | InstructionCost ARMTTIImpl::getGatherScatterOpCost( |
1497 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1498 | Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) { |
1499 | using namespace PatternMatch; |
1500 | if (!ST->hasMVEIntegerOps() || !EnableMaskedGatherScatters) |
1501 | return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
1502 | Alignment, CostKind, I); |
1503 | |
1504 | assert(DataTy->isVectorTy() && "Can't do gather/scatters on scalar!"); |
1505 | auto *VTy = cast<FixedVectorType>(DataTy); |
1506 | |
1507 | |
1508 | // TODO: Splitting, once we do that. |
1509 | unsigned NumElems = VTy->getNumElements(); |
1510 | unsigned EltSize = VTy->getScalarSizeInBits(); |
1511 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, DataTy); |
1512 | |
1513 | // For now, it is assumed that for the MVE gather instructions the loads |
1514 | // are all effectively serialised. This means the cost is the scalar cost |
1515 | // multiplied by the number of elements being loaded. This is possibly |
1516 | // very conservative, but even so we still end up vectorising loops |
1517 | // because the cost per iteration for four gathers is less than the scalar loop. |
1518 | InstructionCost VectorCost = |
1519 | NumElems * LT.first * ST->getMVEVectorCostFactor(CostKind); |
1520 | // The scalarised cost is one scalar memory op per element, plus the |
1521 | // overhead of building (or taking apart) the vector of results/inputs. |
1522 | InstructionCost ScalarCost = |
1523 | NumElems * LT.first + BaseT::getScalarizationOverhead(VTy, true, false) + |
1524 | BaseT::getScalarizationOverhead(VTy, false, true); |
1525 | |
1526 | if (EltSize < 8 || Alignment < EltSize / 8) |
1527 | return ScalarCost; |
1528 | |
1529 | unsigned ExtSize = EltSize; |
1530 | |
1531 | if (I != nullptr) { |
1532 | |
1533 | // Check whether there's a single user that asks for an extended type, in |
1534 | // which case an extending gather (e.g. vldrb.u32) can be used for free. |
1535 | if ((I->getOpcode() == Instruction::Load || |
1536 | match(I, m_Intrinsic<Intrinsic::masked_gather>())) && |
1537 | I->hasOneUse()) { |
1538 | const User *Us = *I->users().begin(); |
1539 | if (isa<ZExtInst>(Us) || isa<SExtInst>(Us)) { |
1540 | // Only allow valid type combinations. |
1541 | unsigned TypeSize = |
1542 | cast<Instruction>(Us)->getType()->getScalarSizeInBits(); |
1543 | if (((TypeSize == 32 && (EltSize == 8 || EltSize == 16)) || |
1544 | (TypeSize == 16 && EltSize == 8)) && |
1545 | TypeSize * NumElems == 128) { |
1546 | ExtSize = TypeSize; |
1547 | } |
1548 | } |
1549 | } |
1550 | |
1551 | TruncInst *T; |
1552 | if ((I->getOpcode() == Instruction::Store || |
1553 | match(I, m_Intrinsic<Intrinsic::masked_scatter>())) && |
1554 | (T = dyn_cast<TruncInst>(I->getOperand(0)))) { |
1555 | // Only allow valid type combinations. |
1556 | unsigned TypeSize = T->getOperand(0)->getType()->getScalarSizeInBits(); |
1557 | if (((EltSize == 16 && TypeSize == 32) || |
1558 | (EltSize == 8 && (TypeSize == 32 || TypeSize == 16))) && |
1559 | TypeSize * NumElems == 128) |
1560 | ExtSize = TypeSize; |
1561 | } |
1562 | } |
1563 | |
1564 | if (ExtSize * NumElems != 128 || NumElems < 4) |
1565 | return ScalarCost; |
1566 | |
1567 | // Any (aligned) i32 gather will not need to be scalarised. |
1568 | if (ExtSize == 32) |
1569 | return VectorCost; |
1570 | |
1571 | // Smaller types are extended on load/truncated on store; only i8 and i16 |
1572 | // have the extending vldrb/vldrh forms, anything else must be scalarised. |
1573 | if (ExtSize != 8 && ExtSize != 16) |
1574 | return ScalarCost; |
1575 | |
1576 | if (const auto *BC = dyn_cast<BitCastInst>(Ptr)) |
1577 | Ptr = BC->getOperand(0); |
1578 | if (const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr)) { |
1579 | if (GEP->getNumOperands() != 2) |
1580 | return ScalarCost; |
1581 | unsigned Scale = DL.getTypeAllocSize(GEP->getResultElementType()); |
1582 | // Scale needs to be correct (which is only relevant for i16s). |
1583 | if (Scale != 1 && Scale * 8 != ExtSize) |
1584 | return ScalarCost; |
1585 | |
1586 | if (const auto *ZExt = dyn_cast<ZExtInst>(GEP->getOperand(1))) { |
1587 | if (ZExt->getOperand(0)->getType()->getScalarSizeInBits() <= ExtSize) |
1588 | return VectorCost; |
1589 | } |
1590 | return ScalarCost; |
1591 | } |
1592 | return ScalarCost; |
1593 | } |
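// Illustration (editorial sketch, hypothetical MVE values): a gather of
// <4 x i32> with Align(4) has EltSize == 32 and LT.first == 1, so
//   VectorCost = NumElems(4) * 1 * getMVEVectorCostFactor(CostKind)
// and ExtSize(32) * NumElems(4) == 128 selects the vector form (a VLDRW
// gather). A <4 x i16> gather whose only user zero-extends to <4 x i32>
// sets ExtSize = 32 too, modelling the extending vldrh.u32 form.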
1594 | |
1595 | InstructionCost |
1596 | ARMTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, |
1597 | Optional<FastMathFlags> FMF, |
1598 | TTI::TargetCostKind CostKind) { |
1599 | if (TTI::requiresOrderedReduction(FMF)) |
1600 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); |
1601 | |
1602 | EVT ValVT = TLI->getValueType(DL, ValTy); |
1603 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
1604 | if (!ST->hasMVEIntegerOps() || !ValVT.isSimple() || ISD != ISD::ADD) |
1605 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); |
1606 | |
1607 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
1608 | |
1609 | static const CostTblEntry CostTblAdd[]{ |
1610 | {ISD::ADD, MVT::v16i8, 1}, |
1611 | {ISD::ADD, MVT::v8i16, 1}, |
1612 | {ISD::ADD, MVT::v4i32, 1}, |
1613 | }; |
1614 | if (const auto *Entry = CostTableLookup(CostTblAdd, ISD, LT.second)) |
1615 | return Entry->Cost * ST->getMVEVectorCostFactor(CostKind) * LT.first; |
1616 | |
1617 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); |
1618 | } |
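// Illustration (editorial sketch): an add reduction of <4 x i32> legalizes
// to MVT::v4i32 (LT.first == 1), matches the table with Cost == 1, and so
// returns getMVEVectorCostFactor(CostKind): in effect a single VADDV.32.
// A <16 x i32> input splits into four v4i32 parts, scaling by LT.first == 4.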
1619 | |
1620 | InstructionCost |
1621 | ARMTTIImpl::getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, |
1622 | Type *ResTy, VectorType *ValTy, |
1623 | TTI::TargetCostKind CostKind) { |
1624 | EVT ValVT = TLI->getValueType(DL, ValTy); |
1625 | EVT ResVT = TLI->getValueType(DL, ResTy); |
1626 | |
1627 | if (ST->hasMVEIntegerOps() && ValVT.isSimple() && ResVT.isSimple()) { |
1628 | std::pair<InstructionCost, MVT> LT = |
1629 | TLI->getTypeLegalizationCost(DL, ValTy); |
1630 | |
1631 | // The legal cases are: |
1632 | //   VADDV  u/s 8/16/32 |
1633 | //   VMLAV  u/s 8/16/32 |
1634 | //   VADDLV u/s 32 |
1635 | //   VMLALV u/s 16/32 |
1636 | // Codegen currently cannot always handle larger than legal vectors very |
1637 | // well, especially for predicated reductions where the mask needs to be |
1638 | // split, so restrict to 128bit or smaller input types. |
1639 | unsigned RevVTSize = ResVT.getSizeInBits(); |
1640 | if (ValVT.getSizeInBits() <= 128 && |
1641 | ((LT.second == MVT::v16i8 && RevVTSize <= 32) || |
1642 | (LT.second == MVT::v8i16 && RevVTSize <= (IsMLA ? 64u : 32u)) || |
1643 | (LT.second == MVT::v4i32 && RevVTSize <= 64))) |
1644 | return ST->getMVEVectorCostFactor(CostKind) * LT.first; |
1645 | } |
1646 | |
1647 | return BaseT::getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, ValTy, |
1648 | CostKind); |
1649 | } |
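// Illustration (editorial sketch): an IsMLA reduction accumulating
// sext(<8 x i16>) * sext(<8 x i16>) into an i64 result has
// LT.second == v8i16 and RevVTSize == 64, so it costs one MVE op
// (a VMLALV.S16); the same reduction into i128 fails the size checks
// and falls back to the base implementation.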
1650 | |
1651 | InstructionCost |
1652 | ARMTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1653 | TTI::TargetCostKind CostKind) { |
1654 | switch (ICA.getID()) { |
1655 | case Intrinsic::get_active_lane_mask: |
1656 | // Currently we make a somewhat optimistic assumption that |
1657 | // active_lane_mask's are always free. In reality it may be freely folded |
1658 | // into a tail predicated loop, expanded into a VCTP or expanded into |
1659 | // less optimal instructions. It is made free here in the hope that the |
1660 | // vectorizer will prefer tail-predicated plans, where the intrinsic |
1661 | // really is free, over plans where it would need expanding. |
1662 | |
1663 | if (ST->hasMVEIntegerOps()) |
1664 | return 0; |
1665 | break; |
1666 | case Intrinsic::sadd_sat: |
1667 | case Intrinsic::ssub_sat: |
1668 | case Intrinsic::uadd_sat: |
1669 | case Intrinsic::usub_sat: { |
1670 | if (!ST->hasMVEIntegerOps()) |
1671 | break; |
1672 | Type *VT = ICA.getReturnType(); |
1673 | |
1674 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT); |
1675 | if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || |
1676 | LT.second == MVT::v16i8) { |
1677 | // This is a base cost of 1 for the vqadd, plus 3 extract shifts if we |
1678 | // need to extend the type, as it uses shr(qadd(shl, shl)). |
1679 | unsigned Instrs = |
1680 | LT.second.getScalarSizeInBits() == VT->getScalarSizeInBits() ? 1 : 4; |
1681 | return LT.first * ST->getMVEVectorCostFactor(CostKind) * Instrs; |
1682 | } |
1683 | break; |
1684 | } |
1685 | case Intrinsic::abs: |
1686 | case Intrinsic::smin: |
1687 | case Intrinsic::smax: |
1688 | case Intrinsic::umin: |
1689 | case Intrinsic::umax: { |
1690 | if (!ST->hasMVEIntegerOps()) |
1691 | break; |
1692 | Type *VT = ICA.getReturnType(); |
1693 | |
1694 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT); |
1695 | if (LT.second == MVT::v4i32 || LT.second == MVT::v8i16 || |
1696 | LT.second == MVT::v16i8) |
1697 | return LT.first * ST->getMVEVectorCostFactor(CostKind); |
1698 | break; |
1699 | } |
1700 | case Intrinsic::minnum: |
1701 | case Intrinsic::maxnum: { |
1702 | if (!ST->hasMVEFloatOps()) |
1703 | break; |
1704 | Type *VT = ICA.getReturnType(); |
1705 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, VT); |
1706 | if (LT.second == MVT::v4f32 || LT.second == MVT::v8f16) |
1707 | return LT.first * ST->getMVEVectorCostFactor(CostKind); |
1708 | break; |
1709 | } |
1710 | } |
1711 | |
1712 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
| 1 | 'Default' branch taken. Execution continues on line 1712 |
| 2 | Calling 'BasicTTIImplBase::getIntrinsicInstrCost' |
1713 | } |
1714 | |
1715 | bool ARMTTIImpl::isLoweredToCall(const Function *F) { |
1716 | if (!F->isIntrinsic()) |
1717 | return BaseT::isLoweredToCall(F); |
1718 | |
1719 | // Assume all Arm-specific intrinsics map to an instruction. |
1720 | if (F->getName().startswith("llvm.arm")) |
1721 | return false; |
1722 | |
1723 | switch (F->getIntrinsicID()) { |
1724 | default: break; |
1725 | case Intrinsic::powi: |
1726 | case Intrinsic::sin: |
1727 | case Intrinsic::cos: |
1728 | case Intrinsic::pow: |
1729 | case Intrinsic::log: |
1730 | case Intrinsic::log10: |
1731 | case Intrinsic::log2: |
1732 | case Intrinsic::exp: |
1733 | case Intrinsic::exp2: |
1734 | return true; |
1735 | case Intrinsic::sqrt: |
1736 | case Intrinsic::fabs: |
1737 | case Intrinsic::copysign: |
1738 | case Intrinsic::floor: |
1739 | case Intrinsic::ceil: |
1740 | case Intrinsic::trunc: |
1741 | case Intrinsic::rint: |
1742 | case Intrinsic::nearbyint: |
1743 | case Intrinsic::round: |
1744 | case Intrinsic::canonicalize: |
1745 | case Intrinsic::lround: |
1746 | case Intrinsic::llround: |
1747 | case Intrinsic::lrint: |
1748 | case Intrinsic::llrint: |
1749 | if (F->getReturnType()->isDoubleTy() && !ST->hasFP64()) |
1750 | return true; |
1751 | if (F->getReturnType()->isHalfTy() && !ST->hasFullFP16()) |
1752 | return true; |
1753 | // Some operations can be handled by vector instructions and assume |
1754 | // unsupported vectors will be expanded into supported scalar ones. |
1755 | // TODO: Handle scalar operations properly. |
1756 | return !ST->hasFPARMv8Base() && !ST->hasVFP2Base(); |
1757 | case Intrinsic::masked_store: |
1758 | case Intrinsic::masked_load: |
1759 | case Intrinsic::masked_gather: |
1760 | case Intrinsic::masked_scatter: |
1761 | return !ST->hasMVEIntegerOps(); |
1762 | case Intrinsic::sadd_with_overflow: |
1763 | case Intrinsic::uadd_with_overflow: |
1764 | case Intrinsic::ssub_with_overflow: |
1765 | case Intrinsic::usub_with_overflow: |
1766 | case Intrinsic::sadd_sat: |
1767 | case Intrinsic::uadd_sat: |
1768 | case Intrinsic::ssub_sat: |
1769 | case Intrinsic::usub_sat: |
1770 | return false; |
1771 | } |
1772 | |
1773 | return BaseT::isLoweredToCall(F); |
1774 | } |
1775 | |
1776 | bool ARMTTIImpl::maybeLoweredToCall(Instruction &I) { |
1777 | unsigned ISD = TLI->InstructionOpcodeToISD(I.getOpcode()); |
1778 | EVT VT = TLI->getValueType(DL, I.getType(), true); |
1779 | if (TLI->getOperationAction(ISD, VT) == TargetLowering::LibCall) |
1780 | return true; |
1781 | |
1782 | // Check if an intrinsic will be lowered to a call and assume that any |
1783 | // other CallInst will generate a bl. |
1784 | if (auto *Call = dyn_cast<CallInst>(&I)) { |
1785 | if (auto *II = dyn_cast<IntrinsicInst>(Call)) { |
1786 | switch(II->getIntrinsicID()) { |
1787 | case Intrinsic::memcpy: |
1788 | case Intrinsic::memset: |
1789 | case Intrinsic::memmove: |
1790 | return getNumMemOps(II) == -1; |
1791 | default: |
1792 | if (const Function *F = Call->getCalledFunction()) |
1793 | return isLoweredToCall(F); |
1794 | } |
1795 | } |
1796 | return true; |
1797 | } |
1798 | |
1799 | // FPv5 provides conversions between integer, double-precision, |
1800 | // single-precision, and half-precision formats. |
1801 | switch (I.getOpcode()) { |
1802 | default: |
1803 | break; |
1804 | case Instruction::FPToSI: |
1805 | case Instruction::FPToUI: |
1806 | case Instruction::SIToFP: |
1807 | case Instruction::UIToFP: |
1808 | case Instruction::FPTrunc: |
1809 | case Instruction::FPExt: |
1810 | return !ST->hasFPARMv8Base(); |
1811 | } |
1812 | |
1813 | // FIXME: Unfortunately the approach of checking the Operation Action does |
1814 | // not catch all cases of Legalization that use library calls. Our |
1815 | // Legalization step categorizes some transformations into library calls as |
1816 | // Custom, Expand or even Legal when doing type legalization. So for now |
1817 | // we have to special case for instance the SDIV of 64bit integers and the |
1818 | // use of floating point emulation. |
1819 | if (VT.isInteger() && VT.getSizeInBits() >= 64) { |
1820 | switch (ISD) { |
1821 | default: |
1822 | break; |
1823 | case ISD::SDIV: |
1824 | case ISD::UDIV: |
1825 | case ISD::SREM: |
1826 | case ISD::UREM: |
1827 | case ISD::SDIVREM: |
1828 | case ISD::UDIVREM: |
1829 | return true; |
1830 | } |
1831 | } |
1832 | |
1833 | // Assume all other non-float operations are supported. |
1834 | if (!VT.isFloatingPoint()) |
1835 | return false; |
1836 | |
1837 | // We'll need a library call to handle most floats when using soft floats. |
1838 | if (TLI->useSoftFloat()) { |
1839 | switch (I.getOpcode()) { |
1840 | default: |
1841 | return true; |
1842 | case Instruction::Alloca: |
1843 | case Instruction::Load: |
1844 | case Instruction::Store: |
1845 | case Instruction::Select: |
1846 | case Instruction::PHI: |
1847 | return false; |
1848 | } |
1849 | } |
1850 | |
1851 | // Without FP64 hardware, any double-typed FP operation becomes a |
1852 | // library call. |
1853 | if (I.getType()->isDoubleTy() && !ST->hasFP64()) |
1854 | return true; |
1855 | |
1856 | // Likewise for half (f16) operations without full fp16 support. |
1857 | if (I.getType()->isHalfTy() && !ST->hasFullFP16()) |
1858 | return true; |
1859 | |
1860 | return false; |
1861 | } |
1862 | |
1863 | bool ARMTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1864 | AssumptionCache &AC, |
1865 | TargetLibraryInfo *LibInfo, |
1866 | HardwareLoopInfo &HWLoopInfo) { |
1867 | // Low-overhead branches are only supported in the 'low-overhead branch' |
1868 | // extension of v8.1-m. |
1869 | if (!ST->hasLOB() || DisableLowOverheadLoops) { |
1870 | LLVM_DEBUG(dbgs() << "ARMHWLoops: Disabled\n"); |
1871 | return false; |
1872 | } |
1873 | |
1874 | if (!SE.hasLoopInvariantBackedgeTakenCount(L)) { |
1875 | LLVM_DEBUG(dbgs() << "ARMHWLoops: No BETC\n"); |
1876 | return false; |
1877 | } |
1878 | |
1879 | const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L); |
1880 | if (isa<SCEVCouldNotCompute>(BackedgeTakenCount)) { |
1881 | LLVM_DEBUG(dbgs() << "ARMHWLoops: Uncomputable BETC\n"); |
1882 | return false; |
1883 | } |
1884 | |
1885 | const SCEV *TripCountSCEV = |
1886 | SE.getAddExpr(BackedgeTakenCount, |
1887 | SE.getOne(BackedgeTakenCount->getType())); |
1888 | |
1889 | // We need to store the trip count in LR, a 32-bit register. |
1890 | if (SE.getUnsignedRangeMax(TripCountSCEV).getBitWidth() > 32) { |
1891 | LLVM_DEBUG(dbgs() << "ARMHWLoops: Trip count does not fit into 32 bits\n"); |
1892 | return false; |
1893 | } |
1894 | |
1895 | |
1896 | // Making a call will trash LR and clear LO_BRANCH_INFO, so there's little |
1897 | // point in generating a hardware loop if that's going to happen. |
1898 | auto IsHardwareLoopIntrinsic = [](Instruction &I) { |
1899 | if (auto *Call = dyn_cast<IntrinsicInst>(&I)) { |
1900 | switch (Call->getIntrinsicID()) { |
1901 | default: |
1902 | break; |
1903 | case Intrinsic::start_loop_iterations: |
1904 | case Intrinsic::test_start_loop_iterations: |
1905 | case Intrinsic::loop_decrement: |
1906 | case Intrinsic::loop_decrement_reg: |
1907 | return true; |
1908 | } |
1909 | } |
1910 | return false; |
1911 | }; |
1912 | |
1913 | // Scan the instructions to see if there's any that we know will turn |
1914 | // into a call or if this loop is already a low-overhead loop or will |
1915 | // become a tail predicated loop. |
1916 | bool IsTailPredLoop = false; |
1917 | auto ScanLoop = [&](Loop *L) { |
1918 | for (auto *BB : L->getBlocks()) { |
1919 | for (auto &I : *BB) { |
1920 | if (maybeLoweredToCall(I) || IsHardwareLoopIntrinsic(I) || |
1921 | isa<InlineAsm>(I)) { |
1922 | LLVM_DEBUG(dbgs() << "ARMHWLoops: Bad instruction: " << I << "\n"); |
1923 | return false; |
1924 | } |
1925 | if (auto *II = dyn_cast<IntrinsicInst>(&I)) |
1926 | IsTailPredLoop |= |
1927 | II->getIntrinsicID() == Intrinsic::get_active_lane_mask || |
1928 | II->getIntrinsicID() == Intrinsic::arm_mve_vctp8 || |
1929 | II->getIntrinsicID() == Intrinsic::arm_mve_vctp16 || |
1930 | II->getIntrinsicID() == Intrinsic::arm_mve_vctp32 || |
1931 | II->getIntrinsicID() == Intrinsic::arm_mve_vctp64; |
1932 | } |
1933 | } |
1934 | return true; |
1935 | }; |
1936 | |
1937 | // Visit inner loops. |
1938 | for (auto Inner : *L) |
1939 | if (!ScanLoop(Inner)) |
1940 | return false; |
1941 | |
1942 | if (!ScanLoop(L)) |
1943 | return false; |
1944 | |
1945 | |
1946 | |
1947 | |
1948 | |
1949 | LLVMContext &C = L->getHeader()->getContext(); |
1950 | HWLoopInfo.CounterInReg = true; |
1951 | HWLoopInfo.IsNestingLegal = false; |
1952 | HWLoopInfo.PerformEntryTest = AllowWLSLoops && !IsTailPredLoop; |
1953 | HWLoopInfo.CountType = Type::getInt32Ty(C); |
1954 | HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1); |
1955 | return true; |
1956 | } |
1957 | |
1958 | static bool canTailPredicateInstruction(Instruction &I, int &ICmpCount) { |
1959 | // We don't allow icmp's, and because we only look at single block loops, |
1960 | // we simply count the icmps, i.e. there should only be 1 for the backedge. |
1961 | if (isa<ICmpInst>(&I) && ++ICmpCount > 1) |
1962 | return false; |
1963 | |
1964 | // Count min/max intrinsics like the icmp above: they lower to a compare |
1965 | // and select, and more than one of them (beyond the backedge compare) |
1966 | // tends to produce code that tail-predication does not help. |
1967 | |
1968 | |
1969 | |
1970 | if (auto *II = dyn_cast<IntrinsicInst>(&I)) |
1971 | if ((II->getIntrinsicID() == Intrinsic::smin || |
1972 | II->getIntrinsicID() == Intrinsic::smax || |
1973 | II->getIntrinsicID() == Intrinsic::umin || |
1974 | II->getIntrinsicID() == Intrinsic::umax) && |
1975 | ++ICmpCount > 1) |
1976 | return false; |
1977 | |
1978 | if (isa<FCmpInst>(&I)) |
1979 | return false; |
1980 | |
1981 | // We could allow extending/narrowing FP loads/stores, but codegen is |
1982 | // too inefficient so reject this for now. |
1983 | if (isa<FPExtInst>(&I) || isa<FPTruncInst>(&I)) |
1984 | return false; |
1985 | |
1986 | // Extends have to be extending-loads. |
1987 | if (isa<SExtInst>(&I) || isa<ZExtInst>(&I)) |
1988 | if (!I.getOperand(0)->hasOneUse() || !isa<LoadInst>(I.getOperand(0))) |
1989 | return false; |
1990 | |
1991 | // Truncs have to be narrowing-stores. |
1992 | if (isa<TruncInst>(&I)) |
1993 | if (!I.hasOneUse() || !isa<StoreInst>(*I.user_begin())) |
1994 | return false; |
1995 | |
1996 | return true; |
1997 | } |
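// Illustration of the rules above (editorial sketch, hypothetical IR,
// assuming %wide.load is a single-use load):
//   %x = zext <8 x i16> %wide.load to <8 x i32>   ; allowed: extending load
//   %t = trunc <8 x i32> %v to <8 x i16>
//   store <8 x i16> %t, <8 x i16>* %p             ; allowed: narrowing store
//   %c = fcmp olt <4 x float> %a, %b              ; always rejected
//   %e = fpext <4 x half> %h to <4 x float>       ; always rejected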
1998 | |
1999 | |
2000 | // To set up a tail-predicated loop, we need to know the total number of |
2001 | // elements processed by that loop. Thus, we need to determine the element |
2002 | // size and: |
2003 | // 1) it should be uniform for all operations in the vector loop, so we |
2004 | //    e.g. don't want any widening/narrowing operations. |
2005 | // 2) it should be smaller than i64s because we don't have vector |
2006 | //    operations that work on i64s. |
2007 | // 3) we don't want elements to be reversed or shuffled, to make sure the |
2008 | //    tail-predication masks/predicates the right lanes. |
2009 | static bool canTailPredicateLoop(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
2010 | const DataLayout &DL, |
2011 | const LoopAccessInfo *LAI) { |
2012 | LLVM_DEBUG(dbgs() << "Tail-predication: checking allowed instructions\n"); |
2013 | |
2014 | // If there are live-out values, it is probably a reduction. We can |
2015 | // predicate most reduction operations freely under MVE using a combination |
2016 | // of prefer-predicated-reduction-select and inloop reductions. We limit |
2017 | // this to integer, float and half live-outs, but don't check for specific |
2018 | // operators here. If the value turns out not to be a reduction, the |
2019 | // vectorizer falls back to standard vectorization automatically. |
2020 | |
2021 | SmallVector<Instruction *, 8> LiveOuts; |
2022 | LiveOuts = llvm::findDefsUsedOutsideOfLoop(L); |
2023 | bool ReductionsDisabled = |
2024 | EnableTailPredication == TailPredication::EnabledNoReductions || |
2025 | EnableTailPredication == TailPredication::ForceEnabledNoReductions; |
2026 | |
2027 | for (auto *I : LiveOuts) { |
2028 | if (!I->getType()->isIntegerTy() && !I->getType()->isFloatTy() && |
2029 | !I->getType()->isHalfTy()) { |
2030 | LLVM_DEBUG(dbgs() << "Don't tail-predicate loop with non-integer/float " |
2031 | "live-out value\n"); |
2032 | return false; |
2033 | } |
2034 | if (ReductionsDisabled) { |
2035 | LLVM_DEBUG(dbgs() << "Reductions not enabled\n"); |
2036 | return false; |
2037 | } |
2038 | } |
2039 | |
2040 | // Next, check that all instructions can be tail-predicated. |
2041 | PredicatedScalarEvolution PSE = LAI->getPSE(); |
2042 | SmallVector<Instruction *, 16> LoadStores; |
2043 | int ICmpCount = 0; |
2044 | |
2045 | for (BasicBlock *BB : L->blocks()) { |
2046 | for (Instruction &I : BB->instructionsWithoutDebug()) { |
2047 | if (isa<PHINode>(&I)) |
2048 | continue; |
2049 | if (!canTailPredicateInstruction(I, ICmpCount)) { |
2050 | LLVM_DEBUG(dbgs() << "Instruction not allowed: "; I.dump()); |
2051 | return false; |
2052 | } |
2053 | |
2054 | Type *T = I.getType(); |
2055 | if (T->isPointerTy()) |
2056 | T = T->getPointerElementType(); |
2057 | |
2058 | if (T->getScalarSizeInBits() > 32) { |
2059 | LLVM_DEBUG(dbgs() << "Unsupported Type: "; T->dump()); |
2060 | return false; |
2061 | } |
2062 | if (isa<StoreInst>(I) || isa<LoadInst>(I)) { |
2063 | Value *Ptr = isa<LoadInst>(I) ? I.getOperand(0) : I.getOperand(1); |
2064 | int64_t NextStride = getPtrStride(PSE, Ptr, L); |
2065 | if (NextStride == 1) { |
2066 | // TODO: for now only allow consecutive strides of 1. We could support |
2067 | // other strides as long as the stride is uniform, but let's keep it |
2068 | // simple for now. |
2069 | continue; |
2070 | } else if (NextStride == -1 || |
2071 | (NextStride == 2 && MVEMaxSupportedInterleaveFactor >= 2) || |
2072 | (NextStride == 4 && MVEMaxSupportedInterleaveFactor >= 4)) { |
2073 | LLVM_DEBUG(dbgs() |
2074 | << "Consecutive strides of -1, 2 or 4 found, vld2/vst2 can't " |
2075 | "be tail-predicated.\n"); |
2076 | return false; |
2077 | |
2078 | } else if (EnableMaskedGatherScatters) { |
2079 | // Gather/scatters do allow loading from arbitrary strides, at least |
2080 | // if they are loop invariant. |
2081 | // TODO: Loop variant strides should in theory work, too, but |
2082 | // this requires further testing. |
2083 | const SCEV *PtrScev = |
2084 | replaceSymbolicStrideSCEV(PSE, llvm::ValueToValueMap(), Ptr); |
2085 | if (auto AR = dyn_cast<SCEVAddRecExpr>(PtrScev)) { |
2086 | const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); |
2087 | if (PSE.getSE()->isLoopInvariant(Step, L)) |
2088 | continue; |
2089 | } |
2090 | } |
2091 | LLVM_DEBUG(dbgs() << "Bad stride found, can't " |
2092 | "tail-predicate.\n"); |
2093 | return false; |
2094 | } |
2095 | } |
2096 | } |
2097 | |
2098 | LLVM_DEBUG(dbgs() << "tail-predication: all instructions allowed!\n"); |
2099 | return true; |
2100 | } |
2101 | |
2102 | bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, |
2103 | ScalarEvolution &SE, |
2104 | AssumptionCache &AC, |
2105 | TargetLibraryInfo *TLI, |
2106 | DominatorTree *DT, |
2107 | const LoopAccessInfo *LAI) { |
2108 | if (!EnableTailPredication) { |
2109 | LLVM_DEBUG(dbgs() << "Tail-predication not enabled.\n"); |
2110 | return false; |
2111 | } |
2112 | |
2113 | // Creating a predicated vector loop is the first step for generating a |
2114 | // tail-predicated hardware loop, for which we need the MVE masked |
2115 | // load/store instructions: |
2116 | if (!ST->hasMVEIntegerOps()) |
2117 | return false; |
2118 | |
2119 | // For now, restrict this to single block loops. |
2120 | if (L->getNumBlocks() > 1) { |
2121 | LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: not a single block " |
2122 | "loop.\n"); |
2123 | return false; |
2124 | } |
2125 | |
2126 | assert(L->isInnermost() && "preferPredicateOverEpilogue: inner-loop expected"); |
2127 | |
2128 | HardwareLoopInfo HWLoopInfo(L); |
2129 | if (!HWLoopInfo.canAnalyze(*LI)) { |
2130 | LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " |
2131 | "analyzable.\n"); |
2132 | return false; |
2133 | } |
2134 | |
2135 | // This checks if we have the low-overhead branch architecture |
2136 | // extension, and if we will create a hardware-loop: |
2137 | if (!isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) { |
2138 | LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " |
2139 | "profitable.\n"); |
2140 | return false; |
2141 | } |
2142 | |
2143 | if (!HWLoopInfo.isHardwareLoopCandidate(SE, *LI, *DT)) { |
2144 | LLVM_DEBUG(dbgs() << "preferPredicateOverEpilogue: hardware-loop is not " |
2145 | "a candidate.\n"); |
2146 | return false; |
2147 | } |
2148 | |
2149 | return canTailPredicateLoop(L, LI, SE, DL, LAI); |
2150 | } |
2151 | |
2152 | bool ARMTTIImpl::emitGetActiveLaneMask() const { |
2153 | if (!ST->hasMVEIntegerOps() || !EnableTailPredication) |
2154 | return false; |
2155 | |
2156 | // Intrinsic @llvm.get.active.lane.mask is supported. |
2157 | // It is used in the MVETailPredication pass, which requires the number of |
2158 | // elements processed by this vector loop to setup the tail-predicated |
2159 | // loop. |
2160 | return true; |
2161 | } |
2162 | void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
2163 | TTI::UnrollingPreferences &UP, |
2164 | OptimizationRemarkEmitter *ORE) { |
2165 | // Enable upper bound unrolling universally, independent of the conditions |
2166 | // checked below. |
2167 | UP.UpperBound = true; |
2168 | |
2169 | // Only currently enable these preferences for M-Class cores. |
2170 | if (!ST->isMClass()) |
2171 | return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE); |
2172 | |
2173 | // Disable loop unrolling for Oz and Os. |
2174 | UP.OptSizeThreshold = 0; |
2175 | UP.PartialOptSizeThreshold = 0; |
2176 | if (L->getHeader()->getParent()->hasOptSize()) |
2177 | return; |
2178 | |
2179 | SmallVector<BasicBlock*, 4> ExitingBlocks; |
2180 | L->getExitingBlocks(ExitingBlocks); |
2181 | LLVM_DEBUG(dbgs() << "Loop has:\n" |
2182 | << "Blocks: " << L->getNumBlocks() << "\n" |
2183 | << "Exit blocks: " << ExitingBlocks.size() << "\n"); |
2184 | |
2185 | // Only allow one exit other than the latch. This acts as an early exit |
2186 | // as it mirrors the profitability calculation of the runtime unroller. |
2187 | if (ExitingBlocks.size() > 2) |
2188 | return; |
2189 | |
2190 | // Limit the CFG of the loop body for targets with a branch predictor. |
2191 | // Allowing 4 blocks permits if-then-else diamonds in the body. |
2192 | if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) |
2193 | return; |
2194 | |
2195 | // Don't unroll vectorized loops, including the remainder loop. |
2196 | if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) |
2197 | return; |
2198 | |
2199 | // Scan the loop: don't unroll loops with calls as this could prevent |
2200 | // inlining. |
2201 | InstructionCost Cost = 0; |
2202 | for (auto *BB : L->getBlocks()) { |
2203 | for (auto &I : *BB) { |
2204 | // Don't unroll vectorised loops. MVE does not benefit from it as much as |
2205 | // scalar code. |
2206 | if (I.getType()->isVectorTy()) |
2207 | return; |
2208 | |
2209 | if (isa<CallInst>(I) || isa<InvokeInst>(I)) { |
2210 | if (const Function *F = cast<CallBase>(I).getCalledFunction()) { |
2211 | if (!isLoweredToCall(F)) |
2212 | continue; |
2213 | } |
2214 | return; |
2215 | } |
2216 | |
2217 | SmallVector<const Value*, 4> Operands(I.operand_values()); |
2218 | Cost += |
2219 | getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency); |
2220 | } |
2221 | } |
2222 | |
2223 | |
2224 | // Unroll by 4 by default. On Thumb1-only (e.g. v6-M) cores registers are |
2225 | // scarce: every distinct value that is live out of the loop needs a |
2226 | // register across the whole unrolled body, so scale the unroll count |
2227 | // down by the number of such live-outs. |
2228 | |
2229 | unsigned UnrollCount = 4; |
2230 | if (ST->isThumb1Only()) { |
2231 | unsigned ExitingValues = 0; |
2232 | SmallVector<BasicBlock *, 4> ExitBlocks; |
2233 | L->getExitBlocks(ExitBlocks); |
2234 | for (auto *Exit : ExitBlocks) { |
2235 | // Count the live-out values. PHIs whose only input is a GEP are not |
2236 | // counted, as the GEP will be folded into the addressing modes. |
2237 | unsigned LiveOuts = count_if(Exit->phis(), [](auto &PH) { |
2238 | return PH.getNumOperands() != 1 || |
2239 | !isa<GetElementPtrInst>(PH.getOperand(0)); |
2240 | }); |
2241 | ExitingValues = ExitingValues < LiveOuts ? LiveOuts : ExitingValues; |
2242 | } |
2243 | if (ExitingValues) |
2244 | UnrollCount /= ExitingValues; |
2245 | if (UnrollCount <= 1) |
2246 | return; |
2247 | } |
2248 | |
2249 | LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n"); |
2250 | LLVM_DEBUG(dbgs() << "Default Runtime Unroll Count: " << UnrollCount << "\n"); |
2251 | |
2252 | UP.Partial = true; |
2253 | UP.Runtime = true; |
2254 | UP.UnrollRemainder = true; |
2255 | UP.DefaultUnrollRuntimeCount = UnrollCount; |
2256 | UP.UnrollAndJam = true; |
2257 | UP.UnrollAndJamInnerLoopThreshold = 60; |
2258 | |
2259 | // Force unrolling small loops can be very useful because of the branch |
2260 | // taken cost of the backedge. |
2261 | if (Cost < 12) |
2262 | UP.Force = true; |
2263 | } |
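// Illustration of the Thumb1 scaling above (editorial sketch): the runtime
// unroll count starts at 4; with two live-out values that are not
// pass-through GEP phis it becomes 4 / 2 == 2, and with four or more it
// drops to <= 1 and unrolling is abandoned. Independently, any loop whose
// computed size cost is below 12 also gets UP.Force set.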
2264 | |
2265 | void ARMTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
2266 | TTI::PeelingPreferences &PP) { |
2267 | BaseT::getPeelingPreferences(L, SE, PP); |
2268 | } |
2269 | |
2270 | bool ARMTTIImpl::preferInLoopReduction(unsigned Opcode, Type *Ty, |
2271 | TTI::ReductionFlags Flags) const { |
2272 | if (!ST->hasMVEIntegerOps()) |
2273 | return false; |
2274 | |
2275 | unsigned ScalarBits = Ty->getScalarSizeInBits(); |
2276 | switch (Opcode) { |
2277 | case Instruction::Add: |
2278 | return ScalarBits <= 64; |
2279 | default: |
2280 | return false; |
2281 | } |
2282 | } |
2283 | |
2284 | bool ARMTTIImpl::preferPredicatedReductionSelect( |
2285 | unsigned Opcode, Type *Ty, TTI::ReductionFlags Flags) const { |
2286 | if (!ST->hasMVEIntegerOps()) |
2287 | return false; |
2288 | return true; |
2289 | } |
1 | //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | /// \file |
10 | /// This file provides a helper that implements much of the TTI interface in |
11 | /// terms of the target-independent code generator and TargetLowering. |
12 | /// |
13 | //===----------------------------------------------------------------------===// |
14 | |
15 | |
16 | #ifndef LLVM_CODEGEN_BASICTTIIMPL_H |
17 | #define LLVM_CODEGEN_BASICTTIIMPL_H |
18 | |
19 | #include "llvm/ADT/APInt.h" |
20 | #include "llvm/ADT/ArrayRef.h" |
21 | #include "llvm/ADT/BitVector.h" |
22 | #include "llvm/ADT/SmallPtrSet.h" |
23 | #include "llvm/ADT/SmallVector.h" |
24 | #include "llvm/Analysis/LoopInfo.h" |
25 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" |
26 | #include "llvm/Analysis/TargetTransformInfo.h" |
27 | #include "llvm/Analysis/TargetTransformInfoImpl.h" |
28 | #include "llvm/CodeGen/ISDOpcodes.h" |
29 | #include "llvm/CodeGen/TargetLowering.h" |
30 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
31 | #include "llvm/CodeGen/ValueTypes.h" |
32 | #include "llvm/IR/BasicBlock.h" |
33 | #include "llvm/IR/Constant.h" |
34 | #include "llvm/IR/Constants.h" |
35 | #include "llvm/IR/DataLayout.h" |
36 | #include "llvm/IR/DerivedTypes.h" |
37 | #include "llvm/IR/InstrTypes.h" |
38 | #include "llvm/IR/Instruction.h" |
39 | #include "llvm/IR/Instructions.h" |
40 | #include "llvm/IR/Intrinsics.h" |
41 | #include "llvm/IR/Operator.h" |
42 | #include "llvm/IR/Type.h" |
43 | #include "llvm/IR/Value.h" |
44 | #include "llvm/Support/Casting.h" |
45 | #include "llvm/Support/CommandLine.h" |
46 | #include "llvm/Support/ErrorHandling.h" |
47 | #include "llvm/Support/MachineValueType.h" |
48 | #include "llvm/Support/MathExtras.h" |
49 | #include "llvm/Target/TargetMachine.h" |
50 | #include <algorithm> |
51 | #include <cassert> |
52 | #include <cstdint> |
53 | #include <limits> |
54 | #include <utility> |
55 | |
56 | namespace llvm { |
57 | |
58 | class Function; |
59 | class GlobalValue; |
60 | class LLVMContext; |
61 | class ScalarEvolution; |
62 | class SCEV; |
63 | class TargetMachine; |
64 | |
65 | extern cl::opt<unsigned> PartialUnrollingThreshold; |
66 | |
67 | /// Base class which can be used to help build a TTI implementation. |
68 | /// |
69 | /// This class provides as much implementation of the TTI interface as is |
70 | /// possible using the target independent parts of the code generator. |
71 | /// |
72 | /// In order to subclass it, your class must implement a getST() method to |
73 | /// return the subtarget, and a getTLI() method to return the target lowering. |
74 | /// We need these methods implemented in the derived class so that this class |
75 | /// doesn't have to duplicate storage for them. |
76 | template <typename T> |
77 | class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> { |
78 | private: |
79 | using BaseT = TargetTransformInfoImplCRTPBase<T>; |
80 | using TTI = TargetTransformInfo; |
81 | |
82 | /// Helper function to access this as a T. |
83 | T *thisT() { return static_cast<T *>(this); } |
84 | |
85 | /// Estimate a cost of Broadcast as an extract and sequence of insert |
86 | /// operations. |
87 | InstructionCost getBroadcastShuffleOverhead(FixedVectorType *VTy) { |
88 | InstructionCost Cost = 0; |
89 | // Broadcast cost is equal to the cost of extracting the zero'th element |
90 | // plus the cost of inserting it into every element of the result vector. |
91 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0); |
92 | |
93 | for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { |
94 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); |
95 | } |
96 | return Cost; |
97 | } |
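// Illustration (editorial sketch): for a <4 x float> broadcast this models
// one extractelement of lane 0 plus four insertelements:
//   cost = Extract(VTy, 0) + 4 * Insert(VTy, i)
// with each term priced by the concrete target's getVectorInstrCost.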
98 | |
99 | /// Estimate a cost of shuffle as a sequence of extract and insert |
100 | /// operations. |
101 | InstructionCost getPermuteShuffleOverhead(FixedVectorType *VTy) { |
102 | InstructionCost Cost = 0; |
103 | // Shuffle cost is equal to the cost of extracting elements from its |
104 | // arguments plus the cost of inserting them onto the result vector. |
105 | |
106 | // e.g. a <4 x float> shuffle with mask <0,5,2,7> needs to extract from |
107 | // index 0 of the first vector, index 1 of the second vector, index 2 of |
108 | // the first vector and finally index 3 of the second vector, and insert |
109 | // them at indices <0,1,2,3> of the result vector. |
110 | for (int i = 0, e = VTy->getNumElements(); i < e; ++i) { |
111 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i); |
112 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i); |
113 | } |
114 | return Cost; |
115 | } |
116 | |
117 | /// Estimate a cost of subvector extraction as a sequence of extract and |
118 | /// insert operations. |
119 | InstructionCost getExtractSubvectorOverhead(VectorType *VTy, int Index, |
120 | FixedVectorType *SubVTy) { |
121 | assert(VTy && SubVTy && |
122 | "Can only extract subvectors from vectors"); |
123 | int NumSubElts = SubVTy->getNumElements(); |
124 | assert((!isa<FixedVectorType>(VTy) || |
125 | (Index + NumSubElts) <= |
126 | (int)cast<FixedVectorType>(VTy)->getNumElements()) && |
127 | "SK_ExtractSubvector index out of range"); |
128 | |
129 | InstructionCost Cost = 0; |
130 | // Subvector extraction cost is equal to the cost of extracting elements |
131 | // from the source type plus the cost of inserting them into the result |
132 | // vector type. |
133 | for (int i = 0; i != NumSubElts; ++i) { |
134 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, |
135 | i + Index); |
136 | Cost += |
137 | thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i); |
138 | } |
139 | return Cost; |
140 | } |
141 | |
142 | /// Estimate a cost of subvector insertion as a sequence of extract and |
143 | /// insert operations. |
144 | InstructionCost getInsertSubvectorOverhead(VectorType *VTy, int Index, |
145 | FixedVectorType *SubVTy) { |
146 | assert(VTy && SubVTy && |
147 | "Can only insert subvectors into vectors"); |
148 | int NumSubElts = SubVTy->getNumElements(); |
149 | assert((!isa<FixedVectorType>(VTy) || |
150 | (Index + NumSubElts) <= |
151 | (int)cast<FixedVectorType>(VTy)->getNumElements()) && |
152 | "SK_InsertSubvector index out of range"); |
153 | |
154 | InstructionCost Cost = 0; |
155 | |
156 | // Subvector insertion cost is equal to the cost of extracting elements |
157 | // from the subvector type plus the cost of inserting them into the result. |
158 | for (int i = 0; i != NumSubElts; ++i) { |
159 | Cost += |
160 | thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i); |
161 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, |
162 | i + Index); |
163 | } |
164 | return Cost; |
165 | } |
166 | |
167 | /// Local query method delegates up to T which *must* implement this! |
168 | const TargetSubtargetInfo *getST() const { |
169 | return static_cast<const T *>(this)->getST(); |
170 | } |
171 | |
172 | /// Local query method delegates up to T which *must* implement this! |
173 | const TargetLoweringBase *getTLI() const { |
174 | return static_cast<const T *>(this)->getTLI(); |
175 | } |
176 | |
177 | static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) { |
178 | switch (M) { |
179 | case TTI::MIM_Unindexed: |
180 | return ISD::UNINDEXED; |
181 | case TTI::MIM_PreInc: |
182 | return ISD::PRE_INC; |
183 | case TTI::MIM_PreDec: |
184 | return ISD::PRE_DEC; |
185 | case TTI::MIM_PostInc: |
186 | return ISD::POST_INC; |
187 | case TTI::MIM_PostDec: |
188 | return ISD::POST_DEC; |
189 | } |
190 | llvm_unreachable("Unexpected MemIndexedMode"); |
191 | } |
192 | |
193 | InstructionCost getCommonMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, |
194 | Align Alignment, |
195 | bool VariableMask, |
196 | bool IsGatherScatter, |
197 | TTI::TargetCostKind CostKind) { |
198 | auto *VT = cast<FixedVectorType>(DataTy); |
199 | // Assume the target does not have support for gather/scatter operations |
200 | // and provide a rough estimate. |
201 | // |
202 | // First, compute the cost of the individual memory operations. |
203 | InstructionCost AddrExtractCost = |
204 | IsGatherScatter |
205 | ? getVectorInstrCost(Instruction::ExtractElement, |
206 | FixedVectorType::get( |
207 | PointerType::get(VT->getElementType(), 0), |
208 | VT->getNumElements()), |
209 | -1) |
210 | : 0; |
211 | InstructionCost LoadCost = |
212 | VT->getNumElements() * |
213 | (AddrExtractCost + |
214 | getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind)); |
215 | |
216 | // Next, compute the cost of packing the result in a vector. |
217 | InstructionCost PackingCost = getScalarizationOverhead( |
218 | VT, Opcode != Instruction::Store, Opcode == Instruction::Store); |
219 | |
220 | InstructionCost ConditionalCost = 0; |
221 | if (VariableMask) { |
222 | // Compute the cost of conditionally executing the memory operations with |
223 | // variable masks. This includes extracting the individual conditions, the |
224 | // branches and the PHI nodes. |
225 | |
226 | |
227 | |
228 | ConditionalCost = |
229 | VT->getNumElements() * |
230 | (getVectorInstrCost( |
231 | Instruction::ExtractElement, |
232 | FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()), |
233 | VT->getNumElements()), |
234 | -1) + |
235 | getCFInstrCost(Instruction::Br, CostKind) + |
236 | getCFInstrCost(Instruction::PHI, CostKind)); |
237 | } |
238 | |
239 | return LoadCost + PackingCost + ConditionalCost; |
240 | } |
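// Illustration (editorial sketch, hypothetical costs): a masked load of
// <4 x i32> with a variable mask sums, per element, an address extract
// plus a scalar load; then the packing overhead; then
//   4 * (extract of an i1 lane + branch + phi)
// for the per-lane conditional execution that the scalarised expansion
// emits when there is no native masked or gather/scatter support.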
241 | |
242 | protected: |
243 | explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL) |
244 | : BaseT(DL) {} |
245 | virtual ~BasicTTIImplBase() = default; |
246 | |
247 | using TargetTransformInfoImplBase::DL; |
248 | |
249 | public: |
250 | /// \name Scalar TTI Implementations |
251 | /// @{ |
252 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
253 | unsigned AddressSpace, Align Alignment, |
254 | bool *Fast) const { |
255 | EVT E = EVT::getIntegerVT(Context, BitWidth); |
256 | return getTLI()->allowsMisalignedMemoryAccesses( |
257 | E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast); |
258 | } |
259 | |
260 | bool hasBranchDivergence() { return false; } |
261 | |
262 | bool useGPUDivergenceAnalysis() { return false; } |
263 | |
264 | bool isSourceOfDivergence(const Value *V) { return false; } |
265 | |
266 | bool isAlwaysUniform(const Value *V) { return false; } |
267 | |
268 | unsigned getFlatAddressSpace() { |
269 | // Return an invalid address space. |
270 | return -1; |
271 | } |
272 | |
273 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
274 | Intrinsic::ID IID) const { |
275 | return false; |
276 | } |
277 | |
278 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { |
279 | return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS); |
280 | } |
281 | |
282 | unsigned getAssumedAddrSpace(const Value *V) const { |
283 | return getTLI()->getTargetMachine().getAssumedAddrSpace(V); |
284 | } |
285 | |
286 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
287 | Value *NewV) const { |
288 | return nullptr; |
289 | } |
290 | |
291 | bool isLegalAddImmediate(int64_t imm) { |
292 | return getTLI()->isLegalAddImmediate(imm); |
293 | } |
294 | |
295 | bool isLegalICmpImmediate(int64_t imm) { |
296 | return getTLI()->isLegalICmpImmediate(imm); |
297 | } |
298 | |
299 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
300 | bool HasBaseReg, int64_t Scale, |
301 | unsigned AddrSpace, Instruction *I = nullptr) { |
302 | TargetLoweringBase::AddrMode AM; |
303 | AM.BaseGV = BaseGV; |
304 | AM.BaseOffs = BaseOffset; |
305 | AM.HasBaseReg = HasBaseReg; |
306 | AM.Scale = Scale; |
307 | return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I); |
308 | } |
309 | |
310 | bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, |
311 | const DataLayout &DL) const { |
312 | EVT VT = getTLI()->getValueType(DL, Ty); |
313 | return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT); |
314 | } |
315 | |
316 | bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, |
317 | const DataLayout &DL) const { |
318 | EVT VT = getTLI()->getValueType(DL, Ty); |
319 | return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT); |
320 | } |
321 | |
322 | bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) { |
323 | return TargetTransformInfoImplBase::isLSRCostLess(C1, C2); |
324 | } |
325 | |
326 | bool isNumRegsMajorCostOfLSR() { |
327 | return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR(); |
328 | } |
329 | |
330 | bool isProfitableLSRChainElement(Instruction *I) { |
331 | return TargetTransformInfoImplBase::isProfitableLSRChainElement(I); |
332 | } |
333 | |
334 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
335 | int64_t BaseOffset, bool HasBaseReg, |
336 | int64_t Scale, unsigned AddrSpace) { |
337 | TargetLoweringBase::AddrMode AM; |
338 | AM.BaseGV = BaseGV; |
339 | AM.BaseOffs = BaseOffset; |
340 | AM.HasBaseReg = HasBaseReg; |
341 | AM.Scale = Scale; |
342 | return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace); |
343 | } |
344 | |
345 | bool isTruncateFree(Type *Ty1, Type *Ty2) { |
346 | return getTLI()->isTruncateFree(Ty1, Ty2); |
347 | } |
348 | |
349 | bool isProfitableToHoist(Instruction *I) { |
350 | return getTLI()->isProfitableToHoist(I); |
351 | } |
352 | |
353 | bool useAA() const { return getST()->useAA(); } |
354 | |
355 | bool isTypeLegal(Type *Ty) { |
356 | EVT VT = getTLI()->getValueType(DL, Ty); |
357 | return getTLI()->isTypeLegal(VT); |
358 | } |
359 | |
360 | InstructionCost getRegUsageForType(Type *Ty) { |
361 | InstructionCost Val = getTLI()->getTypeLegalizationCost(DL, Ty).first; |
362 | assert(Val >= 0 && "Negative cost!"); |
363 | return Val; |
364 | } |
365 | |
366 | InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, |
367 | ArrayRef<const Value *> Operands) { |
368 | return BaseT::getGEPCost(PointeeType, Ptr, Operands); |
369 | } |
370 | |
371 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
372 | unsigned &JumpTableSize, |
373 | ProfileSummaryInfo *PSI, |
374 | BlockFrequencyInfo *BFI) { |
375 | /// Try to find the estimated number of clusters. Note that the number of |
376 | /// clusters identified in this function could be different from the actual |
377 | /// numbers found in lowering. This function ignores switches that are |
378 | /// lowered with a mix of jump table / bit test / BTree. This function was |
379 | /// initially intended to be used when estimating the cost of switch in |
380 | /// inline cost heuristic, but it's a generic cost model to be used in other |
381 | /// places (e.g., in loop unrolling). |
382 | unsigned N = SI.getNumCases(); |
383 | const TargetLoweringBase *TLI = getTLI(); |
384 | const DataLayout &DL = this->getDataLayout(); |
385 | |
386 | JumpTableSize = 0; |
387 | bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent()); |
388 | |
389 | |
390 | if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N)) |
391 | return N; |
392 | |
393 | APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue(); |
394 | APInt MinCaseVal = MaxCaseVal; |
395 | for (auto CI : SI.cases()) { |
396 | const APInt &CaseVal = CI.getCaseValue()->getValue(); |
397 | if (CaseVal.sgt(MaxCaseVal)) |
398 | MaxCaseVal = CaseVal; |
399 | if (CaseVal.slt(MinCaseVal)) |
400 | MinCaseVal = CaseVal; |
401 | } |
402 | |
403 | // Check if suitable for a bit test. |
404 | if (N <= DL.getIndexSizeInBits(0u)) { |
405 | SmallPtrSet<const BasicBlock *, 4> Dests; |
406 | for (auto I : SI.cases()) |
407 | Dests.insert(I.getCaseSuccessor()); |
408 | |
409 | if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal, |
410 | DL)) |
411 | return 1; |
412 | } |
413 | |
414 | // Check if suitable for a jump table. |
415 | if (IsJTAllowed) { |
416 | if (N < 2 || N < TLI->getMinimumJumpTableEntries()) |
417 | return N; |
418 | uint64_t Range = |
419 | (MaxCaseVal - MinCaseVal) |
420 | .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1; |
421 | // Check whether the range of clusters is dense enough for a jump table. |
422 | if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) { |
423 | JumpTableSize = Range; |
424 | return 1; |
425 | } |
426 | } |
427 | return N; |
428 | } |
429 | |
430 | bool shouldBuildLookupTables() { |
431 | const TargetLoweringBase *TLI = getTLI(); |
432 | return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || |
433 | TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other); |
434 | } |
435 | |
436 | bool shouldBuildRelLookupTables() const { |
437 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
438 | |
439 | if (!TM.isPositionIndependent()) |
440 | return false; |
441 | |
442 | /// Relative lookup table entries consist of 32-bit offsets. |
443 | /// Do not generate relative lookup tables for large code models |
444 | /// in 64-bit architectures where 32-bit offsets might not be enough. |
445 | if (TM.getCodeModel() == CodeModel::Medium || |
446 | TM.getCodeModel() == CodeModel::Large) |
447 | return false; |
448 | |
449 | Triple TargetTriple = TM.getTargetTriple(); |
450 | if (!TargetTriple.isArch64Bit()) |
451 | return false; |
452 | |
453 | // TODO: Triggers issues on aarch64 on darwin, so temporarily disable it |
454 | // there. |
455 | if (TargetTriple.getArch() == Triple::aarch64 && TargetTriple.isOSDarwin()) |
456 | return false; |
457 | |
458 | return true; |
459 | } |
460 | |
461 | bool haveFastSqrt(Type *Ty) { |
462 | const TargetLoweringBase *TLI = getTLI(); |
463 | EVT VT = TLI->getValueType(DL, Ty); |
464 | return TLI->isTypeLegal(VT) && |
465 | TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); |
466 | } |
467 | |
468 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
469 | return true; |
470 | } |
471 | |
472 | InstructionCost getFPOpCost(Type *Ty) { |
473 | // Use the legality of a floating point addition as a proxy: if FADD is |
474 | // legal (or promoted) for this type, assume FP ops on it are cheap. |
475 | const TargetLoweringBase *TLI = getTLI(); |
476 | EVT VT = TLI->getValueType(DL, Ty); |
477 | if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT)) |
478 | return TargetTransformInfo::TCC_Basic; |
479 | return TargetTransformInfo::TCC_Expensive; |
480 | } |
481 | |
482 | unsigned getInliningThresholdMultiplier() { return 1; } |
483 | unsigned adjustInliningThreshold(const CallBase *CB) { return 0; } |
484 | |
485 | int getInlinerVectorBonusPercent() { return 150; } |
486 | |
487 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
488 | TTI::UnrollingPreferences &UP, |
489 | OptimizationRemarkEmitter *ORE) { |
490 | // This unrolling functionality is target independent, but to provide some |
491 | // motivation for its intended use, for x86: |
492 | |
493 | // According to the Intel 64 and IA-32 Architectures Optimization Reference |
494 | // Manual, Intel Core models and later have a loop stream detector (and |
495 | // associated uop queue) that can benefit from partial unrolling. |
496 | |
497 | // In short: for a small enough loop body with no calls, partially unroll |
498 | // up to the size of the scheduling model's loop micro-op buffer (or the |
499 | // PartialUnrollingThreshold override), as done below. |
500 | |
501 | |
502 | |
503 | |
504 | |
505 | |
506 | |
507 | |
508 | |
509 | |
510 | |
511 | |
512 | |
513 | unsigned MaxOps; |
514 | const TargetSubtargetInfo *ST = getST(); |
515 | if (PartialUnrollingThreshold.getNumOccurrences() > 0) |
516 | MaxOps = PartialUnrollingThreshold; |
517 | else if (ST->getSchedModel().LoopMicroOpBufferSize > 0) |
518 | MaxOps = ST->getSchedModel().LoopMicroOpBufferSize; |
519 | else |
520 | return; |
521 | |
522 | // Scan the loop: don't unroll loops with calls. |
523 | for (BasicBlock *BB : L->blocks()) { |
524 | for (Instruction &I : *BB) { |
525 | if (isa<CallInst>(I) || isa<InvokeInst>(I)) { |
526 | if (const Function *F = cast<CallBase>(I).getCalledFunction()) { |
527 | if (!thisT()->isLoweredToCall(F)) |
528 | continue; |
529 | } |
530 | |
531 | if (ORE) { |
532 | ORE->emit([&]() { |
533 | return OptimizationRemark("TTI", "DontUnroll", L->getStartLoc(), |
534 | L->getHeader()) |
535 | << "advising against unrolling the loop because it " |
536 | "contains a " |
537 | << ore::NV("Call", &I); |
538 | }); |
539 | } |
540 | return; |
541 | } |
542 | } |
543 | } |
544 | |
545 | // Enable runtime and partial unrolling up to the specified size. |
546 | // Enable using the trip count upper bound to unroll loops. |
547 | UP.Partial = UP.Runtime = UP.UpperBound = true; |
548 | UP.PartialThreshold = MaxOps; |
549 | |
550 | // Avoid unrolling when optimizing for size. |
551 | UP.OptSizeThreshold = 0; |
552 | UP.PartialOptSizeThreshold = 0; |
553 | |
554 | // Set the number of instructions optimized when a "back edge" |
555 | // becomes a "fall through" to the default value of 2. |
556 | UP.BEInsns = 2; |
557 | } |
558 | |
559 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
560 | TTI::PeelingPreferences &PP) { |
561 | PP.PeelCount = 0; |
562 | PP.AllowPeeling = true; |
563 | PP.AllowLoopNestsPeeling = false; |
564 | PP.PeelProfiledIterations = true; |
565 | } |
566 | |
567 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
568 | AssumptionCache &AC, |
569 | TargetLibraryInfo *LibInfo, |
570 | HardwareLoopInfo &HWLoopInfo) { |
571 | return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
572 | } |
573 | |
574 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
575 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
576 | DominatorTree *DT, |
577 | const LoopAccessInfo *LAI) { |
578 | return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); |
579 | } |
580 | |
581 | bool emitGetActiveLaneMask() { |
582 | return BaseT::emitGetActiveLaneMask(); |
583 | } |
584 | |
585 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
586 | IntrinsicInst &II) { |
587 | return BaseT::instCombineIntrinsic(IC, II); |
588 | } |
589 | |
590 | Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, |
591 | IntrinsicInst &II, |
592 | APInt DemandedMask, |
593 | KnownBits &Known, |
594 | bool &KnownBitsComputed) { |
595 | return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, |
596 | KnownBitsComputed); |
597 | } |
598 | |
599 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
600 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
601 | APInt &UndefElts2, APInt &UndefElts3, |
602 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
603 | SimplifyAndSetOp) { |
604 | return BaseT::simplifyDemandedVectorEltsIntrinsic( |
605 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, |
606 | SimplifyAndSetOp); |
607 | } |
608 | |
609 | InstructionCost getInstructionLatency(const Instruction *I) { |
610 | if (isa<LoadInst>(I)) |
611 | return getST()->getSchedModel().DefaultLoadLatency; |
612 | |
613 | return BaseT::getInstructionLatency(I); |
614 | } |
615 | |
616 | virtual Optional<unsigned> |
617 | getCacheSize(TargetTransformInfo::CacheLevel Level) const { |
618 | return Optional<unsigned>( |
619 | getST()->getCacheSize(static_cast<unsigned>(Level))); |
620 | } |
621 | |
622 | virtual Optional<unsigned> |
623 | getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { |
624 | Optional<unsigned> TargetResult = |
625 | getST()->getCacheAssociativity(static_cast<unsigned>(Level)); |
626 | |
627 | if (TargetResult) |
628 | return TargetResult; |
629 | |
630 | return BaseT::getCacheAssociativity(Level); |
631 | } |
632 | |
633 | virtual unsigned getCacheLineSize() const { |
634 | return getST()->getCacheLineSize(); |
635 | } |
636 | |
637 | virtual unsigned getPrefetchDistance() const { |
638 | return getST()->getPrefetchDistance(); |
639 | } |
640 | |
641 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
642 | unsigned NumStridedMemAccesses, |
643 | unsigned NumPrefetches, |
644 | bool HasCall) const { |
645 | return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, |
646 | NumPrefetches, HasCall); |
647 | } |
648 | |
649 | virtual unsigned getMaxPrefetchIterationsAhead() const { |
650 | return getST()->getMaxPrefetchIterationsAhead(); |
651 | } |
652 | |
653 | virtual bool enableWritePrefetching() const { |
654 | return getST()->enableWritePrefetching(); |
655 | } |
656 | /// @} |
657 | |
658 | /// \name Vector TTI Implementations |
659 | /// @{ |
660 | |
661 | |
662 | TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
663 | return TypeSize::getFixed(32); |
664 | } |
665 | |
666 | Optional<unsigned> getMaxVScale() const { return None; } |
667 | |
668 | |
669 | |
670 | |
671 | InstructionCost getScalarizationOverhead(VectorType *InTy, |
672 | const APInt &DemandedElts, |
673 | bool Insert, bool Extract) { |
674 | /// FIXME: a bitfield is not a reasonable abstraction for talking about |
675 | /// which elements are needed from a scalable vector. |
676 | auto *Ty = cast<FixedVectorType>(InTy); |
677 | |
678 | assert(DemandedElts.getBitWidth() == Ty->getNumElements() && |
679 | "Vector size mismatch"); |
680 | |
681 | InstructionCost Cost = 0; |
682 | |
683 | for (int i = 0, e = Ty->getNumElements(); i < e; ++i) { |
684 | if (!DemandedElts[i]) |
685 | continue; |
686 | if (Insert) |
687 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i); |
688 | if (Extract) |
689 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i); |
690 | } |
691 | |
692 | return Cost; |
693 | } |
694 | |
695 | /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead. |
696 | InstructionCost getScalarizationOverhead(VectorType *InTy, bool Insert, |
697 | bool Extract) { |
698 | auto *Ty = cast<FixedVectorType>(InTy); |
699 | |
700 | APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements()); |
701 | return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); |
702 | } |
703 | |
704 | /// Estimate the overhead of scalarizing an instruction's unique |
705 | /// non-constant operands. The (potentially vector) types to use for each |
706 | /// argument are passed via Tys. |
707 | InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
708 | ArrayRef<Type *> Tys) { |
709 | assert(Args.size() == Tys.size() && "Expected matching Args and Tys"); |
710 | |
711 | InstructionCost Cost = 0; |
712 | SmallPtrSet<const Value*, 4> UniqueOperands; |
713 | for (int I = 0, E = Args.size(); I != E; I++) { |
714 | // Disregard things like metadata arguments. |
715 | const Value *A = Args[I]; |
716 | Type *Ty = Tys[I]; |
717 | if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() && |
718 | !Ty->isPtrOrPtrVectorTy()) |
719 | continue; |
720 | |
721 | if (!isa<Constant>(A) && UniqueOperands.insert(A).second) { |
722 | if (auto *VecTy = dyn_cast<VectorType>(Ty)) |
723 | Cost += getScalarizationOverhead(VecTy, false, true); |
724 | } |
725 | } |
726 | |
727 | return Cost; |
728 | } |
729 | |
730 | /// Estimate the overhead of scalarizing the inputs and outputs of an |
731 | /// instruction, with return type RetTy and arguments Args of type Tys. If |
732 | /// Args are unknown (empty), then the cost associated with one argument is |
733 | /// added as a heuristic. |
734 | InstructionCost getScalarizationOverhead(VectorType *RetTy, |
735 | ArrayRef<const Value *> Args, |
736 | ArrayRef<Type *> Tys) { |
737 | InstructionCost Cost = getScalarizationOverhead(RetTy, true, false); |
738 | if (!Args.empty()) |
739 | Cost += getOperandsScalarizationOverhead(Args, Tys); |
740 | else |
741 | // When no information on arguments is provided, we add the cost |
742 | // associated with one argument as a heuristic. |
743 | Cost += getScalarizationOverhead(RetTy, false, true); |
744 | |
745 | return Cost; |
746 | } |
747 | |
748 | unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } |
749 | |
750 | InstructionCost getArithmeticInstrCost( |
751 | unsigned Opcode, Type *Ty, |
752 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
753 | TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue, |
754 | TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue, |
755 | TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None, |
756 | TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None, |
757 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
758 | const Instruction *CxtI = nullptr) { |
759 | // Check if any of the operands are vector operands. |
760 | const TargetLoweringBase *TLI = getTLI(); |
761 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
762 | assert(ISD && "Invalid opcode"); |
763 | |
764 | // TODO: Handle more cost kinds. |
765 | if (CostKind != TTI::TCK_RecipThroughput) |
766 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, |
767 | Opd1Info, Opd2Info, |
768 | Opd1PropInfo, Opd2PropInfo, |
769 | Args, CxtI); |
770 | |
771 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
772 | |
773 | bool IsFloat = Ty->isFPOrFPVectorTy(); |
774 | // Assume that floating point arithmetic operations cost twice as much as |
775 | // integer operations. |
776 | InstructionCost OpCost = (IsFloat ? 2 : 1); |
777 | |
778 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
779 | // The operation is legal. Assume it costs 1. |
780 | // TODO: Once we have extract/insert subvector cost we need to use them. |
781 | return LT.first * OpCost; |
782 | } |
783 | |
784 | if (!TLI->isOperationExpand(ISD, LT.second)) { |
785 | // If the operation is custom lowered, then assume that the code is twice |
786 | // as expensive. |
787 | return LT.first * 2 * OpCost; |
788 | } |
789 | |
790 | // An 'Expand' of URem and SRem is special because it may default |
791 | // to expanding the operation into a sequence of sub-operations, |
792 | // i.e. X % Y -> X - (X / Y) * Y. |
793 | if (ISD == ISD::UREM || ISD == ISD::SREM) { |
794 | bool IsSigned = ISD == ISD::SREM; |
795 | if (TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, |
796 | LT.second) || |
797 | TLI->isOperationLegalOrCustom(IsSigned ? ISD::SDIV : ISD::UDIV, |
798 | LT.second)) { |
799 | unsigned DivOpc = IsSigned ? Instruction::SDiv : Instruction::UDiv; |
800 | InstructionCost DivCost = thisT()->getArithmeticInstrCost( |
801 | DivOpc, Ty, CostKind, Opd1Info, Opd2Info, Opd1PropInfo, |
802 | Opd2PropInfo); |
803 | InstructionCost MulCost = |
804 | thisT()->getArithmeticInstrCost(Instruction::Mul, Ty, CostKind); |
805 | InstructionCost SubCost = |
806 | thisT()->getArithmeticInstrCost(Instruction::Sub, Ty, CostKind); |
807 | return DivCost + MulCost + SubCost; |
808 | } |
809 | } |
810 | |
811 | // We cannot scalarize scalable vectors, so return Invalid. |
812 | if (isa<ScalableVectorType>(Ty)) |
813 | return InstructionCost::getInvalid(); |
814 | |
815 | // Else, assume that we need to scalarize this op. |
816 | // TODO: If one of the types gets legalized by splitting, handle this |
817 | // similarly to what getCastInstrCost() does. |
818 | if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) { |
819 | InstructionCost Cost = thisT()->getArithmeticInstrCost( |
820 | Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info, |
821 | Opd1PropInfo, Opd2PropInfo, Args, CxtI); |
822 | // Return the cost of multiple scalar invocation plus the cost of |
823 | // inserting and extracting the values. |
824 | SmallVector<Type *> Tys(Args.size(), Ty); |
825 | return getScalarizationOverhead(VTy, Args, Tys) + |
826 | VTy->getNumElements() * Cost; |
827 | } |
828 | |
829 | // We don't know anything about this scalar instruction. |
830 | return OpCost; |
831 | } |
832 | |
833 | TTI::ShuffleKind improveShuffleKindFromMask(TTI::ShuffleKind Kind, |
834 | ArrayRef<int> Mask) const { |
835 | int Limit = Mask.size() * 2; |
836 | if (Mask.empty() || |
837 | // Extra check required by the isSingleSourceMaskImpl function (called by |
838 | // ShuffleVectorInst::isSingleSourceMask). |
839 | any_of(Mask, [Limit](int I) { return I >= Limit; })) |
840 | return Kind; |
841 | switch (Kind) { |
842 | case TTI::SK_PermuteSingleSrc: |
843 | if (ShuffleVectorInst::isReverseMask(Mask)) |
844 | return TTI::SK_Reverse; |
845 | if (ShuffleVectorInst::isZeroEltSplatMask(Mask)) |
846 | return TTI::SK_Broadcast; |
847 | break; |
848 | case TTI::SK_PermuteTwoSrc: |
849 | if (ShuffleVectorInst::isSelectMask(Mask)) |
850 | return TTI::SK_Select; |
851 | if (ShuffleVectorInst::isTransposeMask(Mask)) |
852 | return TTI::SK_Transpose; |
853 | break; |
854 | case TTI::SK_Select: |
855 | case TTI::SK_Reverse: |
856 | case TTI::SK_Broadcast: |
857 | case TTI::SK_Transpose: |
858 | case TTI::SK_InsertSubvector: |
859 | case TTI::SK_ExtractSubvector: |
860 | case TTI::SK_Splice: |
861 | break; |
862 | } |
863 | return Kind; |
864 | } |
865 | |
866 | InstructionCost getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, |
867 | ArrayRef<int> Mask, int Index, |
868 | VectorType *SubTp) { |
869 | |
870 | switch (improveShuffleKindFromMask(Kind, Mask)) { |
871 | case TTI::SK_Broadcast: |
872 | return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp)); |
873 | case TTI::SK_Select: |
874 | case TTI::SK_Splice: |
875 | case TTI::SK_Reverse: |
876 | case TTI::SK_Transpose: |
877 | case TTI::SK_PermuteSingleSrc: |
878 | case TTI::SK_PermuteTwoSrc: |
879 | return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp)); |
880 | case TTI::SK_ExtractSubvector: |
881 | return getExtractSubvectorOverhead(Tp, Index, |
882 | cast<FixedVectorType>(SubTp)); |
883 | case TTI::SK_InsertSubvector: |
884 | return getInsertSubvectorOverhead(Tp, Index, |
885 | cast<FixedVectorType>(SubTp)); |
886 | } |
887 | llvm_unreachable("Unknown TTI::ShuffleKind"); |
888 | } |
889 | |
890 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
891 | TTI::CastContextHint CCH, |
892 | TTI::TargetCostKind CostKind, |
893 | const Instruction *I = nullptr) { |
894 | if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0) |
895 | return 0; |
896 | |
897 | const TargetLoweringBase *TLI = getTLI(); |
898 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
899 | assert(ISD && "Invalid opcode"); |
900 | std::pair<InstructionCost, MVT> SrcLT = |
901 | TLI->getTypeLegalizationCost(DL, Src); |
902 | std::pair<InstructionCost, MVT> DstLT = |
903 | TLI->getTypeLegalizationCost(DL, Dst); |
904 | |
905 | TypeSize SrcSize = SrcLT.second.getSizeInBits(); |
906 | TypeSize DstSize = DstLT.second.getSizeInBits(); |
907 | bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy(); |
908 | bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy(); |
909 | |
910 | switch (Opcode) { |
911 | default: |
912 | break; |
913 | case Instruction::Trunc: |
914 | // Check for NOOP conversions. |
915 | if (TLI->isTruncateFree(SrcLT.second, DstLT.second)) |
916 | return 0; |
917 | LLVM_FALLTHROUGH; |
918 | case Instruction::BitCast: |
919 | // Bitcasts between types that are legalized to the same type are free, and |
920 | // assume int to/from ptr of the same size is also free. |
921 | if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst && |
922 | SrcSize == DstSize) |
923 | return 0; |
924 | break; |
925 | case Instruction::FPExt: |
926 | if (I && getTLI()->isExtFree(I)) |
927 | return 0; |
928 | break; |
929 | case Instruction::ZExt: |
930 | if (TLI->isZExtFree(SrcLT.second, DstLT.second)) |
931 | return 0; |
932 | LLVM_FALLTHROUGH; |
933 | case Instruction::SExt: |
934 | if (I && getTLI()->isExtFree(I)) |
935 | return 0; |
936 | |
937 | // If this is a zext/sext of a load, return 0 if the corresponding |
938 | // extending load exists on the target. |
939 | if (CCH == TTI::CastContextHint::Normal) { |
940 | EVT ExtVT = EVT::getEVT(Dst); |
941 | EVT LoadVT = EVT::getEVT(Src); |
942 | unsigned LType = |
943 | ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD); |
944 | if (DstLT.first == SrcLT.first && |
945 | TLI->isLoadExtLegal(LType, ExtVT, LoadVT)) |
946 | return 0; |
947 | } |
948 | break; |
949 | case Instruction::AddrSpaceCast: |
950 | if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(), |
951 | Dst->getPointerAddressSpace())) |
952 | return 0; |
953 | break; |
954 | } |
955 | |
956 | auto *SrcVTy = dyn_cast<VectorType>(Src); |
957 | auto *DstVTy = dyn_cast<VectorType>(Dst); |
958 | |
959 | // If the cast is marked as legal (or promote) then assume low cost. |
960 | if (SrcLT.first == DstLT.first && |
961 | TLI->isOperationLegalOrPromote(ISD, DstLT.second)) |
962 | return SrcLT.first; |
963 | |
964 | // Handle scalar conversions. |
965 | if (!SrcVTy && !DstVTy) { |
966 | // Just check the op cost. If the operation is legal then assume it |
967 | // costs 1. |
968 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
969 | return 1; |
970 | |
971 | // Assume that illegal scalar instructions are expensive. |
972 | return 4; |
973 | } |
974 | |
975 | // Check vector-to-vector casts. |
976 | if (DstVTy && SrcVTy) { |
977 | // If the cast is between same-sized registers, then the check is simple. |
978 | if (SrcLT.first == DstLT.first && SrcSize == DstSize) { |
979 | |
980 | // Assume that Zext is done using AND. |
981 | if (Opcode == Instruction::ZExt) |
982 | return SrcLT.first; |
983 | |
984 | // Assume that sext is done using SHL and SRA. |
985 | if (Opcode == Instruction::SExt) |
986 | return SrcLT.first * 2; |
987 | |
988 | // Just check the op cost. If the operation is legal then assume it |
989 | // costs 1 and multiply by the type-legalization overhead. |
990 | |
991 | if (!TLI->isOperationExpand(ISD, DstLT.second)) |
992 | return SrcLT.first * 1; |
993 | } |
994 | |
995 | // If we are legalizing by splitting, query the concrete TTI for the cost |
996 | // of casting the original vector twice. We also need to factor in the |
997 | // cost of the split itself. Count that as 1, to be consistent with |
998 | // TLI->getTypeLegalizationCost(). |
999 | bool SplitSrc = |
1000 | TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) == |
1001 | TargetLowering::TypeSplitVector; |
1002 | bool SplitDst = |
1003 | TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) == |
1004 | TargetLowering::TypeSplitVector; |
1005 | if ((SplitSrc || SplitDst) && SrcVTy->getElementCount().isVector() && |
1006 | DstVTy->getElementCount().isVector()) { |
1007 | Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy); |
1008 | Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy); |
1009 | T *TTI = static_cast<T *>(this); |
1010 | // If both types need to be split then the split is free. |
1011 | InstructionCost SplitCost = |
1012 | (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0; |
1013 | return SplitCost + |
1014 | (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH, |
1015 | CostKind, I)); |
1016 | } |
1017 | |
1018 | // Scalarization cost is Invalid, can't assume any num elements. |
1019 | if (isa<ScalableVectorType>(DstVTy)) |
1020 | return InstructionCost::getInvalid(); |
1021 | |
1022 | // In other cases where the source or destination are illegal, assume |
1023 | // the operation will get scalarized. |
1024 | unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements(); |
1025 | InstructionCost Cost = thisT()->getCastInstrCost( |
1026 | Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I); |
1027 | |
1028 | // Return the cost of multiple scalar invocation plus the cost of |
1029 | // inserting and extracting the values. |
1030 | return getScalarizationOverhead(DstVTy, true, true) + Num * Cost; |
1031 | } |
1032 | |
1033 | // We already handled vector-to-vector and scalar-to-scalar conversions. |
1034 | // This is where we handle bitcast between vectors and scalars. We need |
1035 | // to assume that the conversion is scalarized in one way or another. |
1036 | |
1037 | if (Opcode == Instruction::BitCast) { |
1038 | // Illegal bitcasts are done by storing and loading from a stack slot. |
1039 | return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) + |
1040 | (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0); |
1041 | } |
1042 | |
1043 | llvm_unreachable("Unhandled cast"); |
1044 | } |
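
A note on the split-vector branch above: when legalization splits the source or destination, the cost is one split (charged only when just one side splits) plus twice the cost of casting the half-width vectors, recursing until the types are legal. A minimal standalone sketch of that recurrence, assuming a fixed legal width and unit costs of 1 (plain integers, not the LLVM cost API):

    #include <cstdio>

    // Models the halving recursion in getCastInstrCost: a cast on a vector
    // wider than the legal width costs one split plus two casts of the
    // half-width type. LegalElts and the unit costs are assumed values.
    static int castCost(int NumElts, int LegalElts) {
      if (NumElts <= LegalElts)
        return 1;                        // legal: assume one instruction
      return 1 /* split */ + 2 * castCost(NumElts / 2, LegalElts);
    }

    int main() {
      // A 16-element cast when only 4 elements per register are legal:
      std::printf("cost = %d\n", castCost(16, 4)); // 1 + 2*(1 + 2*1) = 7
    }
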
1045 | |
1046 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
1047 | VectorType *VecTy, unsigned Index) { |
1048 | return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy, |
1049 | Index) + |
1050 | thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(), |
1051 | TTI::CastContextHint::None, |
1052 | TTI::TCK_RecipThroughput); |
1053 | } |
1054 | |
1055 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
1056 | const Instruction *I = nullptr) { |
1057 | return BaseT::getCFInstrCost(Opcode, CostKind, I); |
1058 | } |
1059 | |
1060 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
1061 | CmpInst::Predicate VecPred, |
1062 | TTI::TargetCostKind CostKind, |
1063 | const Instruction *I = nullptr) { |
1064 | const TargetLoweringBase *TLI = getTLI(); |
1065 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
1066 | assert(ISD && "Invalid opcode"); |
1067 | |
1068 | // TODO: Handle other cost kinds. |
1069 | if (CostKind != TTI::TCK_RecipThroughput) |
| 61 | Assuming 'CostKind' is equal to TCK_RecipThroughput |
1070 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, |
1071 | I); |
1072 | |
1073 | // Selects on vectors are actually vector selects. |
1074 | if (ISD == ISD::SELECT) { |
| 63 | Assuming 'ISD' is equal to SELECT |
1075 | assert(CondTy && "CondTy must exist"); |
1076 | if (CondTy->isVectorTy()) |
| 65 | Called C++ object pointer is null |
1077 | ISD = ISD::VSELECT; |
1078 | } |
1079 | std::pair<InstructionCost, MVT> LT = |
1080 | TLI->getTypeLegalizationCost(DL, ValTy); |
1081 | |
1082 | if (!(ValTy->isVectorTy() && !LT.second.isVector()) && |
1083 | !TLI->isOperationExpand(ISD, LT.second)) { |
1084 | // The operation is legal. Assume it costs 1. Multiply |
1085 | // by the type-legalization overhead. |
1086 | return LT.first * 1; |
1087 | } |
1088 | |
1089 | // Otherwise, assume that the cast is scalarized. |
1090 | // TODO: If one of the types gets legalized by splitting, handle this |
1091 | // similarly to what getCastInstrCost() does. |
1092 | if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) { |
1093 | unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements(); |
1094 | if (CondTy) |
1095 | CondTy = CondTy->getScalarType(); |
1096 | InstructionCost Cost = thisT()->getCmpSelInstrCost( |
1097 | Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I); |
1098 | |
1099 | // Return the cost of multiple scalar invocation plus the cost of |
1100 | // inserting and extracting the values. |
1101 | return getScalarizationOverhead(ValVTy, true, false) + Num * Cost; |
1102 | } |
1103 | |
1104 | // Unknown scalar opcode. |
1105 | return 1; |
1106 | } |
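
The scalarization fallback above prices a vector compare/select as the overhead of rebuilding the result vector plus one scalar operation per lane. A standalone sketch of that arithmetic, with assumed unit costs:

    #include <cstdio>

    // Models: getScalarizationOverhead(ValVTy, /*Insert=*/true,
    // /*Extract=*/false) + Num * ScalarCost. Unit costs are assumptions.
    int main() {
      const int NumElts = 8;
      const int InsertCostPerLane = 1; // rebuilding the result vector
      const int ScalarCmpCost = 1;     // one scalar icmp/fcmp per lane
      std::printf("cost = %d\n",
                  NumElts * InsertCostPerLane + NumElts * ScalarCmpCost); // 16
    }
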
1107 | |
1108 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1109 | unsigned Index) { |
1110 | std::pair<InstructionCost, MVT> LT = |
1111 | getTLI()->getTypeLegalizationCost(DL, Val->getScalarType()); |
1112 | |
1113 | return LT.first; |
1114 | } |
1115 | |
1116 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, |
1117 | MaybeAlign Alignment, unsigned AddressSpace, |
1118 | TTI::TargetCostKind CostKind, |
1119 | const Instruction *I = nullptr) { |
1120 | assert(!Src->isVoidTy() && "Invalid type"); |
1121 | |
1122 | if (getTLI()->getValueType(DL, Src, true) == MVT::Other) |
1123 | return 4; |
1124 | std::pair<InstructionCost, MVT> LT = |
1125 | getTLI()->getTypeLegalizationCost(DL, Src); |
1126 | |
1127 | // Assuming that all loads of legal types cost 1. |
1128 | InstructionCost Cost = LT.first; |
1129 | if (CostKind != TTI::TCK_RecipThroughput) |
1130 | return Cost; |
1131 | |
1132 | if (Src->isVectorTy() && |
1133 | // In practice it's not currently possible to have a change in lane |
1134 | // length for extending loads or truncating stores, so both the N in |
1135 | // <N x T> and <N x T'> will be the same. |
1136 | TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(), |
1137 | LT.second.getSizeInBits())) { |
1138 | // This is a vector load that legalizes to a larger type than the |
1139 | // vector itself. Unless the corresponding extending load or |
1140 | // truncating store is legal, this will scalarize. |
1141 | TargetLowering::LegalizeAction LA = TargetLowering::Expand; |
1142 | EVT MemVT = getTLI()->getValueType(DL, Src); |
1143 | if (Opcode == Instruction::Store) |
1144 | LA = getTLI()->getTruncStoreAction(LT.second, MemVT); |
1145 | else |
1146 | LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT); |
1147 | |
1148 | if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { |
1149 | // This is a vector load/store for some illegal type that is scalarized. |
1150 | // We must account for the cost of building or decomposing the vector. |
1151 | Cost += getScalarizationOverhead(cast<VectorType>(Src), |
1152 | Opcode != Instruction::Store, |
1153 | Opcode == Instruction::Store); |
1154 | } |
1155 | } |
1156 | |
1157 | return Cost; |
1158 | } |
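
In short, the base cost is the number of legalized memory operations (LT.first); when a vector access legalizes to a wider type and no matching extending load or truncating store exists, per-lane scalarization overhead is added on top. A sketch with assumed unit costs:

    #include <cstdio>

    // Models getMemoryOpCost: legalized-op count, plus per-lane extract or
    // insert overhead when the access must be scalarized.
    int main() {
      const int NumLegalizedOps = 1;   // LT.first
      const bool MustScalarize = true; // no legal ext-load / trunc-store
      const int NumElts = 4, LaneCost = 1;
      int Cost = NumLegalizedOps;
      if (MustScalarize)
        Cost += NumElts * LaneCost;    // getScalarizationOverhead stand-in
      std::printf("cost = %d\n", Cost); // 5
    }
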
1159 | |
1160 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *DataTy, |
1161 | Align Alignment, unsigned AddressSpace, |
1162 | TTI::TargetCostKind CostKind) { |
1163 | return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, true, false, |
1164 | CostKind); |
1165 | } |
1166 | |
1167 | InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, |
1168 | const Value *Ptr, bool VariableMask, |
1169 | Align Alignment, |
1170 | TTI::TargetCostKind CostKind, |
1171 | const Instruction *I = nullptr) { |
1172 | return getCommonMaskedMemoryOpCost(Opcode, DataTy, Alignment, VariableMask, |
1173 | true, CostKind); |
1174 | } |
1175 | |
1176 | InstructionCost getInterleavedMemoryOpCost( |
1177 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1178 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
1179 | bool UseMaskForCond = false, bool UseMaskForGaps = false) { |
1180 | auto *VT = cast<FixedVectorType>(VecTy); |
1181 | |
1182 | unsigned NumElts = VT->getNumElements(); |
1183 | assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor"); |
1184 | |
1185 | unsigned NumSubElts = NumElts / Factor; |
1186 | auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts); |
1187 | |
1188 | // Firstly, the cost of the load/store operation itself. |
1189 | InstructionCost Cost; |
1190 | if (UseMaskForCond || UseMaskForGaps) |
1191 | Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment, |
1192 | AddressSpace, CostKind); |
1193 | else |
1194 | Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace, |
1195 | CostKind); |
1196 | |
1197 | // Legalize the vector type, and get the legalized and unlegalized type |
1198 | // sizes. |
1199 | MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
1200 | unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy); |
1201 | unsigned VecTyLTSize = VecTyLT.getStoreSize(); |
1202 | |
1203 | // Scale the cost of the memory operation by the fraction of legalized |
1204 | // instructions that will actually be used. We shouldn't account for the |
1205 | // cost of dead instructions since they will be removed. |
1206 | // |
1207 | // E.g., an interleaved load of factor 8: |
1208 | //       %vec = load <16 x i64>, <16 x i64>* %ptr |
1209 | //       %v0 = shufflevector %vec, undef, <0, 8> |
1210 | // |
1211 | // If <8 x i64> is legal and <16 x i64> is not, the load is legalized |
1212 | // into two <8 x i64> loads, but %v0 only uses elements of the first |
1213 | // legalized load, so the second one is dead and its cost should not be |
1214 | // counted. |
1215 | |
1216 | |
1217 | if (VecTySize > VecTyLTSize) { |
1218 | // The number of loads of a legal type it will take to represent a load |
1219 | // of the unlegalized vector type. |
1220 | unsigned NumLegalInsts = divideCeil(VecTySize, VecTyLTSize); |
1221 | |
1222 | // Determine the number of elements that correspond to each legal |
1223 | // instruction after legalization. |
1224 | unsigned NumEltsPerLegalInst = divideCeil(NumElts, NumLegalInsts); |
1225 | |
1226 | // Record which of the legal instructions are actually used. |
1227 | BitVector UsedInsts(NumLegalInsts, false); |
1228 | for (unsigned Index : Indices) |
1229 | for (unsigned Elt = 0; Elt < NumSubElts; ++Elt) |
1230 | UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst); |
1231 | |
1232 | // Scale the cost of the load/store by the fraction of legal |
1233 | // instructions that are used. |
1234 | Cost *= UsedInsts.count() / NumLegalInsts; |
1235 | } |
1236 | |
1237 | // Then plus the cost of the interleave operation itself. |
1238 | assert(Indices.size() <= Factor && |
1239 | "Interleaved memory op has too many members"); |
1240 | if (Opcode == Instruction::Load) { |
1241 | // The interleaved load is modeled as (taking factor 2 as an example): |
1242 | //   %vec = load <8 x i32>, <8 x i32>* %ptr |
1243 | //   %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0 |
1244 | //   %v1 = shuffle %vec, undef, <1, 3, 5, 7>   ; Index 1 |
1245 | // |
1246 | // The cost is estimated as extracting the requested lanes from the |
1247 | // wide loaded vector and inserting them into the narrow sub-vectors, |
1248 | // once per member requested in Indices. |
1249 | for (unsigned Index : Indices) { |
1250 | assert(Index < Factor && "Invalid index for interleaved memory op"); |
1251 | |
1252 | |
1253 | for (unsigned Elm = 0; Elm < NumSubElts; Elm++) |
1254 | Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT, |
1255 | Index + Elm * Factor); |
1256 | } |
1257 | |
1258 | InstructionCost InsSubCost = 0; |
1259 | for (unsigned Elm = 0; Elm < NumSubElts; Elm++) |
1260 | InsSubCost += |
1261 | thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, Elm); |
1262 | |
1263 | Cost += Indices.size() * InsSubCost; |
1264 | } else { |
1265 | // The interleaved store is modeled as (taking factor 2 as an example): |
1266 | //   %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7> |
1267 | //   store <8 x i32> %v0_v1, <8 x i32>* %ptr |
1268 | // |
1269 | // The cost is estimated as extracting all lanes from the sub-vectors |
1270 | // being stored and inserting them at their interleaved positions in |
1271 | // the wide vector. Members not listed in Indices are gaps and are |
1272 | // skipped. |
1273 | |
1274 | |
1275 | |
1276 | |
1277 | |
1278 | InstructionCost ExtSubCost = 0; |
1279 | for (unsigned Elm = 0; Elm < NumSubElts; Elm++) |
1280 | ExtSubCost += thisT()->getVectorInstrCost(Instruction::ExtractElement, |
1281 | SubVT, Elm); |
1282 | Cost += ExtSubCost * Indices.size(); |
1283 | |
1284 | for (unsigned Index : Indices) { |
1285 | assert(Index < Factor && "Invalid index for interleaved memory op"); |
1286 | |
1287 | |
1288 | for (unsigned Elm = 0; Elm < NumSubElts; Elm++) |
1289 | Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VT, |
1290 | Index + Elm * Factor); |
1291 | } |
1292 | } |
1293 | |
1294 | if (!UseMaskForCond) |
1295 | return Cost; |
1296 | |
1297 | Type *I8Type = Type::getInt8Ty(VT->getContext()); |
1298 | auto *MaskVT = FixedVectorType::get(I8Type, NumElts); |
1299 | SubVT = FixedVectorType::get(I8Type, NumSubElts); |
1300 | |
1301 | // The cost of replicating the per-member mask across the whole |
1302 | // interleaved access: extract the NumSubElts elements of the |
1303 | // sub-vector mask and insert each of them Factor times into the |
1304 | // wide mask vector. |
1305 | |
1306 | |
1307 | |
1308 | |
1309 | |
1310 | |
1311 | for (unsigned i = 0; i < NumSubElts; i++) |
1312 | Cost += |
1313 | thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i); |
1314 | |
1315 | for (unsigned i = 0; i < NumElts; i++) |
1316 | Cost += |
1317 | thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i); |
1318 | |
1319 | // When some members of the interleaved access are gaps, the wide mask |
1320 | // is additionally ANDed with a constant mask that disables the gap |
1321 | // lanes. |
1322 | |
1323 | |
1324 | if (UseMaskForGaps) |
1325 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT, |
1326 | CostKind); |
1327 | |
1328 | return Cost; |
1329 | } |
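
A worked example of the load path above, ignoring legalization scaling and masks: for Factor = 2, NumElts = 8 (so NumSubElts = 4) and both members requested, the cost is the wide load, plus one extract per requested lane, plus one insert per sub-vector lane. With all unit costs assumed to be 1:

    #include <cstdio>

    int main() {
      const int Factor = 2, NumElts = 8, NumIndices = 2;
      const int NumSubElts = NumElts / Factor;
      int Cost = 1;                     // the wide load itself
      Cost += NumIndices * NumSubElts;  // extracts from the wide vector
      Cost += NumIndices * NumSubElts;  // inserts into the sub-vectors
      std::printf("cost = %d\n", Cost); // 1 + 8 + 8 = 17
    }
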
1330 | |
1331 | |
1332 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1333 | TTI::TargetCostKind CostKind) { |
1334 | // Check for generically free intrinsics. |
1335 | if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0) |
| 3 | Calling 'InstructionCost::operator==' |
| 8 | Returning from 'InstructionCost::operator==' |
1336 | return 0; |
1337 | |
1338 | // Assume that target intrinsics are cheap. |
1339 | Intrinsic::ID IID = ICA.getID(); |
1340 | if (Function::isTargetIntrinsic(IID)) |
| 10 | Assuming the condition is false |
1341 | return TargetTransformInfo::TCC_Basic; |
1342 | |
1343 | if (ICA.isTypeBasedOnly()) |
| 12 | Calling 'IntrinsicCostAttributes::isTypeBasedOnly' |
| 18 | Returning from 'IntrinsicCostAttributes::isTypeBasedOnly' |
1344 | return getTypeBasedIntrinsicInstrCost(ICA, CostKind); |
1345 | |
1346 | Type *RetTy = ICA.getReturnType(); |
1347 | |
1348 | ElementCount RetVF = |
1349 | (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount() |
1350 | : ElementCount::getFixed(1)); |
1351 | const IntrinsicInst *I = ICA.getInst(); |
1352 | const SmallVectorImpl<const Value *> &Args = ICA.getArgs(); |
1353 | FastMathFlags FMF = ICA.getFlags(); |
1354 | switch (IID) { |
| 21 | Control jumps to 'case fshl:' at line 1445 |
1355 | default: |
1356 | break; |
1357 | |
1358 | case Intrinsic::cttz: |
1359 | // FIXME: If necessary, this should go in target-specific overrides. |
1360 | if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCttz()) |
1361 | return TargetTransformInfo::TCC_Basic; |
1362 | break; |
1363 | |
1364 | case Intrinsic::ctlz: |
1365 | // FIXME: If necessary, this should go in target-specific overrides. |
1366 | if (RetVF.isScalar() && getTLI()->isCheapToSpeculateCtlz()) |
1367 | return TargetTransformInfo::TCC_Basic; |
1368 | break; |
1369 | |
1370 | case Intrinsic::memcpy: |
1371 | return thisT()->getMemcpyCost(ICA.getInst()); |
1372 | |
1373 | case Intrinsic::masked_scatter: { |
1374 | const Value *Mask = Args[3]; |
1375 | bool VarMask = !isa<Constant>(Mask); |
1376 | Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue(); |
1377 | return thisT()->getGatherScatterOpCost(Instruction::Store, |
1378 | ICA.getArgTypes()[0], Args[1], |
1379 | VarMask, Alignment, CostKind, I); |
1380 | } |
1381 | case Intrinsic::masked_gather: { |
1382 | const Value *Mask = Args[2]; |
1383 | bool VarMask = !isa<Constant>(Mask); |
1384 | Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue(); |
1385 | return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0], |
1386 | VarMask, Alignment, CostKind, I); |
1387 | } |
1388 | case Intrinsic::experimental_stepvector: { |
1389 | if (isa<ScalableVectorType>(RetTy)) |
1390 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
1391 | // The cost of materialising a constant integer vector. |
1392 | return TargetTransformInfo::TCC_Basic; |
1393 | } |
1394 | case Intrinsic::experimental_vector_extract: { |
1395 | // FIXME: Handle case where a scalable vector is extracted from a |
1396 | // scalable vector. |
1397 | if (isa<ScalableVectorType>(RetTy)) |
1398 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
1399 | unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue(); |
1400 | return thisT()->getShuffleCost(TTI::SK_ExtractSubvector, |
1401 | cast<VectorType>(Args[0]->getType()), None, |
1402 | Index, cast<VectorType>(RetTy)); |
1403 | } |
1404 | case Intrinsic::experimental_vector_insert: { |
1405 | // FIXME: Handle case where a scalable vector is inserted into a |
1406 | // scalable vector. |
1407 | if (isa<ScalableVectorType>(Args[1]->getType())) |
1408 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
1409 | unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); |
1410 | return thisT()->getShuffleCost( |
1411 | TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), None, |
1412 | Index, cast<VectorType>(Args[1]->getType())); |
1413 | } |
1414 | case Intrinsic::experimental_vector_reverse: { |
1415 | return thisT()->getShuffleCost(TTI::SK_Reverse, |
1416 | cast<VectorType>(Args[0]->getType()), None, |
1417 | 0, cast<VectorType>(RetTy)); |
1418 | } |
1419 | case Intrinsic::experimental_vector_splice: { |
1420 | unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue(); |
1421 | return thisT()->getShuffleCost(TTI::SK_Splice, |
1422 | cast<VectorType>(Args[0]->getType()), None, |
1423 | Index, cast<VectorType>(RetTy)); |
1424 | } |
1425 | case Intrinsic::vector_reduce_add: |
1426 | case Intrinsic::vector_reduce_mul: |
1427 | case Intrinsic::vector_reduce_and: |
1428 | case Intrinsic::vector_reduce_or: |
1429 | case Intrinsic::vector_reduce_xor: |
1430 | case Intrinsic::vector_reduce_smax: |
1431 | case Intrinsic::vector_reduce_smin: |
1432 | case Intrinsic::vector_reduce_fmax: |
1433 | case Intrinsic::vector_reduce_fmin: |
1434 | case Intrinsic::vector_reduce_umax: |
1435 | case Intrinsic::vector_reduce_umin: { |
1436 | IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, I, 1); |
1437 | return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); |
1438 | } |
1439 | case Intrinsic::vector_reduce_fadd: |
1440 | case Intrinsic::vector_reduce_fmul: { |
1441 | IntrinsicCostAttributes Attrs( |
1442 | IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, I, 1); |
1443 | return getTypeBasedIntrinsicInstrCost(Attrs, CostKind); |
1444 | } |
1445 | case Intrinsic::fshl: |
1446 | case Intrinsic::fshr: { |
1447 | if (isa<ScalableVectorType>(RetTy)) |
| 22 | Assuming 'RetTy' is not a 'ScalableVectorType' |
1448 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
1449 | const Value *X = Args[0]; |
1450 | const Value *Y = Args[1]; |
1451 | const Value *Z = Args[2]; |
1452 | TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW; |
1453 | TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX); |
1454 | TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY); |
1455 | TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ); |
1456 | TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue; |
1457 | OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2 |
1458 | : TTI::OP_None; |
1459 | // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW))) |
1460 | // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW)) |
1461 | InstructionCost Cost = 0; |
1462 | Cost += |
1463 | thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind); |
1464 | Cost += |
1465 | thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind); |
1466 | Cost += thisT()->getArithmeticInstrCost( |
1467 | BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX); |
1468 | Cost += thisT()->getArithmeticInstrCost( |
1469 | BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY); |
1470 | |
1471 | if (OpKindZ != TTI::OK_UniformConstantValue && |
| 25 | Assuming 'OpKindZ' is equal to OK_UniformConstantValue |
1472 | OpKindZ != TTI::OK_NonUniformConstantValue) |
1473 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy, |
1474 | CostKind, OpKindZ, OpKindBW, |
1475 | OpPropsZ, OpPropsBW); |
1476 | // For non-rotates (X != Y) we must add shift-by-zero handling costs. |
1477 | if (X != Y) { |
| 26 | Assuming 'X' is not equal to 'Y' |
1478 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
| 28 | Calling 'Type::getWithNewBitWidth' |
| 36 | Returning from 'Type::getWithNewBitWidth' |
| 37 | 'CondTy' initialized here |
1479 | Cost += |
1480 | thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, |
| 38 | Passing 'CondTy' via 3rd parameter 'CondTy' |
| 39 | Calling 'ARMTTIImpl::getCmpSelInstrCost' |
1481 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1482 | Cost += |
1483 | thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, |
1484 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1485 | } |
1486 | return Cost; |
1487 | } |
1488 | } |
1489 | |
1490 | // Assume that we need to scalarize this intrinsic. Compute the |
1491 | // scalarization overhead based on Args for a vector intrinsic, both |
1492 | // for the return value and the operands. |
1493 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); |
1494 | if (RetVF.isVector() && !RetVF.isScalable()) { |
1495 | ScalarizationCost = 0; |
1496 | if (!RetTy->isVoidTy()) |
1497 | ScalarizationCost += |
1498 | getScalarizationOverhead(cast<VectorType>(RetTy), true, false); |
1499 | ScalarizationCost += |
1500 | getOperandsScalarizationOverhead(Args, ICA.getArgTypes()); |
1501 | } |
1502 | |
1503 | IntrinsicCostAttributes Attrs(IID, RetTy, ICA.getArgTypes(), FMF, I, |
1504 | ScalarizationCost); |
1505 | return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind); |
1506 | } |
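
For the funnel-shift case above, the expansion cost is the sum of the or/sub/shl/lshr sequence, a urem when the shift amount is not a constant, and a cmp-plus-select pair for the shift-by-zero check when X != Y. A tally with every component cost assumed to be 1:

    #include <cstdio>

    int main() {
      const bool ShiftAmtIsConstant = false; // Z is not a uniform constant
      const bool IsRotate = false;           // X != Y
      int Cost = 4;                          // or + sub + shl + lshr
      if (!ShiftAmtIsConstant)
        Cost += 1;                           // urem of Z by the bit width
      if (!IsRotate)
        Cost += 2;                           // icmp + select
      std::printf("fshl cost = %d\n", Cost); // 7
    }
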
1507 | |
1508 | /// Get intrinsic cost based on argument types. |
1509 | /// If the scalarization cost passed in is invalid, the cost of |
1510 | /// scalarizing the arguments and the return value is computed from the |
1511 | /// types instead. |
1512 | InstructionCost |
1513 | getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1514 | TTI::TargetCostKind CostKind) { |
1515 | Intrinsic::ID IID = ICA.getID(); |
1516 | Type *RetTy = ICA.getReturnType(); |
1517 | const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes(); |
1518 | FastMathFlags FMF = ICA.getFlags(); |
1519 | InstructionCost ScalarizationCostPassed = ICA.getScalarizationCost(); |
1520 | bool SkipScalarizationCost = ICA.skipScalarizationCost(); |
1521 | |
1522 | VectorType *VecOpTy = nullptr; |
1523 | if (!Tys.empty()) { |
1524 | // The vector reduction operand is operand 0 except for fadd/fmul, |
1525 | // whose operand 0 is a scalar start value (their vector op is operand 1). |
1526 | unsigned VecTyIndex = 0; |
1527 | if (IID == Intrinsic::vector_reduce_fadd || |
1528 | IID == Intrinsic::vector_reduce_fmul) |
1529 | VecTyIndex = 1; |
1530 | assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes"); |
1531 | VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]); |
1532 | } |
1533 | |
1534 | // Library call cost - other than size, make it expensive. |
1535 | unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10; |
1536 | SmallVector<unsigned, 2> ISDs; |
1537 | switch (IID) { |
1538 | default: { |
1539 | // Scalable vectors cannot be scalarized, so return Invalid. |
1540 | if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) { |
1541 | return isa<ScalableVectorType>(Ty); |
1542 | })) |
1543 | return InstructionCost::getInvalid(); |
1544 | |
1545 | // Assume that we need to scalarize this intrinsic. |
1546 | InstructionCost ScalarizationCost = |
1547 | SkipScalarizationCost ? ScalarizationCostPassed : 0; |
1548 | unsigned ScalarCalls = 1; |
1549 | Type *ScalarRetTy = RetTy; |
1550 | if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) { |
1551 | if (!SkipScalarizationCost) |
1552 | ScalarizationCost = getScalarizationOverhead(RetVTy, true, false); |
1553 | ScalarCalls = std::max(ScalarCalls, |
1554 | cast<FixedVectorType>(RetVTy)->getNumElements()); |
1555 | ScalarRetTy = RetTy->getScalarType(); |
1556 | } |
1557 | SmallVector<Type *, 4> ScalarTys; |
1558 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1559 | Type *Ty = Tys[i]; |
1560 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { |
1561 | if (!SkipScalarizationCost) |
1562 | ScalarizationCost += getScalarizationOverhead(VTy, false, true); |
1563 | ScalarCalls = std::max(ScalarCalls, |
1564 | cast<FixedVectorType>(VTy)->getNumElements()); |
1565 | Ty = Ty->getScalarType(); |
1566 | } |
1567 | ScalarTys.push_back(Ty); |
1568 | } |
1569 | if (ScalarCalls == 1) |
1570 | return 1; |
1571 | |
1572 | IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF); |
1573 | InstructionCost ScalarCost = |
1574 | thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind); |
1575 | |
1576 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1577 | } |
1578 | // Look for intrinsics that can be lowered directly or turned into a |
1579 | // scalar intrinsic call. |
1580 | case Intrinsic::sqrt: |
1581 | ISDs.push_back(ISD::FSQRT); |
1582 | break; |
1583 | case Intrinsic::sin: |
1584 | ISDs.push_back(ISD::FSIN); |
1585 | break; |
1586 | case Intrinsic::cos: |
1587 | ISDs.push_back(ISD::FCOS); |
1588 | break; |
1589 | case Intrinsic::exp: |
1590 | ISDs.push_back(ISD::FEXP); |
1591 | break; |
1592 | case Intrinsic::exp2: |
1593 | ISDs.push_back(ISD::FEXP2); |
1594 | break; |
1595 | case Intrinsic::log: |
1596 | ISDs.push_back(ISD::FLOG); |
1597 | break; |
1598 | case Intrinsic::log10: |
1599 | ISDs.push_back(ISD::FLOG10); |
1600 | break; |
1601 | case Intrinsic::log2: |
1602 | ISDs.push_back(ISD::FLOG2); |
1603 | break; |
1604 | case Intrinsic::fabs: |
1605 | ISDs.push_back(ISD::FABS); |
1606 | break; |
1607 | case Intrinsic::canonicalize: |
1608 | ISDs.push_back(ISD::FCANONICALIZE); |
1609 | break; |
1610 | case Intrinsic::minnum: |
1611 | ISDs.push_back(ISD::FMINNUM); |
1612 | break; |
1613 | case Intrinsic::maxnum: |
1614 | ISDs.push_back(ISD::FMAXNUM); |
1615 | break; |
1616 | case Intrinsic::minimum: |
1617 | ISDs.push_back(ISD::FMINIMUM); |
1618 | break; |
1619 | case Intrinsic::maximum: |
1620 | ISDs.push_back(ISD::FMAXIMUM); |
1621 | break; |
1622 | case Intrinsic::copysign: |
1623 | ISDs.push_back(ISD::FCOPYSIGN); |
1624 | break; |
1625 | case Intrinsic::floor: |
1626 | ISDs.push_back(ISD::FFLOOR); |
1627 | break; |
1628 | case Intrinsic::ceil: |
1629 | ISDs.push_back(ISD::FCEIL); |
1630 | break; |
1631 | case Intrinsic::trunc: |
1632 | ISDs.push_back(ISD::FTRUNC); |
1633 | break; |
1634 | case Intrinsic::nearbyint: |
1635 | ISDs.push_back(ISD::FNEARBYINT); |
1636 | break; |
1637 | case Intrinsic::rint: |
1638 | ISDs.push_back(ISD::FRINT); |
1639 | break; |
1640 | case Intrinsic::round: |
1641 | ISDs.push_back(ISD::FROUND); |
1642 | break; |
1643 | case Intrinsic::roundeven: |
1644 | ISDs.push_back(ISD::FROUNDEVEN); |
1645 | break; |
1646 | case Intrinsic::pow: |
1647 | ISDs.push_back(ISD::FPOW); |
1648 | break; |
1649 | case Intrinsic::fma: |
1650 | ISDs.push_back(ISD::FMA); |
1651 | break; |
1652 | case Intrinsic::fmuladd: |
1653 | ISDs.push_back(ISD::FMA); |
1654 | break; |
1655 | case Intrinsic::experimental_constrained_fmuladd: |
1656 | ISDs.push_back(ISD::STRICT_FMA); |
1657 | break; |
1658 | |
1659 | case Intrinsic::lifetime_start: |
1660 | case Intrinsic::lifetime_end: |
1661 | case Intrinsic::sideeffect: |
1662 | case Intrinsic::pseudoprobe: |
1663 | case Intrinsic::arithmetic_fence: |
1664 | return 0; |
1665 | case Intrinsic::masked_store: { |
1666 | Type *Ty = Tys[0]; |
1667 | Align TyAlign = thisT()->DL.getABITypeAlign(Ty); |
1668 | return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0, |
1669 | CostKind); |
1670 | } |
1671 | case Intrinsic::masked_load: { |
1672 | Type *Ty = RetTy; |
1673 | Align TyAlign = thisT()->DL.getABITypeAlign(Ty); |
1674 | return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0, |
1675 | CostKind); |
1676 | } |
1677 | case Intrinsic::vector_reduce_add: |
1678 | return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy, |
1679 | None, CostKind); |
1680 | case Intrinsic::vector_reduce_mul: |
1681 | return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy, |
1682 | None, CostKind); |
1683 | case Intrinsic::vector_reduce_and: |
1684 | return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy, |
1685 | None, CostKind); |
1686 | case Intrinsic::vector_reduce_or: |
1687 | return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy, None, |
1688 | CostKind); |
1689 | case Intrinsic::vector_reduce_xor: |
1690 | return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy, |
1691 | None, CostKind); |
1692 | case Intrinsic::vector_reduce_fadd: |
1693 | return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy, |
1694 | FMF, CostKind); |
1695 | case Intrinsic::vector_reduce_fmul: |
1696 | return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy, |
1697 | FMF, CostKind); |
1698 | case Intrinsic::vector_reduce_smax: |
1699 | case Intrinsic::vector_reduce_smin: |
1700 | case Intrinsic::vector_reduce_fmax: |
1701 | case Intrinsic::vector_reduce_fmin: |
1702 | return thisT()->getMinMaxReductionCost( |
1703 | VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), |
1704 | false, CostKind); |
1705 | case Intrinsic::vector_reduce_umax: |
1706 | case Intrinsic::vector_reduce_umin: |
1707 | return thisT()->getMinMaxReductionCost( |
1708 | VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)), |
1709 | true, CostKind); |
1710 | case Intrinsic::abs: |
1711 | case Intrinsic::smax: |
1712 | case Intrinsic::smin: |
1713 | case Intrinsic::umax: |
1714 | case Intrinsic::umin: { |
1715 | // abs(X) = select(icmp(X,0),X,sub(0,X)) |
1716 | // minmax(X,Y) = select(icmp(X,Y),X,Y) |
1717 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1718 | InstructionCost Cost = 0; |
1719 | |
1720 | Cost += |
1721 | thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, |
1722 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1723 | Cost += |
1724 | thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, |
1725 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1726 | |
1727 | if (IID == Intrinsic::abs) |
1728 | Cost += thisT()->getArithmeticInstrCost( |
1729 | BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue); |
1730 | return Cost; |
1731 | } |
1732 | case Intrinsic::sadd_sat: |
1733 | case Intrinsic::ssub_sat: { |
1734 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1735 | |
1736 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1737 | Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat |
1738 | ? Intrinsic::sadd_with_overflow |
1739 | : Intrinsic::ssub_with_overflow; |
1740 | |
1741 | // SatMax -> Overflow && SumDiff < 0 |
1742 | // SatMin -> Overflow && SumDiff >= 0 |
1743 | InstructionCost Cost = 0; |
1744 | IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, |
1745 | nullptr, ScalarizationCostPassed); |
1746 | Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); |
1747 | Cost += |
1748 | thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy, |
1749 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1750 | Cost += 2 * thisT()->getCmpSelInstrCost( |
1751 | BinaryOperator::Select, RetTy, CondTy, |
1752 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1753 | return Cost; |
1754 | } |
1755 | case Intrinsic::uadd_sat: |
1756 | case Intrinsic::usub_sat: { |
1757 | Type *CondTy = RetTy->getWithNewBitWidth(1); |
1758 | |
1759 | Type *OpTy = StructType::create({RetTy, CondTy}); |
1760 | Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat |
1761 | ? Intrinsic::uadd_with_overflow |
1762 | : Intrinsic::usub_with_overflow; |
1763 | |
1764 | InstructionCost Cost = 0; |
1765 | IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF, |
1766 | nullptr, ScalarizationCostPassed); |
1767 | Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind); |
1768 | Cost += |
1769 | thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, |
1770 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1771 | return Cost; |
1772 | } |
1773 | case Intrinsic::smul_fix: |
1774 | case Intrinsic::umul_fix: { |
1775 | unsigned ExtSize = RetTy->getScalarSizeInBits() * 2; |
1776 | Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize); |
1777 | |
1778 | unsigned ExtOp = |
1779 | IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt; |
1780 | TTI::CastContextHint CCH = TTI::CastContextHint::None; |
1781 | |
1782 | InstructionCost Cost = 0; |
1783 | Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind); |
1784 | Cost += |
1785 | thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); |
1786 | Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy, |
1787 | CCH, CostKind); |
1788 | Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy, |
1789 | CostKind, TTI::OK_AnyValue, |
1790 | TTI::OK_UniformConstantValue); |
1791 | Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind, |
1792 | TTI::OK_AnyValue, |
1793 | TTI::OK_UniformConstantValue); |
1794 | Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind); |
1795 | return Cost; |
1796 | } |
1797 | case Intrinsic::sadd_with_overflow: |
1798 | case Intrinsic::ssub_with_overflow: { |
1799 | Type *SumTy = RetTy->getContainedType(0); |
1800 | Type *OverflowTy = RetTy->getContainedType(1); |
1801 | unsigned Opcode = IID == Intrinsic::sadd_with_overflow |
1802 | ? BinaryOperator::Add |
1803 | : BinaryOperator::Sub; |
1804 | |
1805 | //   LHSSign -> LHS >= 0 |
1806 | //   RHSSign -> RHS >= 0 |
1807 | //   SumSign -> Sum >= 0 |
1808 | // |
1809 | //   Add: |
1810 | //   Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) |
1811 | //   Sub: |
1812 | //   Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) |
1813 | InstructionCost Cost = 0; |
1814 | Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); |
1815 | Cost += 3 * thisT()->getCmpSelInstrCost( |
1816 | Instruction::ICmp, SumTy, OverflowTy, |
1817 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1818 | Cost += 2 * thisT()->getCmpSelInstrCost( |
1819 | Instruction::Select, OverflowTy, OverflowTy, |
1820 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1821 | Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy, |
1822 | CostKind); |
1823 | return Cost; |
1824 | } |
1825 | case Intrinsic::uadd_with_overflow: |
1826 | case Intrinsic::usub_with_overflow: { |
1827 | Type *SumTy = RetTy->getContainedType(0); |
1828 | Type *OverflowTy = RetTy->getContainedType(1); |
1829 | unsigned Opcode = IID == Intrinsic::uadd_with_overflow |
1830 | ? BinaryOperator::Add |
1831 | : BinaryOperator::Sub; |
1832 | |
1833 | InstructionCost Cost = 0; |
1834 | Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind); |
1835 | Cost += |
1836 | thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy, |
1837 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1838 | return Cost; |
1839 | } |
1840 | case Intrinsic::smul_with_overflow: |
1841 | case Intrinsic::umul_with_overflow: { |
1842 | Type *MulTy = RetTy->getContainedType(0); |
1843 | Type *OverflowTy = RetTy->getContainedType(1); |
1844 | unsigned ExtSize = MulTy->getScalarSizeInBits() * 2; |
1845 | Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize); |
1846 | |
1847 | unsigned ExtOp = |
1848 | IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt; |
1849 | TTI::CastContextHint CCH = TTI::CastContextHint::None; |
1850 | |
1851 | InstructionCost Cost = 0; |
1852 | Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind); |
1853 | Cost += |
1854 | thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); |
1855 | Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy, |
1856 | CCH, CostKind); |
1857 | Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy, |
1858 | CostKind, TTI::OK_AnyValue, |
1859 | TTI::OK_UniformConstantValue); |
1860 | |
1861 | if (IID == Intrinsic::smul_with_overflow) |
1862 | Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy, |
1863 | CostKind, TTI::OK_AnyValue, |
1864 | TTI::OK_UniformConstantValue); |
1865 | |
1866 | Cost += |
1867 | thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy, |
1868 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
1869 | return Cost; |
1870 | } |
1871 | case Intrinsic::ctpop: |
1872 | ISDs.push_back(ISD::CTPOP); |
1873 | // In case of legalization use TCC_Expensive. This is cheaper than a |
1874 | // library call but still not a cheap instruction. |
1875 | SingleCallCost = TargetTransformInfo::TCC_Expensive; |
1876 | break; |
1877 | case Intrinsic::ctlz: |
1878 | ISDs.push_back(ISD::CTLZ); |
1879 | break; |
1880 | case Intrinsic::cttz: |
1881 | ISDs.push_back(ISD::CTTZ); |
1882 | break; |
1883 | case Intrinsic::bswap: |
1884 | ISDs.push_back(ISD::BSWAP); |
1885 | break; |
1886 | case Intrinsic::bitreverse: |
1887 | ISDs.push_back(ISD::BITREVERSE); |
1888 | break; |
1889 | } |
1890 | |
1891 | const TargetLoweringBase *TLI = getTLI(); |
1892 | std::pair<InstructionCost, MVT> LT = |
1893 | TLI->getTypeLegalizationCost(DL, RetTy); |
1894 | |
1895 | SmallVector<InstructionCost, 2> LegalCost; |
1896 | SmallVector<InstructionCost, 2> CustomCost; |
1897 | for (unsigned ISD : ISDs) { |
1898 | if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { |
1899 | if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() && |
1900 | TLI->isFAbsFree(LT.second)) { |
1901 | return 0; |
1902 | } |
1903 | |
1904 | // The operation is legal. Assume it costs 1. |
1905 | // If the type is split to multiple registers, assume that there is |
1906 | // some overhead to this. |
1907 | // TODO: Once we have extract/insert subvector costs we should use them. |
1908 | if (LT.first > 1) |
1909 | LegalCost.push_back(LT.first * 2); |
1910 | else |
1911 | LegalCost.push_back(LT.first * 1); |
1912 | } else if (!TLI->isOperationExpand(ISD, LT.second)) { |
1913 | // If the operation is custom lowered then assume |
1914 | // that the code is twice as expensive. |
1915 | CustomCost.push_back(LT.first * 2); |
1916 | } |
1917 | } |
1918 | |
1919 | auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end()); |
1920 | if (MinLegalCostI != LegalCost.end()) |
1921 | return *MinLegalCostI; |
1922 | |
1923 | auto MinCustomCostI = |
1924 | std::min_element(CustomCost.begin(), CustomCost.end()); |
1925 | if (MinCustomCostI != CustomCost.end()) |
1926 | return *MinCustomCostI; |
1927 | |
1928 | // If we can't lower fmuladd into an FMA, estimate the cost as a |
1929 | // floating point multiply followed by an add. |
1930 | if (IID == Intrinsic::fmuladd) |
1931 | return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy, |
1932 | CostKind) + |
1933 | thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy, |
1934 | CostKind); |
1935 | if (IID == Intrinsic::experimental_constrained_fmuladd) { |
1936 | IntrinsicCostAttributes FMulAttrs( |
1937 | Intrinsic::experimental_constrained_fmul, RetTy, Tys); |
1938 | IntrinsicCostAttributes FAddAttrs( |
1939 | Intrinsic::experimental_constrained_fadd, RetTy, Tys); |
1940 | return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) + |
1941 | thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind); |
1942 | } |
1943 | |
1944 | // Else, assume that we need to scalarize this intrinsic. For math |
1945 | // builtins this will emit a costly libcall, adding call overhead and |
1946 | // spills. Make it very expensive. |
1947 | if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) { |
1948 | // Scalable vectors cannot be scalarized, so return Invalid. |
1949 | if (isa<ScalableVectorType>(RetTy) || any_of(Tys, [](const Type *Ty) { |
1950 | return isa<ScalableVectorType>(Ty); |
1951 | })) |
1952 | return InstructionCost::getInvalid(); |
1953 | |
1954 | InstructionCost ScalarizationCost = |
1955 | SkipScalarizationCost ? ScalarizationCostPassed |
1956 | : getScalarizationOverhead(RetVTy, true, false); |
1957 | |
1958 | unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements(); |
1959 | SmallVector<Type *, 4> ScalarTys; |
1960 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1961 | Type *Ty = Tys[i]; |
1962 | if (Ty->isVectorTy()) |
1963 | Ty = Ty->getScalarType(); |
1964 | ScalarTys.push_back(Ty); |
1965 | } |
1966 | IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF); |
1967 | InstructionCost ScalarCost = |
1968 | thisT()->getIntrinsicInstrCost(Attrs, CostKind); |
1969 | for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { |
1970 | if (auto *VTy = dyn_cast<VectorType>(Tys[i])) { |
1971 | if (!ICA.skipScalarizationCost()) |
1972 | ScalarizationCost += getScalarizationOverhead(VTy, false, true); |
1973 | ScalarCalls = std::max(ScalarCalls, |
1974 | cast<FixedVectorType>(VTy)->getNumElements()); |
1975 | } |
1976 | } |
1977 | return ScalarCalls * ScalarCost + ScalarizationCost; |
1978 | } |
1979 | |
1980 | // This is going to be turned into a library call, make it expensive. |
1981 | return SingleCallCost; |
1982 | } |
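
The final scalarization fallback above reduces to ScalarCalls * ScalarCost + ScalarizationCost. A worked instance, assuming a 4-lane vector intrinsic whose scalar form is a library call (cost 10) and a per-lane insert/extract overhead of 1:

    #include <cstdio>

    int main() {
      const int ScalarCalls = 4, ScalarCost = 10;
      const int ScalarizationCost = 4 /* result inserts */
                                  + 4 /* operand extracts */;
      std::printf("cost = %d\n",
                  ScalarCalls * ScalarCost + ScalarizationCost); // 48
    }
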
1983 | |
1984 | /// Compute a cost of the given call instruction. |
1985 | /// |
1986 | /// Compute the cost of calling function F with return type RetTy and |
1987 | /// argument types Tys. F might be nullptr, in this case the cost of an |
1988 | /// arbitrary call with the specified signature will be returned. |
1989 | /// This is used, for instance, when we estimate a call of a vector |
1990 | /// counterpart of the given function. |
1991 | /// \param F Called function, might be nullptr. |
1992 | /// \param RetTy Return value types. |
1993 | /// \param Tys Argument types. |
1994 | /// \returns The cost of the Call instruction. |
1995 | InstructionCost |
1996 | getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys, |
1997 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) { |
1998 | return 10; |
1999 | } |
2000 | |
2001 | unsigned getNumberOfParts(Type *Tp) { |
2002 | std::pair<InstructionCost, MVT> LT = |
2003 | getTLI()->getTypeLegalizationCost(DL, Tp); |
2004 | return *LT.first.getValue(); |
2005 | } |
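
getNumberOfParts() reports LT.first: how many legal registers the type legalizer splits Tp into. The arithmetic amounts to a ceiling division of the type width by the legal register width. A sketch assuming a 128-bit legal vector width (illustrative, not a specific target):

    #include <cstdio>

    int main() {
      const int TypeBits = 8 * 64;  // e.g. <8 x i64> = 512 bits
      const int LegalRegBits = 128; // assumed legal vector width
      std::printf("parts = %d\n",
                  (TypeBits + LegalRegBits - 1) / LegalRegBits); // 4
    }
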
2006 | |
2007 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *, |
2008 | const SCEV *) { |
2009 | return 0; |
2010 | } |
2011 | |
2012 | /// Try to calculate arithmetic and shuffle op costs for reduction |
2013 | /// intrinsics. |
2014 | /// |
2015 | /// The reduction is assumed to be performed pairwise: at each step the |
2016 | /// upper half of the vector is shuffled down, e.g. |
2017 | /// |
2018 | ///   %val1 = shufflevector <n x t> %val, <n x t> undef, |
2019 | ///           <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 undef, ...> |
2020 | ///   %red1 = op <n x t> %val, <n x t> %val1 |
2021 | /// |
2022 | /// after which only the first n/2 lanes of %red1 are meaningful; the |
2023 | /// remaining lanes are undefined and can be ignored. The process |
2024 | /// repeats on vectors of effective length n/2, n/4, ... until a single |
2025 | /// meaningful lane remains, which is then extracted. |
2026 | |
2027 | |
2028 | |
2029 | |
2030 | |
2031 | |
2032 | |
2033 | InstructionCost getTreeReductionCost(unsigned Opcode, VectorType *Ty, |
2034 | TTI::TargetCostKind CostKind) { |
2035 | Type *ScalarTy = Ty->getElementType(); |
2036 | unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements(); |
2037 | if ((Opcode == Instruction::Or || Opcode == Instruction::And) && |
2038 | ScalarTy == IntegerType::getInt1Ty(Ty->getContext()) && |
2039 | NumVecElts >= 2) { |
2040 | |
2041 | |
2042 | |
2043 | |
2044 | |
2045 | |
2046 | Type *ValTy = IntegerType::get(Ty->getContext(), NumVecElts); |
2047 | return thisT()->getCastInstrCost(Instruction::BitCast, ValTy, Ty, |
2048 | TTI::CastContextHint::None, CostKind) + |
2049 | thisT()->getCmpSelInstrCost(Instruction::ICmp, ValTy, |
2050 | CmpInst::makeCmpResultType(ValTy), |
2051 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
2052 | } |
2053 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
2054 | InstructionCost ArithCost = 0; |
2055 | InstructionCost ShuffleCost = 0; |
2056 | std::pair<InstructionCost, MVT> LT = |
2057 | thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); |
2058 | unsigned LongVectorCount = 0; |
2059 | unsigned MVTLen = |
2060 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
2061 | while (NumVecElts > MVTLen) { |
2062 | NumVecElts /= 2; |
2063 | VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); |
2064 | ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, |
2065 | NumVecElts, SubTy); |
2066 | ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind); |
2067 | Ty = SubTy; |
2068 | ++LongVectorCount; |
2069 | } |
2070 | |
2071 | NumReduxLevels -= LongVectorCount; |
2072 | |
2073 | |
2074 | |
2075 | |
2076 | |
2077 | |
2078 | |
2079 | ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( |
2080 | TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); |
2081 | ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty); |
2082 | return ShuffleCost + ArithCost + |
2083 | thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
2084 | } |
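
The tree reduction therefore costs one shuffle plus one arithmetic op per halving level, Log2(NumVecElts) levels in total, plus a final element extract (the long-vector loop above only changes which type each level uses). A standalone check with assumed unit costs:

    #include <cstdio>

    int main() {
      const int NumVecElts = 8, ShuffleCost = 1, OpCost = 1, ExtractCost = 1;
      int Levels = 0;
      for (int N = NumVecElts; N > 1; N /= 2) ++Levels; // Log2_32(8) == 3
      std::printf("cost = %d\n",
                  Levels * (ShuffleCost + OpCost) + ExtractCost); // 7
    }
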
2085 | |
2086 | /// Try to calculate the cost of performing strict (in-order) |
2087 | /// reductions, which involves doing a sequence of floating point |
2088 | /// additions in lane order, starting with an initial value. For |
2089 | /// example, for a scalar initial value 'InitVal' and a <4 x float>: |
2090 | /// |
2091 | ///   %v0 = extractelement <4 x float> %vec, i32 0 |
2092 | ///   %add1 = fadd float %InitVal, %v0 |
2093 | ///   %v1 = extractelement <4 x float> %vec, i32 1 |
2094 | ///   %add2 = fadd float %add1, %v1 |
2095 | ///   ... |
2096 | /// |
2097 | /// As such, the cost is one extract plus one scalar op per lane. |
2098 | |
2099 | |
2100 | |
2101 | |
2102 | InstructionCost getOrderedReductionCost(unsigned Opcode, VectorType *Ty, |
2103 | TTI::TargetCostKind CostKind) { |
2104 | |
2105 | |
2106 | if (isa<ScalableVectorType>(Ty)) |
2107 | return InstructionCost::getInvalid(); |
2108 | |
2109 | auto *VTy = cast<FixedVectorType>(Ty); |
2110 | InstructionCost ExtractCost = |
2111 | getScalarizationOverhead(VTy, false, true); |
2112 | InstructionCost ArithCost = thisT()->getArithmeticInstrCost( |
2113 | Opcode, VTy->getElementType(), CostKind); |
2114 | ArithCost *= VTy->getNumElements(); |
2115 | |
2116 | return ExtractCost + ArithCost; |
2117 | } |
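
Unlike the tree reduction, the ordered form is linear in the lane count: one extract plus one scalar op per lane. With assumed unit costs:

    #include <cstdio>

    int main() {
      const int NumElts = 4, ExtractCost = 1, ScalarOpCost = 1;
      std::printf("cost = %d\n", NumElts * (ExtractCost + ScalarOpCost)); // 8
    }
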
2118 | |
2119 | InstructionCost getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
2120 | Optional<FastMathFlags> FMF, |
2121 | TTI::TargetCostKind CostKind) { |
2122 | if (TTI::requiresOrderedReduction(FMF)) |
2123 | return getOrderedReductionCost(Opcode, Ty, CostKind); |
2124 | return getTreeReductionCost(Opcode, Ty, CostKind); |
2125 | } |
2126 | |
2127 | /// Try to calculate op costs for min/max reduction operations. |
2128 | /// \param CondTy Conditional type for the Select instruction. |
2129 | InstructionCost getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, |
2130 | bool IsUnsigned, |
2131 | TTI::TargetCostKind CostKind) { |
2132 | Type *ScalarTy = Ty->getElementType(); |
2133 | Type *ScalarCondTy = CondTy->getElementType(); |
2134 | unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements(); |
2135 | unsigned NumReduxLevels = Log2_32(NumVecElts); |
2136 | unsigned CmpOpcode; |
2137 | if (Ty->isFPOrFPVectorTy()) { |
2138 | CmpOpcode = Instruction::FCmp; |
2139 | } else { |
2140 | assert(Ty->isIntOrIntVectorTy() && |
2141 | "expecting floating point or integer type for min/max reduction"); |
2142 | CmpOpcode = Instruction::ICmp; |
2143 | } |
2144 | InstructionCost MinMaxCost = 0; |
2145 | InstructionCost ShuffleCost = 0; |
2146 | std::pair<InstructionCost, MVT> LT = |
2147 | thisT()->getTLI()->getTypeLegalizationCost(DL, Ty); |
2148 | unsigned LongVectorCount = 0; |
2149 | unsigned MVTLen = |
2150 | LT.second.isVector() ? LT.second.getVectorNumElements() : 1; |
2151 | while (NumVecElts > MVTLen) { |
2152 | NumVecElts /= 2; |
2153 | auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts); |
2154 | CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts); |
2155 | |
2156 | ShuffleCost += thisT()->getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, |
2157 | NumVecElts, SubTy); |
2158 | MinMaxCost += |
2159 | thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, |
2160 | CmpInst::BAD_ICMP_PREDICATE, CostKind) + |
2161 | thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy, |
2162 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
2163 | Ty = SubTy; |
2164 | ++LongVectorCount; |
2165 | } |
2166 | |
2167 | NumReduxLevels -= LongVectorCount; |
2168 | |
2169 | |
2170 | |
2171 | |
2172 | |
2173 | ShuffleCost += NumReduxLevels * thisT()->getShuffleCost( |
2174 | TTI::SK_PermuteSingleSrc, Ty, None, 0, Ty); |
2175 | MinMaxCost += |
2176 | NumReduxLevels * |
2177 | (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, |
2178 | CmpInst::BAD_ICMP_PREDICATE, CostKind) + |
2179 | thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
2180 | CmpInst::BAD_ICMP_PREDICATE, CostKind)); |
2181 | |
2182 | |
2183 | return ShuffleCost + MinMaxCost + |
2184 | thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
2185 | } |
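
This follows the same tree shape as getTreeReductionCost, except each level pays a compare plus a select in place of a single arithmetic op. With assumed unit costs:

    #include <cstdio>

    int main() {
      const int NumVecElts = 8, ShuffleCost = 1, CmpCost = 1, SelCost = 1;
      int Levels = 0;
      for (int N = NumVecElts; N > 1; N /= 2) ++Levels; // Log2_32(8) == 3
      std::printf("cost = %d\n",
                  Levels * (ShuffleCost + CmpCost + SelCost) + 1); // 10
    }
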
2186 | |
2187 | InstructionCost getExtendedAddReductionCost(bool IsMLA, bool IsUnsigned, |
2188 | Type *ResTy, VectorType *Ty, |
2189 | TTI::TargetCostKind CostKind) { |
2190 | // Without any native support, this is equivalent to the cost of |
2191 | // vecreduce.add(ext) or, if IsMLA, vecreduce.add(mul(ext, ext)). |
2192 | VectorType *ExtTy = VectorType::get(ResTy, Ty); |
2193 | InstructionCost RedCost = thisT()->getArithmeticReductionCost( |
2194 | Instruction::Add, ExtTy, None, CostKind); |
2195 | InstructionCost MulCost = 0; |
2196 | InstructionCost ExtCost = thisT()->getCastInstrCost( |
2197 | IsUnsigned ? Instruction::ZExt : Instruction::SExt, ExtTy, Ty, |
2198 | TTI::CastContextHint::None, CostKind); |
2199 | if (IsMLA) { |
2200 | MulCost = |
2201 | thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind); |
2202 | ExtCost *= 2; |
2203 | } |
2204 | |
2205 | return RedCost + MulCost + ExtCost; |
2206 | } |
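
So the fallback price is RedCost + MulCost + ExtCost, with the extension cost doubled in the multiply-accumulate form because both multiplicands are extended. With assumed unit costs:

    #include <cstdio>

    int main() {
      const bool IsMLA = true;
      const int RedCost = 3, ExtUnit = 1, MulUnit = 1;
      std::printf("cost = %d\n",
                  RedCost + (IsMLA ? MulUnit + 2 * ExtUnit : ExtUnit)); // 6
    }
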
2207 | |
2208 | InstructionCost getVectorSplitCost() { return 1; } |
2209 | |
2210 | |
2211 | }; |
2212 | |
2213 | /// Concrete BasicTTIImpl that can be used when no further customization |
2214 | /// is needed. |
2215 | class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> { |
2216 | using BaseT = BasicTTIImplBase<BasicTTIImpl>; |
2217 | |
2218 | friend class BasicTTIImplBase<BasicTTIImpl>; |
2219 | |
2220 | const TargetSubtargetInfo *ST; |
2221 | const TargetLoweringBase *TLI; |
2222 | |
2223 | const TargetSubtargetInfo *getST() const { return ST; } |
2224 | const TargetLoweringBase *getTLI() const { return TLI; } |
2225 | |
2226 | public: |
2227 | explicit BasicTTIImpl(const TargetMachine *TM, const Function &F); |
2228 | }; |
2229 | |
2230 | } |
2231 | |
2232 | #endif // LLVM_CODEGEN_BASICTTIIMPL_H |
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
22 | #define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H |
23 | |
24 | #include "llvm/IR/InstrTypes.h" |
25 | #include "llvm/IR/Operator.h" |
26 | #include "llvm/IR/PassManager.h" |
27 | #include "llvm/Pass.h" |
28 | #include "llvm/Support/AtomicOrdering.h" |
29 | #include "llvm/Support/BranchProbability.h" |
30 | #include "llvm/Support/DataTypes.h" |
31 | #include "llvm/Support/InstructionCost.h" |
32 | #include <functional> |
33 | |
34 | namespace llvm { |
35 | |
36 | namespace Intrinsic { |
37 | typedef unsigned ID; |
38 | } |
39 | |
40 | class AssumptionCache; |
41 | class BlockFrequencyInfo; |
42 | class DominatorTree; |
43 | class BranchInst; |
44 | class CallBase; |
45 | class ExtractElementInst; |
46 | class Function; |
47 | class GlobalValue; |
48 | class InstCombiner; |
49 | class OptimizationRemarkEmitter; |
50 | class IntrinsicInst; |
51 | class LoadInst; |
52 | class LoopAccessInfo; |
53 | class Loop; |
54 | class LoopInfo; |
55 | class ProfileSummaryInfo; |
56 | class RecurrenceDescriptor; |
57 | class SCEV; |
58 | class ScalarEvolution; |
59 | class StoreInst; |
60 | class SwitchInst; |
61 | class TargetLibraryInfo; |
62 | class Type; |
63 | class User; |
64 | class Value; |
65 | class VPIntrinsic; |
66 | struct KnownBits; |
67 | template <typename T> class Optional; |
68 | |
69 | |
70 | struct MemIntrinsicInfo { |
71 | |
72 | |
73 | |
74 | |
75 | Value *PtrVal = nullptr; |
76 | |
77 | |
78 | AtomicOrdering Ordering = AtomicOrdering::NotAtomic; |
79 | |
80 | |
81 | unsigned short MatchingId = 0; |
82 | |
83 | bool ReadMem = false; |
84 | bool WriteMem = false; |
85 | bool IsVolatile = false; |
86 | |
87 | bool isUnordered() const { |
88 | return (Ordering == AtomicOrdering::NotAtomic || |
89 | Ordering == AtomicOrdering::Unordered) && |
90 | !IsVolatile; |
91 | } |
92 | }; |
93 | |
94 | |
95 | struct HardwareLoopInfo { |
96 | HardwareLoopInfo() = delete; |
97 | HardwareLoopInfo(Loop *L) : L(L) {} |
98 | Loop *L = nullptr; |
99 | BasicBlock *ExitBlock = nullptr; |
100 | BranchInst *ExitBranch = nullptr; |
101 | const SCEV *ExitCount = nullptr; |
102 | IntegerType *CountType = nullptr; |
103 | Value *LoopDecrement = nullptr; |
104 | |
105 | bool IsNestingLegal = false; |
106 | |
107 | bool CounterInReg = false; |
108 | |
109 | bool PerformEntryTest = false; |
110 | |
111 | |
112 | bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI, |
113 | DominatorTree &DT, bool ForceNestedLoop = false, |
114 | bool ForceHardwareLoopPHI = false); |
115 | bool canAnalyze(LoopInfo &LI); |
116 | }; |
117 | |
118 | class IntrinsicCostAttributes { |
119 | const IntrinsicInst *II = nullptr; |
120 | Type *RetTy = nullptr; |
121 | Intrinsic::ID IID; |
122 | SmallVector<Type *, 4> ParamTys; |
123 | SmallVector<const Value *, 4> Arguments; |
124 | FastMathFlags FMF; |
125 | |
126 | |
127 | InstructionCost ScalarizationCost = InstructionCost::getInvalid(); |
128 | |
129 | public: |
130 | IntrinsicCostAttributes( |
131 | Intrinsic::ID Id, const CallBase &CI, |
132 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
133 | |
134 | IntrinsicCostAttributes( |
135 | Intrinsic::ID Id, Type *RTy, ArrayRef<Type *> Tys, |
136 | FastMathFlags Flags = FastMathFlags(), const IntrinsicInst *I = nullptr, |
137 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
138 | |
139 | IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy, |
140 | ArrayRef<const Value *> Args); |
141 | |
142 | IntrinsicCostAttributes( |
143 | Intrinsic::ID Id, Type *RTy, ArrayRef<const Value *> Args, |
144 | ArrayRef<Type *> Tys, FastMathFlags Flags = FastMathFlags(), |
145 | const IntrinsicInst *I = nullptr, |
146 | InstructionCost ScalarCost = InstructionCost::getInvalid()); |
147 | |
148 | Intrinsic::ID getID() const { return IID; } |
149 | const IntrinsicInst *getInst() const { return II; } |
150 | Type *getReturnType() const { return RetTy; } |
151 | FastMathFlags getFlags() const { return FMF; } |
152 | InstructionCost getScalarizationCost() const { return ScalarizationCost; } |
153 | const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; } |
154 | const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; } |
155 | |
156 | bool isTypeBasedOnly() const { |
157 | return Arguments.empty(); |
| 13 | Calling 'SmallVectorBase::empty' |
| 16 | Returning from 'SmallVectorBase::empty' |
| 17 | Returning zero, which participates in a condition later |
158 | } |
159 | |
160 | bool skipScalarizationCost() const { return ScalarizationCost.isValid(); } |
161 | }; |
162 | |
163 | class TargetTransformInfo; |
164 | typedef TargetTransformInfo TTI; |
165 | |
166 | |
167 | |
168 | class TargetTransformInfo { |
169 | public: |
170 | |
171 | |
172 | |
173 | |
174 | |
175 | template <typename T> TargetTransformInfo(T Impl); |
176 | |
177 | |
178 | |
179 | |
180 | |
181 | |
182 | explicit TargetTransformInfo(const DataLayout &DL); |
183 | |
184 | |
185 | TargetTransformInfo(TargetTransformInfo &&Arg); |
186 | TargetTransformInfo &operator=(TargetTransformInfo &&RHS); |
187 | |
188 | |
189 | |
190 | ~TargetTransformInfo(); |
191 | |
192 | |
193 | |
194 | |
195 | |
196 | |
197 | bool invalidate(Function &, const PreservedAnalyses &, |
198 | FunctionAnalysisManager::Invalidator &) { |
199 | // FIXME: We should probably in some way ensure that the subtarget |
200 | // information for a function hasn't changed. |
201 | return false; |
202 | } |
203 | |
204 | |
205 | |
206 | |
207 | |
208 | |
209 | |
210 | |
211 | enum TargetCostKind { |
212 | TCK_RecipThroughput, ///< Reciprocal throughput. |
213 | TCK_Latency,         ///< The latency of instruction. |
214 | TCK_CodeSize,        ///< Instruction code size. |
215 | TCK_SizeAndLatency   ///< The weighted sum of size and latency. |
216 | }; |
217 | |
218 | |
219 | |
220 | |
221 | |
222 | |
223 | |
224 | |
225 | InstructionCost getInstructionCost(const Instruction *I, |
226 | enum TargetCostKind kind) const { |
227 | InstructionCost Cost; |
228 | switch (kind) { |
229 | case TCK_RecipThroughput: |
230 | Cost = getInstructionThroughput(I); |
231 | break; |
232 | case TCK_Latency: |
233 | Cost = getInstructionLatency(I); |
234 | break; |
235 | case TCK_CodeSize: |
236 | case TCK_SizeAndLatency: |
237 | Cost = getUserCost(I, kind); |
238 | break; |
239 | } |
240 | return Cost; |
241 | } |
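
A caller-side sketch of this entry point (the helper name is illustrative; the API is the one declared above):

    #include "llvm/Analysis/TargetTransformInfo.h"

    // Asks TTI for the reciprocal-throughput cost of an instruction; the
    // switch above routes this kind to getInstructionThroughput().
    llvm::InstructionCost throughputOf(const llvm::Instruction &I,
                                       const llvm::TargetTransformInfo &TTI) {
      return TTI.getInstructionCost(
          &I, llvm::TargetTransformInfo::TCK_RecipThroughput);
    }
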
242 | |
243 | |
244 | |
245 | |
246 | |
247 | |
248 | |
249 | |
250 | |
251 | |
252 | |
253 | |
254 | |
255 | |
256 | |
257 | |
258 | |
259 | |
260 | |
261 | enum TargetCostConstants { |
262 | TCC_Free = 0,     ///< Expected to fold away in lowering. |
263 | TCC_Basic = 1,    ///< The cost of a typical 'add' instruction. |
264 | TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86. |
265 | }; |
266 | |
267 | |
268 | InstructionCost |
269 | getGEPCost(Type *PointeeType, const Value *Ptr, |
270 | ArrayRef<const Value *> Operands, |
271 | TargetCostKind CostKind = TCK_SizeAndLatency) const; |
272 | |
273 | |
274 | |
275 | |
276 | |
277 | |
278 | |
279 | unsigned getInliningThresholdMultiplier() const; |
280 | |
281 | |
282 | unsigned adjustInliningThreshold(const CallBase *CB) const; |
283 | |
284 | |
285 | |
286 | |
287 | |
288 | |
289 | |
290 | |
291 | |
292 | |
293 | |
294 | int getInlinerVectorBonusPercent() const; |
295 | |
296 | |
297 | |
298 | InstructionCost getMemcpyCost(const Instruction *I) const; |
299 | |
300 | |
301 | |
302 | |
303 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
304 | unsigned &JTSize, |
305 | ProfileSummaryInfo *PSI, |
306 | BlockFrequencyInfo *BFI) const; |
307 | |
308 | |
309 | |
310 | |
311 | |
312 | |
313 | |
314 | |
315 | |
316 | |
317 | |
318 | |
319 | |
320 | |
321 | InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, |
322 | TargetCostKind CostKind) const; |
323 | |
324 | |
325 | |
326 | InstructionCost getUserCost(const User *U, TargetCostKind CostKind) const { |
327 | SmallVector<const Value *, 4> Operands(U->operand_values()); |
328 | return getUserCost(U, Operands, CostKind); |
329 | } |
330 | |
331 | |
332 | |
333 | BranchProbability getPredictableBranchThreshold() const; |
334 | |
335 | |
336 | |
337 | |
338 | |
339 | |
340 | bool hasBranchDivergence() const; |
341 | |
342 | |
343 | |
344 | bool useGPUDivergenceAnalysis() const; |
345 | |
346 | |
347 | |
348 | |
349 | |
350 | |
351 | |
352 | bool isSourceOfDivergence(const Value *V) const; |
353 | |
354 | |
355 | |
356 | |
357 | bool isAlwaysUniform(const Value *V) const; |
358 | |
359 | |
360 | |
361 | |
362 | |
363 | |
364 | |
365 | |
366 | |
367 | |
368 | |
369 | |
370 | |
371 | |
372 | |
373 | |
374 | |
375 | unsigned getFlatAddressSpace() const; |
376 | |
377 | |
378 | |
379 | |
380 | |
381 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
382 | Intrinsic::ID IID) const; |
383 | |
384 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const; |
385 | |
386 | unsigned getAssumedAddrSpace(const Value *V) const; |
387 | |
388 | |
389 | |
390 | |
391 | |
392 | |
393 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
394 | Value *NewV) const; |
395 | |
396 | |
397 | |
398 | |
399 | |
400 | |
401 | |
402 | |
403 | |
404 | |
405 | |
406 | |
407 | |
408 | bool isLoweredToCall(const Function *F) const; |
409 | |
410 | struct LSRCost { |
411 | |
412 | |
413 | unsigned Insns; |
414 | unsigned NumRegs; |
415 | unsigned AddRecCost; |
416 | unsigned NumIVMuls; |
417 | unsigned NumBaseAdds; |
418 | unsigned ImmCost; |
419 | unsigned SetupCost; |
420 | unsigned ScaleCost; |
421 | }; |
422 | |
423 | |
424 | struct UnrollingPreferences { |
425 | |
426 | |
427 | |
428 | |
429 | |
430 | |
431 | |
432 | unsigned Threshold; |
433 | |
434 | |
435 | |
436 | |
437 | |
438 | |
439 | |
440 | |
441 | |
442 | |
443 | unsigned MaxPercentThresholdBoost; |
444 | |
445 | |
446 | unsigned OptSizeThreshold; |
447 | |
448 | |
449 | unsigned PartialThreshold; |
450 | |
451 | |
452 | |
453 | unsigned PartialOptSizeThreshold; |
454 | |
455 | |
456 | |
457 | |
458 | unsigned Count; |
459 | |
460 | unsigned DefaultUnrollRuntimeCount; |
461 | |
462 | |
463 | |
464 | |
465 | unsigned MaxCount; |
466 | |
467 | |
468 | |
469 | unsigned FullUnrollMaxCount; |
470 | |
471 | |
472 | |
473 | |
474 | unsigned BEInsns; |
475 | |
476 | |
477 | bool Partial; |
478 | |
479 | |
480 | |
481 | bool Runtime; |
482 | |
483 | bool AllowRemainder; |
484 | |
485 | |
486 | bool AllowExpensiveTripCount; |
487 | |
488 | |
489 | bool Force; |
490 | |
491 | bool UpperBound; |
492 | |
493 | bool UnrollRemainder; |
494 | |
495 | bool UnrollAndJam; |
496 | |
497 | |
498 | |
499 | |
500 | unsigned UnrollAndJamInnerLoopThreshold; |
501 | |
502 | |
503 | unsigned MaxIterationsCountToAnalyze; |
504 | }; |
505 | |
506 | |
507 | |
508 | |
509 | void getUnrollingPreferences(Loop *L, ScalarEvolution &, |
510 | UnrollingPreferences &UP, |
511 | OptimizationRemarkEmitter *ORE) const; |
512 | |
513 | |
514 | |
515 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
516 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
517 | HardwareLoopInfo &HWLoopInfo) const; |
518 | |
519 | |
520 | |
521 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
522 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
523 | DominatorTree *DT, |
524 | const LoopAccessInfo *LAI) const; |
525 | |
526 | |
527 | |
528 | bool emitGetActiveLaneMask() const; |
529 | |
530 | |
531 | struct PeelingPreferences { |
532 | |
533 | |
534 | |
535 | unsigned PeelCount; |
536 | |
537 | bool AllowPeeling; |
538 | |
539 | bool AllowLoopNestsPeeling; |
540 | |
541 | |
542 | |
543 | |
544 | bool PeelProfiledIterations; |
545 | }; |
546 | |
547 | |
548 | |
549 | |
550 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
551 | PeelingPreferences &PP) const; |
552 | |
553 | |
554 | |
555 | |
556 | |
557 | |
558 | |
559 | |
560 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
561 | IntrinsicInst &II) const; |
562 | |
563 | |
564 | Optional<Value *> |
565 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
566 | APInt DemandedMask, KnownBits &Known, |
567 | bool &KnownBitsComputed) const; |
568 | |
569 | |
570 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
571 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
572 | APInt &UndefElts2, APInt &UndefElts3, |
573 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
574 | SimplifyAndSetOp) const; |
575 | |
576 | |
577 | |
578 | |
579 | |
580 | |
581 | |
582 | |
583 | |
584 | |
585 | |
586 | |
587 | |
588 | enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware }; |
589 | |
590 | |
591 | |
592 | |
593 | bool isLegalAddImmediate(int64_t Imm) const; |
594 | |
595 | |
596 | |
597 | |
598 | |
599 | bool isLegalICmpImmediate(int64_t Imm) const; |
600 | |
601 | |
602 | |
603 | |
604 | |
605 | |
606 | |
607 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
608 | bool HasBaseReg, int64_t Scale, |
609 | unsigned AddrSpace = 0, |
610 | Instruction *I = nullptr) const; |
611 | |
612 | |
613 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
614 | TargetTransformInfo::LSRCost &C2) const; |
615 | |
616 | |
617 | |
618 | |
619 | bool isNumRegsMajorCostOfLSR() const; |
620 | |
621 | |
622 | bool isProfitableLSRChainElement(Instruction *I) const; |
623 | |
624 | |
625 | |
626 | |
627 | bool canMacroFuseCmp() const; |
628 | |
629 | |
630 | |
631 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
632 | DominatorTree *DT, AssumptionCache *AC, |
633 | TargetLibraryInfo *LibInfo) const; |
634 | |
635 | enum AddressingModeKind { |
636 | AMK_PreIndexed, |
637 | AMK_PostIndexed, |
638 | AMK_None |
639 | }; |
640 | |
641 | /// Return the preferred addressing mode LSR should make efforts to generate. |
642 | AddressingModeKind getPreferredAddressingMode(const Loop *L, |
643 | ScalarEvolution *SE) const; |
644 | |
645 | /// Return true if the target supports masked store. |
646 | bool isLegalMaskedStore(Type *DataType, Align Alignment) const; |
647 | /// Return true if the target supports masked load. |
648 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) const; |
649 | |
650 | /// Return true if the target supports nontemporal store. |
651 | bool isLegalNTStore(Type *DataType, Align Alignment) const; |
652 | /// Return true if the target supports nontemporal load. |
653 | bool isLegalNTLoad(Type *DataType, Align Alignment) const; |
654 | |
655 | /// Return true if the target supports masked scatter. |
656 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) const; |
657 | /// Return true if the target supports masked gather. |
658 | bool isLegalMaskedGather(Type *DataType, Align Alignment) const; |
659 | |
660 | /// Return true if the target supports masked compress store. |
661 | bool isLegalMaskedCompressStore(Type *DataType) const; |
662 | /// Return true if the target supports masked expand load. |
663 | bool isLegalMaskedExpandLoad(Type *DataType) const; |
664 | |
665 | /// Return true if we should be enabling ordered reductions for the target. |
666 | bool enableOrderedReductions() const; |
667 | |
668 | /// Return true if the target has a unified operation to calculate division |
669 | /// and remainder. If so, the additional implicit multiplication and |
670 | /// subtraction required to calculate a remainder from division are free. |
671 | /// This can enable more aggressive transformations for division and |
672 | /// remainder than would typically be allowed using throughput or size cost. |
673 | bool hasDivRemOp(Type *DataType, bool IsSigned) const; |
674 | |
675 | /// Return true if the given instruction (assumed to be a memory access |
676 | /// instruction) has a volatile variant. If that's the case then we can avoid |
677 | /// addrspacecast to generic AS for volatile loads/stores. Default |
678 | /// implementation returns false, which prevents address space inference for |
679 | /// volatile loads/stores. |
680 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const; |
681 | |
682 | /// Return true if target doesn't mind addresses in vectors. |
683 | bool prefersVectorizedAddressing() const; |
684 | |
685 | /// Return the cost of the scaling factor used in the addressing |
686 | /// mode represented by AM for this target, for a load/store |
687 | /// of the specified type. |
688 | /// If the AM is supported, the return value must be >= 0. |
689 | /// If the AM is not supported, it returns a negative value. |
690 | /// TODO: Handle pre/postinc as well. |
691 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
692 | int64_t BaseOffset, bool HasBaseReg, |
693 | int64_t Scale, |
694 | unsigned AddrSpace = 0) const; |
695 | |
696 | /// Return true if the loop strength reduce pass should make |
697 | /// Instruction* based TTI queries to isLegalAddressingMode(). This is |
698 | /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned |
699 | /// immediate offset and no index register. |
700 | bool LSRWithInstrQueries() const; |
701 | |
702 | /// Return true if it's free to truncate a value of type Ty1 to type |
703 | /// Ty2. e.g. On x86 it's free to truncate an i32 value in register EAX to |
704 | /// i16 by referencing its sub-register AX. |
705 | bool isTruncateFree(Type *Ty1, Type *Ty2) const; |
706 | |
707 | /// Return true if it is profitable to hoist instruction in the |
708 | /// then/else to before if. |
709 | bool isProfitableToHoist(Instruction *I) const; |
710 | |
711 | bool useAA() const; |
712 | |
713 | /// Return true if this type is legal. |
714 | bool isTypeLegal(Type *Ty) const; |
715 | |
716 | /// Returns the estimated number of registers required to represent \p Ty. |
717 | InstructionCost getRegUsageForType(Type *Ty) const; |
718 | |
719 | /// Return true if switches should be turned into lookup tables for the |
720 | /// target. |
721 | bool shouldBuildLookupTables() const; |
722 | |
723 | /// Return true if switches should be turned into lookup tables |
724 | /// containing this constant value for the target. |
725 | bool shouldBuildLookupTablesForConstant(Constant *C) const; |
726 | |
727 | /// Return true if lookup tables should be turned into relative lookup tables. |
728 | bool shouldBuildRelLookupTables() const; |
729 | |
730 | /// Return true if the input function which is cold at all call sites, |
731 | /// should use coldcc calling convention. |
732 | bool useColdCCForColdCall(Function &F) const; |
733 | |
734 | /// Estimate the overhead of scalarizing an instruction. Insert and Extract |
735 | /// are set if the demanded result elements need to be inserted and/or |
736 | /// extracted from vectors. |
737 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
738 | const APInt &DemandedElts, |
739 | bool Insert, bool Extract) const; |
740 | |
741 | /// Estimate the overhead of scalarizing an instruction's unique |
742 | /// non-constant operands. The (potentially vector) types to use for each |
743 | /// argument are passed via Tys. |
744 | InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
745 | ArrayRef<Type *> Tys) const; |
746 | |
747 | /// If target has efficient vector element load/store instructions, it can |
748 | /// return true here so that insertion/extraction costs are not added to |
749 | /// the scalarization cost of a load/store. |
750 | bool supportsEfficientVectorElementLoadStore() const; |
751 | |
752 | /// Don't restrict interleaved unrolling to small loops. |
753 | bool enableAggressiveInterleaving(bool LoopHasReductions) const; |
754 | |
755 | /// Returns options for expansion of memcmp. IsZeroCmp is true if this is |
756 | /// the expansion of memcmp(p1, p2, s) == 0. |
757 | struct MemCmpExpansionOptions { |
758 | // Return true if memcmp expansion is enabled. |
759 | operator bool() const { return MaxNumLoads > 0; } |
760 | |
761 | // Maximum number of load operations. |
762 | unsigned MaxNumLoads = 0; |
763 | |
764 | // The list of available load sizes (in bytes), sorted in decreasing order. |
765 | SmallVector<unsigned, 8> LoadSizes; |
766 | |
767 | // For memcmp expansions compared equal/not-equal to zero, allow up to |
768 | // this number of load pairs per block. For example, this may allow |
769 | // 'memcmp(a, b, 3) == 0' to be expanded in a single block: |
770 | //   a0 = load2bytes &a[0] |
771 | //   b0 = load2bytes &b[0] |
772 | //   a2 = load1byte  &a[2] |
773 | //   b2 = load1byte  &b[2] |
774 | //   r  = cmp eq (a0 ^ b0 | a2 ^ b2), 0 |
775 | unsigned NumLoadsPerBlock = 1; |
776 | |
777 | // Set to true to allow overlapping loads. For example, 7-byte compares can |
778 | // be done with two 4-byte compares instead of 4+2+1-byte compares. This |
779 | // requires all loads in LoadSizes to be doable in an unaligned way. |
780 | bool AllowOverlappingLoads = false; |
781 | }; |
782 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
783 | bool IsZeroCmp) const; |
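// Usage sketch (hypothetical target override, not from this header): a
// 64-bit target could allow memcmp calls to be expanded into up to four
// 8/4/2/1-byte loads per side:
//
//   TTI::MemCmpExpansionOptions
//   MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
//     TTI::MemCmpExpansionOptions Options;
//     Options.MaxNumLoads = OptSize ? 2 : 4;   // fewer loads when -Os
//     Options.LoadSizes = {8, 4, 2, 1};        // decreasing order, in bytes
//     Options.AllowOverlappingLoads = true;
//     return Options;
//   }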
784 | |
785 | /// Enable matching of interleaved access groups. |
786 | bool enableInterleavedAccessVectorization() const; |
787 | |
788 | /// Enable matching of interleaved access groups that contain predicated |
789 | /// accesses or gaps and are therefore vectorized using masked |
790 | /// vector loads/stores. |
791 | bool enableMaskedInterleavedAccessVectorization() const; |
792 | |
793 | /// Indicate that it is potentially unsafe to automatically vectorize |
794 | /// floating-point operations because the semantics of vector and scalar |
795 | /// floating-point operations may differ. For example, ARM NEON v7 SIMD |
796 | /// math does not support IEEE-754 denormal numbers, so an auto-vectorizer |
797 | /// that does not account for this may change the numerical results |
798 | /// relative to the scalar code. Targets for which this is a concern |
799 | /// should return true so that clients can act conservatively. |
800 | bool isFPVectorizationPotentiallyUnsafe() const; |
801 | |
802 | /// Determine if the target supports unaligned memory accesses. |
803 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
804 | unsigned AddressSpace = 0, |
805 | Align Alignment = Align(1), |
806 | bool *Fast = nullptr) const; |
807 | |
808 | /// Return hardware support for population count. |
809 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const; |
810 | |
811 | /// Return true if the hardware has a fast square-root instruction. |
812 | bool haveFastSqrt(Type *Ty) const; |
813 | |
814 | /// Return true if it is faster to check if a floating point value is NaN |
815 | /// (or not-NaN) versus a comparison against a constant FP zero value. |
816 | /// Targets should override this if materializing a 0.0 for comparison is |
817 | /// not as cheap as checking for ordered/unordered. |
818 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const; |
819 | |
820 | /// Return the expected cost of supporting the floating point operation |
821 | /// of the specified type. |
822 | InstructionCost getFPOpCost(Type *Ty) const; |
823 | |
824 | /// Return the expected cost of materializing for the given integer |
825 | /// immediate of the specified type. |
826 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
827 | TargetCostKind CostKind) const; |
828 | |
829 | /// Return the expected cost of materialization for the given integer |
830 | /// immediate of the specified type for a given instruction. The cost can be |
831 | /// zero if the immediate can be folded into the specified instruction. |
832 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
833 | const APInt &Imm, Type *Ty, |
834 | TargetCostKind CostKind, |
835 | Instruction *Inst = nullptr) const; |
836 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
837 | const APInt &Imm, Type *Ty, |
838 | TargetCostKind CostKind) const; |
839 | |
840 | /// Return the expected cost for the given integer when optimising |
841 | /// for size. This is different than the other integer immediate cost |
842 | /// functions in that it is subtarget agnostic. This is useful when you |
843 | /// e.g. target one ISA such as Aarch32 but smaller encodings could be |
844 | /// possible with another such as Thumb. This return value is used as a |
845 | /// penalty when the total cost for a constant is calculated (the bigger |
846 | /// the cost, the more beneficial constant hoisting is). |
847 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
848 | const APInt &Imm, Type *Ty) const; |
849 | |
850 | /// @} |
851 | |
852 | /// \name Vector Target Information |
853 | /// @{ |
854 | /// The various kinds of shuffle patterns for vector queries. |
855 | enum ShuffleKind { |
856 | SK_Broadcast,        ///< Broadcast element 0 to all other elements. |
857 | SK_Reverse,          ///< Reverse the order of the vector. |
858 | SK_Select,           ///< Selects elements from the corresponding lane of |
859 |                      ///< either source operand. This is equivalent to a |
860 |                      ///< vector select with a constant condition operand. |
861 | SK_Transpose,        ///< Transpose two vectors. |
862 | SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset. |
863 | SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset. |
864 | SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one |
865 |                      ///< with any shuffle mask. |
866 | SK_PermuteSingleSrc, ///< Shuffle elements of single source vector with any |
867 |                      ///< shuffle mask. |
868 | SK_Splice            ///< Concatenates elements from the first input vector |
869 |                      ///< with elements of the second input vector, returning |
870 |                      ///< a vector of the same type as the input vectors. |
871 | }; |
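// Illustrative note (added here, not from the original header): in terms of
// IR shufflevector masks on a 4-element vector, SK_Broadcast corresponds to
// a mask of <0,0,0,0>, SK_Reverse to <3,2,1,0>, and SK_Select to a mask that
// picks each lane from the matching lane of either source, e.g. <0,5,2,7>
// for two 4-element sources.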
872 | |
873 | /// Additional information about an operand's possible values. |
874 | enum OperandValueKind { |
875 | OK_AnyValue,               // Operand can have any value. |
876 | OK_UniformValue,           // Operand is uniform (splat of a value). |
877 | OK_UniformConstantValue,   // Operand is uniform constant. |
878 | OK_NonUniformConstantValue // Operand is a non uniform constant value. |
879 | }; |
880 | |
881 | /// Additional properties of an operand's values. |
882 | enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 }; |
883 | |
884 | /// \return the number of registers in the target-provided register class. |
885 | unsigned getNumberOfRegisters(unsigned ClassID) const; |
886 | /// \return the target-provided register class ID for the provided type, |
887 | /// accounting for type promotion and other type-legalization techniques |
888 | /// that the target might apply. However, it specifically does not account |
889 | /// for the scalarization or splitting of vector types. Should a vector |
890 | /// type require scalarization or splitting into multiple underlying vector |
891 | /// registers, that type should be mapped to a register class containing no |
892 | /// registers. Specifically, this is designed to provide a simple, |
893 | /// high-level view of the register allocation later performed by the |
894 | /// backend. These register classes don't necessarily map onto the register |
895 | /// classes used by the backend. |
896 | /// FIXME: It's not currently possible to determine how many registers |
897 | /// are used by the provided type. |
898 | unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const; |
899 | |
900 | /// \return the target-provided register class name. |
901 | const char *getRegisterClassName(unsigned ClassID) const; |
902 | |
903 | enum RegisterKind { RGK_Scalar, RGK_FixedWidthVector, RGK_ScalableVector }; |
904 | |
905 | /// \return The width of the largest scalar or vector register type. |
906 | TypeSize getRegisterBitWidth(RegisterKind K) const; |
907 | |
908 | /// \return The width of the smallest vector register type. |
909 | unsigned getMinVectorRegisterBitWidth() const; |
910 | |
911 | /// \return The maximum value of vscale if the target specifies an |
912 | /// architectural maximum vector length, and None otherwise. |
913 | Optional<unsigned> getMaxVScale() const; |
914 | |
915 | /// \return True if the vectorization factor should be chosen to |
916 | /// make the vector of the smallest element type match the size of a |
917 | /// vector register. For wider element types, this could result in |
918 | /// creating vectors that span multiple vector registers. |
919 | /// If false, the vectorization factor will be chosen based on the |
920 | /// size of the widest element type. |
921 | bool shouldMaximizeVectorBandwidth() const; |
922 | |
923 | /// \return The minimum vectorization factor for types of given element |
924 | /// bit width, or 0 if there is no minimum VF. The returned value only |
925 | /// applies when shouldMaximizeVectorBandwidth returns true. |
926 | /// If IsScalable is true, the returned ElementCount must be a scalable VF. |
927 | ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const; |
928 | |
929 | /// \return The maximum vectorization factor for types of given element |
930 | /// bit width and opcode, or 0 if there is no maximum VF. |
931 | /// Currently only used by the SLP vectorizer. |
932 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const; |
933 | |
934 | /// \return True if it should be considered for address type promotion. |
935 | /// \p AllowPromotionWithoutCommonHeader is set to true if promoting \p I |
936 | /// is profitable without finding other extensions fed by the same input. |
937 | bool shouldConsiderAddressTypePromotion( |
938 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const; |
939 | |
940 | /// \return The size of a cache line in bytes. |
941 | unsigned getCacheLineSize() const; |
942 | |
943 | /// The possible cache levels. |
944 | enum class CacheLevel { |
945 | L1D, // The L1 data cache. |
946 | L2D, // The L2 data cache. |
947 | |
948 | // Only data caches are modeled here; deeper levels (such as L3) and |
949 | // instruction caches are currently not represented. |
950 | |
951 | }; |
952 | |
953 | /// \return The size of the cache level in bytes, if available. |
954 | Optional<unsigned> getCacheSize(CacheLevel Level) const; |
955 | |
956 | /// \return The associativity of the cache level, if available. |
957 | Optional<unsigned> getCacheAssociativity(CacheLevel Level) const; |
958 | |
959 | /// \return How much before a load we should place the prefetch |
960 | /// instruction. This is currently measured in number of |
961 | /// instructions. |
962 | unsigned getPrefetchDistance() const; |
963 | |
964 | /// Some HW prefetchers can handle accesses up to a certain constant stride. |
965 | /// Sometimes prefetching is beneficial even below the HW prefetcher limit, |
966 | /// and the arguments provided are meant to serve as a basis for deciding |
967 | /// this for a particular loop. |
968 | /// |
969 | /// \param NumMemAccesses        Number of memory accesses in the loop. |
970 | /// \param NumStridedMemAccesses Number of the memory accesses that |
971 | ///                              ScalarEvolution could find a known stride |
972 | ///                              for. |
973 | /// \param NumPrefetches         Number of software prefetches that will be |
974 | ///                              emitted as determined by the addresses |
975 | ///                              involved and the cache line size. |
976 | /// \param HasCall               True if the loop contains a call. |
977 | /// |
978 | /// \return This is the minimum stride in bytes where it makes sense to |
979 | ///         start adding SW prefetches. The default is 1, i.e. prefetch |
980 | ///         with any stride. |
981 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
982 | unsigned NumStridedMemAccesses, |
983 | unsigned NumPrefetches, bool HasCall) const; |
984 | |
985 | /// \return The maximum number of iterations to prefetch ahead. If |
986 | /// the required number of iterations is more than this number, no |
987 | /// prefetching is performed. |
988 | unsigned getMaxPrefetchIterationsAhead() const; |
989 | |
990 | /// \return True if prefetching should also be done for writes. |
991 | bool enableWritePrefetching() const; |
992 | |
993 | /// \return The maximum interleave factor that any transform should try to |
994 | /// perform for this target. This number depends on the level of parallelism |
995 | /// and the number of execution units in the CPU. |
996 | unsigned getMaxInterleaveFactor(unsigned VF) const; |
997 | |
998 | /// Collect properties of \p V used in cost analysis, e.g. OP_PowerOf2. |
999 | static OperandValueKind getOperandInfo(const Value *V, |
1000 | OperandValueProperties &OpProps); |
1001 | |
1002 | /// This is an approximation of reciprocal throughput of a math/logic op. |
1003 | /// A higher cost indicates less expected throughput. |
1004 | /// From Agner Fog's guides, reciprocal throughput is "the average number of |
1005 | /// clock cycles per instruction when the instructions are not part of a |
1006 | /// limiting dependency chain." |
1007 | /// Therefore, costs should be scaled to account for multiple execution |
1008 | /// units on the target that can process this type of instruction. For |
1009 | /// example, if there are 5 scalar integer units and 2 vector integer units |
1010 | /// that can calculate an 'add' in a single cycle, this model should |
1011 | /// indicate that the cost of the vector add instruction is 2/5 (or 0.4) of |
1012 | /// the cost of the scalar add instruction. |
1013 | /// \p Args is an optional argument which holds the instruction operands |
1014 | /// values so the TTI can analyze those values searching for special |
1015 | /// cases or optimizations based on those values. |
1016 | /// \p CxtI is the optional original context instruction, if one exists, to |
1017 | /// provide even more information. |
1018 | InstructionCost getArithmeticInstrCost( |
1019 | unsigned Opcode, Type *Ty, |
1020 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1021 | OperandValueKind Opd1Info = OK_AnyValue, |
1022 | OperandValueKind Opd2Info = OK_AnyValue, |
1023 | OperandValueProperties Opd1PropInfo = OP_None, |
1024 | OperandValueProperties Opd2PropInfo = OP_None, |
1025 | ArrayRef<const Value *> Args = ArrayRef<const Value *>(), |
1026 | const Instruction *CxtI = nullptr) const; |
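// Usage sketch (hypothetical caller, not part of this header): a transform
// with a TargetTransformInfo reference can compare scalar and vector add
// throughput costs; "TTI", "ScalarTy", "VecTy", and "NumElts" are assumed
// to be in scope.
//
//   InstructionCost ScalarCost =
//       TTI.getArithmeticInstrCost(Instruction::Add, ScalarTy);
//   InstructionCost VectorCost =
//       TTI.getArithmeticInstrCost(Instruction::Add, VecTy);
//   bool VectorIsCheaper = VectorCost <= ScalarCost * NumElts;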
1027 | |
1028 | /// \return The cost of a shuffle instruction of kind Kind and of type Tp. |
1029 | /// The exact mask may be passed as Mask, or else the array will be empty. |
1030 | /// The index and subtype parameters are used by the subvector insertion and |
1031 | /// extraction shuffle kinds to show the insert/extract point and the type |
1032 | /// of the subvector being inserted/extracted. |
1033 | /// NOTE: For subvector extractions Tp represents the source type. |
1034 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1035 | ArrayRef<int> Mask = None, int Index = 0, |
1036 | VectorType *SubTp = nullptr) const; |
1037 | |
1038 | /// Represents a hint about the context in which a cast is used. |
1039 | /// |
1040 | /// For zext/sext, the context of the cast is the operand, which must be a |
1041 | /// load of some kind. For trunc, the context of the cast is the single |
1042 | /// user of the instruction, which must be a store of some kind. |
1043 | /// |
1044 | /// This enum allows the vectorizer to give getCastInstrCost an idea of the |
1045 | /// type of cast it's dealing with, as not every cast is equal. For |
1046 | /// instance, the zext of a load may be free, but the zext of an |
1047 | /// interleaving load can be (very) expensive! |
1048 | /// |
1049 | /// See \c getCastContextHint to compute a CastContextHint from a cast |
1050 | /// Instruction*. Callers can use it if they don't need to override the |
1051 | /// context and just want it to be calculated from the instruction. |
1052 | /// |
1053 | /// FIXME: This handles the types of load/store that the vectorizer can |
1054 | /// produce, which are the cases where the context instruction is most |
1055 | /// likely to be incorrect. There are other situations where that can |
1056 | /// happen too, which might be handled here but in the long run a more |
1057 | /// general solution of costing multiple instructions at the same time may be better. |
1058 | enum class CastContextHint : uint8_t { |
1059 | None,          ///< The cast is not used with a load/store of any kind. |
1060 | Normal,        ///< The cast is used with a normal load/store. |
1061 | Masked,        ///< The cast is used with a masked load/store. |
1062 | GatherScatter, ///< The cast is used with a gather/scatter. |
1063 | Interleave,    ///< The cast is used with an interleaved load/store. |
1064 | Reversed,      ///< The cast is used with a reversed load/store. |
1065 | }; |
1066 | |
1067 | /// Calculates a CastContextHint from \p I. |
1068 | /// This should be used by callers of getCastInstrCost if they wish to |
1069 | /// determine the context from some instruction. |
1070 | /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is |
1071 | /// nullptr, or if it's another type of cast. |
1072 | static CastContextHint getCastContextHint(const Instruction *I); |
1073 | |
1074 | /// \return The expected cost of cast instructions, such as bitcast, trunc, |
1075 | /// zext, etc. If there is an existing instruction that holds Opcode, it |
1076 | /// may be passed in the 'I' parameter. |
1077 | InstructionCost |
1078 | getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
1079 | TTI::CastContextHint CCH, |
1080 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1081 | const Instruction *I = nullptr) const; |
1082 | |
1083 | /// \returns The expected cost of a sign- or zero-extended vector extract. |
1084 | /// Use -1 to indicate that there is no information about the index value. |
1085 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
1086 | VectorType *VecTy, |
1087 | unsigned Index = -1) const; |
1088 | |
1089 | /// \return The expected cost of control-flow related instructions such as |
1090 | /// Phi, Ret, Br, Switch. |
1091 | InstructionCost |
1092 | getCFInstrCost(unsigned Opcode, |
1093 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency, |
1094 | const Instruction *I = nullptr) const; |
1095 | |
1096 | /// \returns The expected cost of compare and select instructions. If there |
1097 | /// is an existing instruction that holds Opcode, it may be passed in the |
1098 | /// 'I' parameter. The \p VecPred parameter can be used to indicate the |
1099 | /// select is using a compare with the specified predicate as condition. |
1100 | /// When vector types are passed, \p VecPred must be used for all lanes. |
1101 | InstructionCost |
1102 | getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy = nullptr, |
1103 | CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE, |
1104 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1105 | const Instruction *I = nullptr) const; |
1106 | |
1107 | /// \return The expected cost of vector Insert and Extract. |
1108 | /// Use -1 to indicate that there is no information on the index value. |
1109 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1110 | unsigned Index = -1) const; |
1111 | |
1112 | /// \return The cost of Load and Store instructions. |
1113 | InstructionCost |
1114 | getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1115 | unsigned AddressSpace, |
1116 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1117 | const Instruction *I = nullptr) const; |
1118 | |
1119 | /// \return The cost of masked Load and Store instructions. |
1120 | InstructionCost getMaskedMemoryOpCost( |
1121 | unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace, |
1122 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1123 | |
1124 | /// \return The cost of Gather or Scatter operation |
1125 | /// \p Opcode - is a type of memory access Load or Store |
1126 | /// \p DataTy - a vector type of the data to be loaded or stored |
1127 | /// \p Ptr - pointer [or vector of pointers] - address[es] in memory |
1128 | /// \p VariableMask - true when the memory access is predicated with a mask |
1129 | ///                   that is not a compile-time constant |
1130 | /// \p Alignment - alignment of single element |
1131 | /// \p I - the optional original context instruction, if one exists, e.g. |
1132 | ///        the load/store to transform or the call to the gather/scatter intrinsic |
1133 | InstructionCost getGatherScatterOpCost( |
1134 | unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask, |
1135 | Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1136 | const Instruction *I = nullptr) const; |
1137 | |
1138 | /// \return The cost of the interleaved memory operation. |
1139 | /// \p Opcode is the memory operation code |
1140 | /// \p VecTy is the vector type of the interleaved access. |
1141 | /// \p Factor is the interleave factor |
1142 | /// \p Indices is the indices for interleaved load members (as interleaved |
1143 | ///    load allows gaps) |
1144 | /// \p Alignment is the alignment of the memory operation |
1145 | /// \p AddressSpace is address space of the pointer. |
1146 | /// \p UseMaskForCond indicates if the memory access is predicated. |
1147 | /// \p UseMaskForGaps indicates if gaps should be masked. |
1148 | InstructionCost getInterleavedMemoryOpCost( |
1149 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1150 | Align Alignment, unsigned AddressSpace, |
1151 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput, |
1152 | bool UseMaskForCond = false, bool UseMaskForGaps = false) const; |
1153 | |
1154 | /// A helper function to determine the type of reduction algorithm used |
1155 | /// for a given \p Opcode and set of FastMathFlags \p FMF. |
1156 | static bool requiresOrderedReduction(Optional<FastMathFlags> FMF) { |
1157 | return FMF != None && !(*FMF).allowReassoc(); |
1158 | } |
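// For example (illustrative): a floating-point reduction built under 'fast'
// math flags has allowReassoc() set, so requiresOrderedReduction(FMF) is
// false and the cheaper tree-wise lowering can be costed; a strict FP
// reduction returns true and must be costed lane-by-lane in order.
//
//   Optional<FastMathFlags> FMF = ...; // taken from the reduction intrinsic
//   InstructionCost Cost = getArithmeticReductionCost(Opcode, Ty, FMF);
//   // ordered (in-lane-order) lowering assumed when
//   // requiresOrderedReduction(FMF) holds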
1159 | /// Calculate the cost of vector reduction intrinsics. |
1160 | /// |
1161 | /// This is the cost of reducing the vector value of type \p Ty to a scalar |
1162 | /// value using the operation denoted by \p Opcode. The FastMathFlags |
1163 | /// parameter \p FMF indicates what type of reduction we are performing: |
1164 | ///   1. Tree-wise. This is the typical 'fast' reduction performed that |
1165 | ///   involves successively splitting a vector into half and doing the |
1166 | ///   operation on the pair of halves until you have a scalar value. For |
1167 | ///   example: |
1168 | ///     (v0, v1, v2, v3) |
1169 | ///     ((v0+v2), (v1+v3), undef, undef) |
1170 | ///     ((v0+v2+v1+v3), undef, undef, undef) |
1171 | ///   This is the default behaviour for integer operations, whereas for |
1172 | ///   floating point we only do this if \p FMF indicates that |
1173 | ///   reassociation is allowed. |
1174 | ///   2. Ordered. For a vector with N elements, this involves performing N |
1175 | ///   operations in lane order, starting with an initial scalar value, i.e. |
1176 | ///     result = v0 + scalar_start |
1177 | ///     result = v1 + result |
1178 | ///     result = v2 + result |
1179 | ///     result = v3 + result |
1180 | ///   This is only the case for FP operations and when reassociation is |
1181 | ///   not allowed. |
1182 | /// |
1183 | |
1184 | InstructionCost getArithmeticReductionCost( |
1185 | unsigned Opcode, VectorType *Ty, Optional<FastMathFlags> FMF, |
1186 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1187 | |
1188 | InstructionCost getMinMaxReductionCost( |
1189 | VectorType *Ty, VectorType *CondTy, bool IsUnsigned, |
1190 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1191 | |
1192 | /// Calculate the cost of an extended reduction pattern, similar to |
1193 | /// getArithmeticReductionCost of an Add reduction with an extension and |
1194 | /// optional multiply. This is the cost of: |
1195 | /// ResTy vecreduce.add(ext(Ty A)), or if IsMLA flag is set then: |
1196 | /// ResTy vecreduce.add(mul(ext(Ty A), ext(Ty B))). The reduction happens |
1197 | /// on a VecTy with ResTy elements and Ty lanes. |
1198 | InstructionCost getExtendedAddReductionCost( |
1199 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1200 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const; |
1201 | |
1202 | /// \returns The cost of Intrinsic instructions. Analyses the real |
1203 | /// arguments. Three cases are handled: scalar instructions, vector |
1204 | /// instructions, and scalar instructions which are to be vectorized. |
1205 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1206 | TTI::TargetCostKind CostKind) const; |
1207 | |
1208 | /// \returns The cost of Call instructions. |
1209 | InstructionCost getCallInstrCost( |
1210 | Function *F, Type *RetTy, ArrayRef<Type *> Tys, |
1211 | TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const; |
1212 | |
1213 | /// \returns The number of pieces into which the provided type must be |
1214 | /// split during legalization. Zero is returned when the answer is unknown. |
1215 | unsigned getNumberOfParts(Type *Tp) const; |
1216 | |
1217 | /// \returns The cost of the address computation. For most targets this |
1218 | /// can be merged into the instruction indexing mode. Some targets might |
1219 | /// want to distinguish between address computation for memory operations |
1220 | /// on vector types and scalar types. Such targets should override this |
1221 | /// function. The 'SE' parameter holds a pointer to the scalar evolution |
1222 | /// object which is used to get the Ptr step value in case of a constant |
1223 | /// stride; 'Ptr' holds the SCEV of the access pointer. |
1224 | InstructionCost getAddressComputationCost(Type *Ty, |
1225 | ScalarEvolution *SE = nullptr, |
1226 | const SCEV *Ptr = nullptr) const; |
1227 | |
1228 | /// \returns The cost, if any, of keeping values of the given types alive |
1229 | /// over a callsite. |
1230 | /// |
1231 | /// Some types may require the use of register classes that do not have |
1232 | /// any callee-saved registers, so would require a spill and fill. |
1233 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const; |
1234 | |
1235 | /// \returns True if the intrinsic is a supported memory intrinsic. Info |
1236 | /// will contain additional information - whether the intrinsic may write |
1237 | /// or read to memory, volatility and the pointer. Info is undefined |
1238 | /// if false is returned. |
1239 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const; |
1240 | |
1241 | /// \returns The maximum element size, in bytes, for an element |
1242 | /// unordered-atomic memory intrinsic. |
1243 | unsigned getAtomicMemIntrinsicMaxElementSize() const; |
1244 | |
1245 | /// \returns A value which is the result of the given memory intrinsic. New |
1246 | /// instructions may be created to extract the result from the given |
1247 | /// intrinsic memory operation. Returns nullptr if the target cannot create |
1248 | /// a result from the given intrinsic. |
1249 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1250 | Type *ExpectedType) const; |
1251 | |
1252 | /// \returns The type to use in a loop expansion of a memcpy call. |
1253 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1254 | unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1255 | unsigned SrcAlign, unsigned DestAlign) const; |
1256 | |
1257 | /// \param[out] OpsOut The operand types to copy RemainingBytes of memory. |
1258 | /// \param RemainingBytes The number of bytes to copy. |
1259 | /// |
1260 | /// Calculates the operand types to use when copying \p RemainingBytes of |
1261 | /// memory, where source and destination alignments are \p SrcAlign and |
1262 | /// \p DestAlign respectively. |
1263 | void getMemcpyLoopResidualLoweringType( |
1264 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1265 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1266 | unsigned SrcAlign, unsigned DestAlign) const; |
1267 | |
1268 | /// \returns True if the two functions have compatible attributes for |
1269 | /// inlining purposes. |
1270 | bool areInlineCompatible(const Function *Caller, |
1271 | const Function *Callee) const; |
1272 | |
1273 | /// \returns True if the caller and callee agree on how \p Args will be |
1274 | /// passed to the callee. |
1275 | /// \param Args The list of caller arguments to be passed to the callee. |
1276 | |
1277 | bool areFunctionArgsABICompatible(const Function *Caller, |
1278 | const Function *Callee, |
1279 | SmallPtrSetImpl<Argument *> &Args) const; |
1280 | |
1281 | /// The type of load/store indexing. |
1282 | enum MemIndexedMode { |
1283 | MIM_Unindexed, ///< No indexing. |
1284 | MIM_PreInc,    ///< Pre-incrementing. |
1285 | MIM_PreDec,    ///< Pre-decrementing. |
1286 | MIM_PostInc,   ///< Post-incrementing. |
1287 | MIM_PostDec    ///< Post-decrementing. |
1288 | }; |
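// Illustrative mapping (added note, not from the original header): on
// ARM/AArch64, MIM_PostInc matches a load such as "ldr r0, [r1], #4"
// (use the address, then update it), while MIM_PreInc matches
// "ldr r0, [r1, #4]!" (update the address, then use it).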
1289 | |
1290 | /// \returns True if the specified indexed load is legal on this target. |
1291 | bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1292 | |
1293 | /// \returns True if the specified indexed store is legal on this target. |
1294 | bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const; |
1295 | |
1296 | /// \returns The bitwidth of the largest vector type that should be used to |
1297 | /// load/store in the given address space. |
1298 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const; |
1299 | |
1300 | /// \returns True if the load instruction is legal to vectorize. |
1301 | bool isLegalToVectorizeLoad(LoadInst *LI) const; |
1302 | |
1303 | /// \returns True if the store instruction is legal to vectorize. |
1304 | bool isLegalToVectorizeStore(StoreInst *SI) const; |
1305 | |
1306 | /// \returns True if it is legal to vectorize the given load chain. |
1307 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
1308 | unsigned AddrSpace) const; |
1309 | |
1310 | /// \returns True if it is legal to vectorize the given store chain. |
1311 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
1312 | unsigned AddrSpace) const; |
1313 | |
1314 | /// \returns True if it is legal to vectorize the given reduction kind. |
1315 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1316 | ElementCount VF) const; |
1317 | |
1318 | /// \returns True if the given type is supported for scalable vectors. |
1319 | bool isElementTypeLegalForScalableVector(Type *Ty) const; |
1320 | |
1321 | /// \returns The new vector factor value if the target doesn't support |
1322 | /// \p LoadSize wide loads or has a better vector factor. |
1323 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1324 | unsigned ChainSizeInBytes, |
1325 | VectorType *VecTy) const; |
1326 | |
1327 | /// \returns The new vector factor value if the target doesn't support |
1328 | /// \p StoreSize wide stores or has a better vector factor. |
1329 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1330 | unsigned ChainSizeInBytes, |
1331 | VectorType *VecTy) const; |
1332 | |
1333 | /// Flags describing the kind of vector reduction. |
1334 | struct ReductionFlags { |
1335 | ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {} |
1336 | bool IsMaxOp;  ///< If the op a min/max kind, true if it's a max operation. |
1337 | bool IsSigned; ///< Whether the operation is a signed int reduction. |
1338 | bool NoNaN;    ///< If op is an fp min/max, whether NaNs may be present. |
1339 | }; |
1340 | |
1341 | /// \returns True if the target prefers reductions in loop. |
1342 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
1343 | ReductionFlags Flags) const; |
1344 | |
1345 | /// \returns True if the target prefers reductions select kept in the loop |
1346 | /// when tail folding. i.e. |
1347 | /// loop: |
1348 | ///   p = phi (0, s) |
1349 | ///   a = add (p, x) |
1350 | ///   s = select (mask, a, p) |
1351 | /// vecreduce.add(s) |
1352 | /// |
1353 | /// As opposed to the normal scheme of p = phi (0, a) which allows the |
1354 | /// select to be pulled out of the loop. If the select(.., add, ..) can be |
1355 | /// predicated by the target, this can lead to cleaner code generation. |
1356 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
1357 | ReductionFlags Flags) const; |
1358 | |
1359 | /// \returns True if the target wants to expand the given reduction |
1360 | /// intrinsic into a shuffle sequence. |
1361 | bool shouldExpandReduction(const IntrinsicInst *II) const; |
1362 | |
1363 | /// \returns the size cost of rematerializing a GlobalValue address |
1364 | /// relative to a stack reload. |
1365 | unsigned getGISelRematGlobalCost() const; |
1366 | |
1367 | /// \returns True if the target supports scalable vectors. |
1368 | bool supportsScalableVectors() const; |
1369 | |
1370 | |
1371 | |
1372 | |
1373 | /// \returns True if the target implements hardware support for an |
1374 | /// active vector length. |
1375 | bool hasActiveVectorLength() const; |
1376 | |
1377 | struct VPLegalization { |
1378 | enum VPTransform { |
1379 | // keep the predicating parameter |
1380 | Legal = 0, |
1381 | // where legal, discard the predicate parameter |
1382 | Discard = 1, |
1383 | // transform into something else that is also predicating |
1384 | Convert = 2 |
1385 | }; |
1386 | |
1387 | // How to transform the EVL parameter. |
1388 | // Legal:   keep the EVL parameter as it is. |
1389 | // Discard: ignore the EVL parameter where it is safe to do so. |
1390 | // Convert: fold the EVL into the mask parameter. |
1391 | VPTransform EVLParamStrategy; |
1392 | |
1393 | // How to transform the operator. |
1394 | // Legal:   the target supports this operator. |
1395 | // Convert: convert this to a non-VP operation. |
1396 | // The 'Discard' strategy is invalid here. |
1397 | VPTransform OpStrategy; |
1398 | |
1399 | bool shouldDoNothing() const { |
1400 | return (EVLParamStrategy == Legal) && (OpStrategy == Legal); |
1401 | } |
1402 | VPLegalization(VPTransform EVLParamStrategy, VPTransform OpStrategy) |
1403 | : EVLParamStrategy(EVLParamStrategy), OpStrategy(OpStrategy) {} |
1404 | }; |
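// Usage sketch (hypothetical): a target with native vector predication could
// report full support, while one without it asks for conversion; the
// "SupportsVP" subtarget query below is assumed for illustration.
//
//   TTI::VPLegalization
//   MyTTIImpl::getVPLegalizationStrategy(const VPIntrinsic &PI) const {
//     if (SupportsVP) // hypothetical subtarget feature check
//       return VPLegalization(VPLegalization::Legal, VPLegalization::Legal);
//     return VPLegalization(VPLegalization::Discard, VPLegalization::Convert);
//   }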
1405 | |
1406 | /// \returns How the target needs this vector-predicated operation to be |
1407 | /// transformed. |
1408 | VPLegalization getVPLegalizationStrategy(const VPIntrinsic &PI) const; |
1409 | |
1410 | |
1411 | |
1412 | |
1413 | private: |
1414 | |
1415 | /// Estimate the latency of specified instruction. |
1416 | InstructionCost getInstructionLatency(const Instruction *I) const; |
1417 | |
1418 | /// Returns the expected throughput cost of the instruction. |
1419 | /// Returns -1 if the cost is unknown. |
1420 | InstructionCost getInstructionThroughput(const Instruction *I) const; |
1421 | |
1422 | /// The abstract base class used to type erase specific TTI |
1423 | /// implementations. |
1424 | class Concept; |
1425 | |
1426 | /// The template model for the base class which wraps a concrete |
1427 | /// implementation in a type erased interface. |
1428 | template <typename T> class Model; |
1429 | |
1430 | std::unique_ptr<Concept> TTIImpl; |
1431 | }; |
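// The Concept/Model pair that follows is classic C++ type erasure: Concept
// is an abstract interface, and Model<T> adapts any concrete implementation
// T (e.g. a target's TTIImpl) to that interface by forwarding every call to
// the wrapped object. A minimal sketch of the pattern, with hypothetical
// names, looks like:
//
//   class Concept { public: virtual ~Concept(); virtual int f() = 0; };
//   template <typename T> class Model final : public Concept {
//     T Impl;
//   public:
//     Model(T Impl) : Impl(std::move(Impl)) {}
//     int f() override { return Impl.f(); } // forward to the wrapped type
//   };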
1432 | |
1433 | class TargetTransformInfo::Concept { |
1434 | public: |
1435 | virtual ~Concept() = 0; |
1436 | virtual const DataLayout &getDataLayout() const = 0; |
1437 | virtual InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, |
1438 | ArrayRef<const Value *> Operands, |
1439 | TTI::TargetCostKind CostKind) = 0; |
1440 | virtual unsigned getInliningThresholdMultiplier() = 0; |
1441 | virtual unsigned adjustInliningThreshold(const CallBase *CB) = 0; |
1442 | virtual int getInlinerVectorBonusPercent() = 0; |
1443 | virtual InstructionCost getMemcpyCost(const Instruction *I) = 0; |
1444 | virtual unsigned |
1445 | getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize, |
1446 | ProfileSummaryInfo *PSI, |
1447 | BlockFrequencyInfo *BFI) = 0; |
1448 | virtual InstructionCost getUserCost(const User *U, |
1449 | ArrayRef<const Value *> Operands, |
1450 | TargetCostKind CostKind) = 0; |
1451 | virtual BranchProbability getPredictableBranchThreshold() = 0; |
1452 | virtual bool hasBranchDivergence() = 0; |
1453 | virtual bool useGPUDivergenceAnalysis() = 0; |
1454 | virtual bool isSourceOfDivergence(const Value *V) = 0; |
1455 | virtual bool isAlwaysUniform(const Value *V) = 0; |
1456 | virtual unsigned getFlatAddressSpace() = 0; |
1457 | virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
1458 | Intrinsic::ID IID) const = 0; |
1459 | virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0; |
1460 | virtual unsigned getAssumedAddrSpace(const Value *V) const = 0; |
1461 | virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, |
1462 | Value *OldV, |
1463 | Value *NewV) const = 0; |
1464 | virtual bool isLoweredToCall(const Function *F) = 0; |
1465 | virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &, |
1466 | UnrollingPreferences &UP, |
1467 | OptimizationRemarkEmitter *ORE) = 0; |
1468 | virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
1469 | PeelingPreferences &PP) = 0; |
1470 | virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1471 | AssumptionCache &AC, |
1472 | TargetLibraryInfo *LibInfo, |
1473 | HardwareLoopInfo &HWLoopInfo) = 0; |
1474 | virtual bool |
1475 | preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
1476 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
1477 | DominatorTree *DT, const LoopAccessInfo *LAI) = 0; |
1478 | virtual bool emitGetActiveLaneMask() = 0; |
1479 | virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
1480 | IntrinsicInst &II) = 0; |
1481 | virtual Optional<Value *> |
1482 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
1483 | APInt DemandedMask, KnownBits &Known, |
1484 | bool &KnownBitsComputed) = 0; |
1485 | virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
1486 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
1487 | APInt &UndefElts2, APInt &UndefElts3, |
1488 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
1489 | SimplifyAndSetOp) = 0; |
1490 | virtual bool isLegalAddImmediate(int64_t Imm) = 0; |
1491 | virtual bool isLegalICmpImmediate(int64_t Imm) = 0; |
1492 | virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, |
1493 | int64_t BaseOffset, bool HasBaseReg, |
1494 | int64_t Scale, unsigned AddrSpace, |
1495 | Instruction *I) = 0; |
1496 | virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1497 | TargetTransformInfo::LSRCost &C2) = 0; |
1498 | virtual bool isNumRegsMajorCostOfLSR() = 0; |
1499 | virtual bool isProfitableLSRChainElement(Instruction *I) = 0; |
1500 | virtual bool canMacroFuseCmp() = 0; |
1501 | virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, |
1502 | LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC, |
1503 | TargetLibraryInfo *LibInfo) = 0; |
1504 | virtual AddressingModeKind |
1505 | getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const = 0; |
1506 | virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0; |
1507 | virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0; |
1508 | virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0; |
1509 | virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0; |
1510 | virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0; |
1511 | virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0; |
1512 | virtual bool isLegalMaskedCompressStore(Type *DataType) = 0; |
1513 | virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0; |
1514 | virtual bool enableOrderedReductions() = 0; |
1515 | virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0; |
1516 | virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0; |
1517 | virtual bool prefersVectorizedAddressing() = 0; |
1518 | virtual InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1519 | int64_t BaseOffset, |
1520 | bool HasBaseReg, int64_t Scale, |
1521 | unsigned AddrSpace) = 0; |
1522 | virtual bool LSRWithInstrQueries() = 0; |
1523 | virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0; |
1524 | virtual bool isProfitableToHoist(Instruction *I) = 0; |
1525 | virtual bool useAA() = 0; |
1526 | virtual bool isTypeLegal(Type *Ty) = 0; |
1527 | virtual InstructionCost getRegUsageForType(Type *Ty) = 0; |
1528 | virtual bool shouldBuildLookupTables() = 0; |
1529 | virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0; |
1530 | virtual bool shouldBuildRelLookupTables() = 0; |
1531 | virtual bool useColdCCForColdCall(Function &F) = 0; |
1532 | virtual InstructionCost getScalarizationOverhead(VectorType *Ty, |
1533 | const APInt &DemandedElts, |
1534 | bool Insert, |
1535 | bool Extract) = 0; |
1536 | virtual InstructionCost |
1537 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1538 | ArrayRef<Type *> Tys) = 0; |
1539 | virtual bool supportsEfficientVectorElementLoadStore() = 0; |
1540 | virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0; |
1541 | virtual MemCmpExpansionOptions |
1542 | enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0; |
1543 | virtual bool enableInterleavedAccessVectorization() = 0; |
1544 | virtual bool enableMaskedInterleavedAccessVectorization() = 0; |
1545 | virtual bool isFPVectorizationPotentiallyUnsafe() = 0; |
1546 | virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context, |
1547 | unsigned BitWidth, |
1548 | unsigned AddressSpace, |
1549 | Align Alignment, |
1550 | bool *Fast) = 0; |
1551 | virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0; |
1552 | virtual bool haveFastSqrt(Type *Ty) = 0; |
1553 | virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0; |
1554 | virtual InstructionCost getFPOpCost(Type *Ty) = 0; |
1555 | virtual InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1556 | const APInt &Imm, Type *Ty) = 0; |
1557 | virtual InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
1558 | TargetCostKind CostKind) = 0; |
1559 | virtual InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
1560 | const APInt &Imm, Type *Ty, |
1561 | TargetCostKind CostKind, |
1562 | Instruction *Inst = nullptr) = 0; |
1563 | virtual InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
1564 | const APInt &Imm, Type *Ty, |
1565 | TargetCostKind CostKind) = 0; |
1566 | virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0; |
1567 | virtual unsigned getRegisterClassForType(bool Vector, |
1568 | Type *Ty = nullptr) const = 0; |
1569 | virtual const char *getRegisterClassName(unsigned ClassID) const = 0; |
1570 | virtual TypeSize getRegisterBitWidth(RegisterKind K) const = 0; |
1571 | virtual unsigned getMinVectorRegisterBitWidth() const = 0; |
1572 | virtual Optional<unsigned> getMaxVScale() const = 0; |
1573 | virtual bool shouldMaximizeVectorBandwidth() const = 0; |
1574 | virtual ElementCount getMinimumVF(unsigned ElemWidth, |
1575 | bool IsScalable) const = 0; |
1576 | virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0; |
1577 | virtual bool shouldConsiderAddressTypePromotion( |
1578 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0; |
1579 | virtual unsigned getCacheLineSize() const = 0; |
1580 | virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0; |
1581 | virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0; |
1582 | |
1583 | /// \return How much before a load we should place the prefetch |
1584 | /// instruction. This is currently measured in number of |
1585 | /// instructions. |
1586 | virtual unsigned getPrefetchDistance() const = 0; |
1587 | |
1588 | /// \return Some HW prefetchers can handle accesses up to a certain |
1589 | /// constant stride. Sometimes prefetching is beneficial even below the |
1590 | /// HW prefetcher limit, and the arguments provided are meant to serve |
1591 | /// as a basis for deciding this for a particular loop. |
1592 | |
1593 | |
1594 | virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
1595 | unsigned NumStridedMemAccesses, |
1596 | unsigned NumPrefetches, |
1597 | bool HasCall) const = 0; |
1598 | |
1599 | /// \return The maximum number of iterations to prefetch ahead. If |
1600 | /// the required number of iterations is more than this number, no |
1601 | /// prefetching is performed. |
1602 | virtual unsigned getMaxPrefetchIterationsAhead() const = 0; |
1603 | |
1604 | /// \return True if prefetching should also be done for writes. |
1605 | virtual bool enableWritePrefetching() const = 0; |
1606 | |
1607 | virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0; |
1608 | virtual InstructionCost getArithmeticInstrCost( |
1609 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
1610 | OperandValueKind Opd1Info, OperandValueKind Opd2Info, |
1611 | OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, |
1612 | ArrayRef<const Value *> Args, const Instruction *CxtI = nullptr) = 0; |
1613 | virtual InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
1614 | ArrayRef<int> Mask, int Index, |
1615 | VectorType *SubTp) = 0; |
1616 | virtual InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, |
1617 | Type *Src, CastContextHint CCH, |
1618 | TTI::TargetCostKind CostKind, |
1619 | const Instruction *I) = 0; |
1620 | virtual InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
1621 | VectorType *VecTy, |
1622 | unsigned Index) = 0; |
1623 | virtual InstructionCost getCFInstrCost(unsigned Opcode, |
1624 | TTI::TargetCostKind CostKind, |
1625 | const Instruction *I = nullptr) = 0; |
1626 | virtual InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
1627 | Type *CondTy, |
1628 | CmpInst::Predicate VecPred, |
1629 | TTI::TargetCostKind CostKind, |
1630 | const Instruction *I) = 0; |
1631 | virtual InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
1632 | unsigned Index) = 0; |
1633 | virtual InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, |
1634 | Align Alignment, |
1635 | unsigned AddressSpace, |
1636 | TTI::TargetCostKind CostKind, |
1637 | const Instruction *I) = 0; |
1638 | virtual InstructionCost |
1639 | getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
1640 | unsigned AddressSpace, |
1641 | TTI::TargetCostKind CostKind) = 0; |
1642 | virtual InstructionCost |
1643 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
1644 | bool VariableMask, Align Alignment, |
1645 | TTI::TargetCostKind CostKind, |
1646 | const Instruction *I = nullptr) = 0; |
1647 | |
1648 | virtual InstructionCost getInterleavedMemoryOpCost( |
1649 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
1650 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
1651 | bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0; |
1652 | virtual InstructionCost |
1653 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
1654 | Optional<FastMathFlags> FMF, |
1655 | TTI::TargetCostKind CostKind) = 0; |
1656 | virtual InstructionCost |
1657 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, |
1658 | TTI::TargetCostKind CostKind) = 0; |
1659 | virtual InstructionCost getExtendedAddReductionCost( |
1660 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
1661 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) = 0; |
1662 | virtual InstructionCost |
1663 | getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
1664 | TTI::TargetCostKind CostKind) = 0; |
1665 | virtual InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
1666 | ArrayRef<Type *> Tys, |
1667 | TTI::TargetCostKind CostKind) = 0; |
1668 | virtual unsigned getNumberOfParts(Type *Tp) = 0; |
1669 | virtual InstructionCost |
1670 | getAddressComputationCost(Type *Ty, ScalarEvolution *SE, const SCEV *Ptr) = 0; |
1671 | virtual InstructionCost |
1672 | getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0; |
1673 | virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
1674 | MemIntrinsicInfo &Info) = 0; |
1675 | virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0; |
1676 | virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
1677 | Type *ExpectedType) = 0; |
1678 | virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
1679 | unsigned SrcAddrSpace, |
1680 | unsigned DestAddrSpace, |
1681 | unsigned SrcAlign, |
1682 | unsigned DestAlign) const = 0; |
1683 | virtual void getMemcpyLoopResidualLoweringType( |
1684 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
1685 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
1686 | unsigned SrcAlign, unsigned DestAlign) const = 0; |
1687 | virtual bool areInlineCompatible(const Function *Caller, |
1688 | const Function *Callee) const = 0; |
1689 | virtual bool |
1690 | areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, |
1691 | SmallPtrSetImpl<Argument *> &Args) const = 0; |
1692 | virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
1693 | virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0; |
1694 | virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0; |
1695 | virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0; |
1696 | virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0; |
1697 | virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, |
1698 | Align Alignment, |
1699 | unsigned AddrSpace) const = 0; |
1700 | virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, |
1701 | Align Alignment, |
1702 | unsigned AddrSpace) const = 0; |
1703 | virtual bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
1704 | ElementCount VF) const = 0; |
1705 | virtual bool isElementTypeLegalForScalableVector(Type *Ty) const = 0; |
1706 | virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
1707 | unsigned ChainSizeInBytes, |
1708 | VectorType *VecTy) const = 0; |
1709 | virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
1710 | unsigned ChainSizeInBytes, |
1711 | VectorType *VecTy) const = 0; |
1712 | virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
1713 | ReductionFlags) const = 0; |
1714 | virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
1715 | ReductionFlags) const = 0; |
1716 | virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0; |
1717 | virtual unsigned getGISelRematGlobalCost() const = 0; |
1718 | virtual bool supportsScalableVectors() const = 0; |
1719 | virtual bool hasActiveVectorLength() const = 0; |
1720 | virtual InstructionCost getInstructionLatency(const Instruction *I) = 0; |
1721 | virtual VPLegalization |
1722 | getVPLegalizationStrategy(const VPIntrinsic &PI) const = 0; |
1723 | }; |
1724 | |
1725 | template <typename T> |
1726 | class TargetTransformInfo::Model final : public TargetTransformInfo::Concept { |
1727 | T Impl; |
1728 | |
1729 | public: |
1730 | Model(T Impl) : Impl(std::move(Impl)) {} |
1731 | ~Model() override {} |
1732 | |
1733 | const DataLayout &getDataLayout() const override { |
1734 | return Impl.getDataLayout(); |
1735 | } |
1736 | |
1737 | InstructionCost |
1738 | getGEPCost(Type *PointeeType, const Value *Ptr, |
1739 | ArrayRef<const Value *> Operands, |
1740 | enum TargetTransformInfo::TargetCostKind CostKind) override { |
1741 | return Impl.getGEPCost(PointeeType, Ptr, Operands); |
1742 | } |
1743 | unsigned getInliningThresholdMultiplier() override { |
1744 | return Impl.getInliningThresholdMultiplier(); |
1745 | } |
1746 | unsigned adjustInliningThreshold(const CallBase *CB) override { |
1747 | return Impl.adjustInliningThreshold(CB); |
1748 | } |
1749 | int getInlinerVectorBonusPercent() override { |
1750 | return Impl.getInlinerVectorBonusPercent(); |
1751 | } |
1752 | InstructionCost getMemcpyCost(const Instruction *I) override { |
1753 | return Impl.getMemcpyCost(I); |
1754 | } |
1755 | InstructionCost getUserCost(const User *U, ArrayRef<const Value *> Operands, |
1756 | TargetCostKind CostKind) override { |
1757 | return Impl.getUserCost(U, Operands, CostKind); |
1758 | } |
1759 | BranchProbability getPredictableBranchThreshold() override { |
1760 | return Impl.getPredictableBranchThreshold(); |
1761 | } |
1762 | bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); } |
1763 | bool useGPUDivergenceAnalysis() override { |
1764 | return Impl.useGPUDivergenceAnalysis(); |
1765 | } |
1766 | bool isSourceOfDivergence(const Value *V) override { |
1767 | return Impl.isSourceOfDivergence(V); |
1768 | } |
1769 | |
1770 | bool isAlwaysUniform(const Value *V) override { |
1771 | return Impl.isAlwaysUniform(V); |
1772 | } |
1773 | |
1774 | unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); } |
1775 | |
1776 | bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, |
1777 | Intrinsic::ID IID) const override { |
1778 | return Impl.collectFlatAddressOperands(OpIndexes, IID); |
1779 | } |
1780 | |
1781 | bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override { |
1782 | return Impl.isNoopAddrSpaceCast(FromAS, ToAS); |
1783 | } |
1784 | |
1785 | unsigned getAssumedAddrSpace(const Value *V) const override { |
1786 | return Impl.getAssumedAddrSpace(V); |
1787 | } |
1788 | |
1789 | Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, |
1790 | Value *NewV) const override { |
1791 | return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV); |
1792 | } |
1793 | |
1794 | bool isLoweredToCall(const Function *F) override { |
1795 | return Impl.isLoweredToCall(F); |
1796 | } |
1797 | void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, |
1798 | UnrollingPreferences &UP, |
1799 | OptimizationRemarkEmitter *ORE) override { |
1800 | return Impl.getUnrollingPreferences(L, SE, UP, ORE); |
1801 | } |
1802 | void getPeelingPreferences(Loop *L, ScalarEvolution &SE, |
1803 | PeelingPreferences &PP) override { |
1804 | return Impl.getPeelingPreferences(L, SE, PP); |
1805 | } |
1806 | bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, |
1807 | AssumptionCache &AC, TargetLibraryInfo *LibInfo, |
1808 | HardwareLoopInfo &HWLoopInfo) override { |
1809 | return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo); |
1810 | } |
1811 | bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE, |
1812 | AssumptionCache &AC, TargetLibraryInfo *TLI, |
1813 | DominatorTree *DT, |
1814 | const LoopAccessInfo *LAI) override { |
1815 | return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI); |
1816 | } |
1817 | bool emitGetActiveLaneMask() override { |
1818 | return Impl.emitGetActiveLaneMask(); |
1819 | } |
1820 | Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, |
1821 | IntrinsicInst &II) override { |
1822 | return Impl.instCombineIntrinsic(IC, II); |
1823 | } |
1824 | Optional<Value *> |
1825 | simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, |
1826 | APInt DemandedMask, KnownBits &Known, |
1827 | bool &KnownBitsComputed) override { |
1828 | return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known, |
1829 | KnownBitsComputed); |
1830 | } |
1831 | Optional<Value *> simplifyDemandedVectorEltsIntrinsic( |
1832 | InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, |
1833 | APInt &UndefElts2, APInt &UndefElts3, |
1834 | std::function<void(Instruction *, unsigned, APInt, APInt &)> |
1835 | SimplifyAndSetOp) override { |
1836 | return Impl.simplifyDemandedVectorEltsIntrinsic( |
1837 | IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, |
1838 | SimplifyAndSetOp); |
1839 | } |
1840 | bool isLegalAddImmediate(int64_t Imm) override { |
1841 | return Impl.isLegalAddImmediate(Imm); |
1842 | } |
1843 | bool isLegalICmpImmediate(int64_t Imm) override { |
1844 | return Impl.isLegalICmpImmediate(Imm); |
1845 | } |
1846 | bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, |
1847 | bool HasBaseReg, int64_t Scale, unsigned AddrSpace, |
1848 | Instruction *I) override { |
1849 | return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
1850 | AddrSpace, I); |
1851 | } |
1852 | bool isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
1853 | TargetTransformInfo::LSRCost &C2) override { |
1854 | return Impl.isLSRCostLess(C1, C2); |
1855 | } |
1856 | bool isNumRegsMajorCostOfLSR() override { |
1857 | return Impl.isNumRegsMajorCostOfLSR(); |
1858 | } |
1859 | bool isProfitableLSRChainElement(Instruction *I) override { |
1860 | return Impl.isProfitableLSRChainElement(I); |
1861 | } |
1862 | bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); } |
1863 | bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, |
1864 | DominatorTree *DT, AssumptionCache *AC, |
1865 | TargetLibraryInfo *LibInfo) override { |
1866 | return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo); |
1867 | } |
1868 | AddressingModeKind |
1869 | getPreferredAddressingMode(const Loop *L, |
1870 | ScalarEvolution *SE) const override { |
1871 | return Impl.getPreferredAddressingMode(L, SE); |
1872 | } |
1873 | bool isLegalMaskedStore(Type *DataType, Align Alignment) override { |
1874 | return Impl.isLegalMaskedStore(DataType, Alignment); |
1875 | } |
1876 | bool isLegalMaskedLoad(Type *DataType, Align Alignment) override { |
1877 | return Impl.isLegalMaskedLoad(DataType, Alignment); |
1878 | } |
1879 | bool isLegalNTStore(Type *DataType, Align Alignment) override { |
1880 | return Impl.isLegalNTStore(DataType, Alignment); |
1881 | } |
1882 | bool isLegalNTLoad(Type *DataType, Align Alignment) override { |
1883 | return Impl.isLegalNTLoad(DataType, Alignment); |
1884 | } |
1885 | bool isLegalMaskedScatter(Type *DataType, Align Alignment) override { |
1886 | return Impl.isLegalMaskedScatter(DataType, Alignment); |
1887 | } |
1888 | bool isLegalMaskedGather(Type *DataType, Align Alignment) override { |
1889 | return Impl.isLegalMaskedGather(DataType, Alignment); |
1890 | } |
1891 | bool isLegalMaskedCompressStore(Type *DataType) override { |
1892 | return Impl.isLegalMaskedCompressStore(DataType); |
1893 | } |
1894 | bool isLegalMaskedExpandLoad(Type *DataType) override { |
1895 | return Impl.isLegalMaskedExpandLoad(DataType); |
1896 | } |
1897 | bool enableOrderedReductions() override { |
1898 | return Impl.enableOrderedReductions(); |
1899 | } |
1900 | bool hasDivRemOp(Type *DataType, bool IsSigned) override { |
1901 | return Impl.hasDivRemOp(DataType, IsSigned); |
1902 | } |
1903 | bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override { |
1904 | return Impl.hasVolatileVariant(I, AddrSpace); |
1905 | } |
1906 | bool prefersVectorizedAddressing() override { |
1907 | return Impl.prefersVectorizedAddressing(); |
1908 | } |
1909 | InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, |
1910 | int64_t BaseOffset, bool HasBaseReg, |
1911 | int64_t Scale, |
1912 | unsigned AddrSpace) override { |
1913 | return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, |
1914 | AddrSpace); |
1915 | } |
1916 | bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); } |
1917 | bool isTruncateFree(Type *Ty1, Type *Ty2) override { |
1918 | return Impl.isTruncateFree(Ty1, Ty2); |
1919 | } |
1920 | bool isProfitableToHoist(Instruction *I) override { |
1921 | return Impl.isProfitableToHoist(I); |
1922 | } |
1923 | bool useAA() override { return Impl.useAA(); } |
1924 | bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); } |
1925 | InstructionCost getRegUsageForType(Type *Ty) override { |
1926 | return Impl.getRegUsageForType(Ty); |
1927 | } |
1928 | bool shouldBuildLookupTables() override { |
1929 | return Impl.shouldBuildLookupTables(); |
1930 | } |
1931 | bool shouldBuildLookupTablesForConstant(Constant *C) override { |
1932 | return Impl.shouldBuildLookupTablesForConstant(C); |
1933 | } |
1934 | bool shouldBuildRelLookupTables() override { |
1935 | return Impl.shouldBuildRelLookupTables(); |
1936 | } |
1937 | bool useColdCCForColdCall(Function &F) override { |
1938 | return Impl.useColdCCForColdCall(F); |
1939 | } |
1940 | |
1941 | InstructionCost getScalarizationOverhead(VectorType *Ty, |
1942 | const APInt &DemandedElts, |
1943 | bool Insert, bool Extract) override { |
1944 | return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract); |
1945 | } |
1946 | InstructionCost |
1947 | getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, |
1948 | ArrayRef<Type *> Tys) override { |
1949 | return Impl.getOperandsScalarizationOverhead(Args, Tys); |
1950 | } |
1951 | |
1952 | bool supportsEfficientVectorElementLoadStore() override { |
1953 | return Impl.supportsEfficientVectorElementLoadStore(); |
1954 | } |
1955 | |
1956 | bool enableAggressiveInterleaving(bool LoopHasReductions) override { |
1957 | return Impl.enableAggressiveInterleaving(LoopHasReductions); |
1958 | } |
1959 | MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, |
1960 | bool IsZeroCmp) const override { |
1961 | return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp); |
1962 | } |
1963 | bool enableInterleavedAccessVectorization() override { |
1964 | return Impl.enableInterleavedAccessVectorization(); |
1965 | } |
1966 | bool enableMaskedInterleavedAccessVectorization() override { |
1967 | return Impl.enableMaskedInterleavedAccessVectorization(); |
1968 | } |
1969 | bool isFPVectorizationPotentiallyUnsafe() override { |
1970 | return Impl.isFPVectorizationPotentiallyUnsafe(); |
1971 | } |
1972 | bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, |
1973 | unsigned AddressSpace, Align Alignment, |
1974 | bool *Fast) override { |
1975 | return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace, |
1976 | Alignment, Fast); |
1977 | } |
1978 | PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override { |
1979 | return Impl.getPopcntSupport(IntTyWidthInBit); |
1980 | } |
1981 | bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); } |
1982 | |
1983 | bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override { |
1984 | return Impl.isFCmpOrdCheaperThanFCmpZero(Ty); |
1985 | } |
1986 | |
1987 | InstructionCost getFPOpCost(Type *Ty) override { |
1988 | return Impl.getFPOpCost(Ty); |
1989 | } |
1990 | |
1991 | InstructionCost getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, |
1992 | const APInt &Imm, Type *Ty) override { |
1993 | return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty); |
1994 | } |
1995 | InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, |
1996 | TargetCostKind CostKind) override { |
1997 | return Impl.getIntImmCost(Imm, Ty, CostKind); |
1998 | } |
1999 | InstructionCost getIntImmCostInst(unsigned Opc, unsigned Idx, |
2000 | const APInt &Imm, Type *Ty, |
2001 | TargetCostKind CostKind, |
2002 | Instruction *Inst = nullptr) override { |
2003 | return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst); |
2004 | } |
2005 | InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
2006 | const APInt &Imm, Type *Ty, |
2007 | TargetCostKind CostKind) override { |
2008 | return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind); |
2009 | } |
2010 | unsigned getNumberOfRegisters(unsigned ClassID) const override { |
2011 | return Impl.getNumberOfRegisters(ClassID); |
2012 | } |
2013 | unsigned getRegisterClassForType(bool Vector, |
2014 | Type *Ty = nullptr) const override { |
2015 | return Impl.getRegisterClassForType(Vector, Ty); |
2016 | } |
2017 | const char *getRegisterClassName(unsigned ClassID) const override { |
2018 | return Impl.getRegisterClassName(ClassID); |
2019 | } |
2020 | TypeSize getRegisterBitWidth(RegisterKind K) const override { |
2021 | return Impl.getRegisterBitWidth(K); |
2022 | } |
2023 | unsigned getMinVectorRegisterBitWidth() const override { |
2024 | return Impl.getMinVectorRegisterBitWidth(); |
2025 | } |
2026 | Optional<unsigned> getMaxVScale() const override { |
2027 | return Impl.getMaxVScale(); |
2028 | } |
2029 | bool shouldMaximizeVectorBandwidth() const override { |
2030 | return Impl.shouldMaximizeVectorBandwidth(); |
2031 | } |
2032 | ElementCount getMinimumVF(unsigned ElemWidth, |
2033 | bool IsScalable) const override { |
2034 | return Impl.getMinimumVF(ElemWidth, IsScalable); |
2035 | } |
2036 | unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override { |
2037 | return Impl.getMaximumVF(ElemWidth, Opcode); |
2038 | } |
2039 | bool shouldConsiderAddressTypePromotion( |
2040 | const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override { |
2041 | return Impl.shouldConsiderAddressTypePromotion( |
2042 | I, AllowPromotionWithoutCommonHeader); |
2043 | } |
2044 | unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); } |
2045 | Optional<unsigned> getCacheSize(CacheLevel Level) const override { |
2046 | return Impl.getCacheSize(Level); |
2047 | } |
2048 | Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override { |
2049 | return Impl.getCacheAssociativity(Level); |
2050 | } |
2051 | |
2052 | /// \return How much before a load we should place the prefetch |
2053 | /// instruction. This is currently measured in number of instructions. |
2054 | unsigned getPrefetchDistance() const override { |
2055 | return Impl.getPrefetchDistance(); |
2056 | } |
2057 | |
2058 | /// \return The minimum stride, in bytes, necessary to trigger software |
2059 | /// prefetching, given the access pattern described by the arguments. |
2060 | |
2061 | unsigned getMinPrefetchStride(unsigned NumMemAccesses, |
2062 | unsigned NumStridedMemAccesses, |
2063 | unsigned NumPrefetches, |
2064 | bool HasCall) const override { |
2065 | return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses, |
2066 | NumPrefetches, HasCall); |
2067 | } |
2068 | |
2069 | /// \return The maximum number of iterations to prefetch ahead; no |
2070 | /// prefetching is performed if more iterations would be required. |
2071 | |
2072 | unsigned getMaxPrefetchIterationsAhead() const override { |
2073 | return Impl.getMaxPrefetchIterationsAhead(); |
2074 | } |
2075 | |
2076 | /// \return True if prefetching should also be done for writes. |
2077 | bool enableWritePrefetching() const override { |
2078 | return Impl.enableWritePrefetching(); |
2079 | } |
2080 | |
2081 | unsigned getMaxInterleaveFactor(unsigned VF) override { |
2082 | return Impl.getMaxInterleaveFactor(VF); |
2083 | } |
2084 | unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, |
2085 | unsigned &JTSize, |
2086 | ProfileSummaryInfo *PSI, |
2087 | BlockFrequencyInfo *BFI) override { |
2088 | return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI); |
2089 | } |
2090 | InstructionCost getArithmeticInstrCost( |
2091 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
2092 | OperandValueKind Opd1Info, OperandValueKind Opd2Info, |
2093 | OperandValueProperties Opd1PropInfo, OperandValueProperties Opd2PropInfo, |
2094 | ArrayRef<const Value *> Args, |
2095 | const Instruction *CxtI = nullptr) override { |
2096 | return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info, |
2097 | Opd1PropInfo, Opd2PropInfo, Args, CxtI); |
2098 | } |
2099 | InstructionCost getShuffleCost(ShuffleKind Kind, VectorType *Tp, |
2100 | ArrayRef<int> Mask, int Index, |
2101 | VectorType *SubTp) override { |
2102 | return Impl.getShuffleCost(Kind, Tp, Mask, Index, SubTp); |
2103 | } |
2104 | InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, |
2105 | CastContextHint CCH, |
2106 | TTI::TargetCostKind CostKind, |
2107 | const Instruction *I) override { |
2108 | return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I); |
2109 | } |
2110 | InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, |
2111 | VectorType *VecTy, |
2112 | unsigned Index) override { |
2113 | return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index); |
2114 | } |
2115 | InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, |
2116 | const Instruction *I = nullptr) override { |
2117 | return Impl.getCFInstrCost(Opcode, CostKind, I); |
2118 | } |
2119 | InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, |
2120 | CmpInst::Predicate VecPred, |
2121 | TTI::TargetCostKind CostKind, |
2122 | const Instruction *I) override { |
2123 | return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
2124 | } |
2125 | InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, |
2126 | unsigned Index) override { |
2127 | return Impl.getVectorInstrCost(Opcode, Val, Index); |
2128 | } |
2129 | InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, |
2130 | unsigned AddressSpace, |
2131 | TTI::TargetCostKind CostKind, |
2132 | const Instruction *I) override { |
2133 | return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2134 | CostKind, I); |
2135 | } |
2136 | InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, |
2137 | Align Alignment, unsigned AddressSpace, |
2138 | TTI::TargetCostKind CostKind) override { |
2139 | return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
2140 | CostKind); |
2141 | } |
2142 | InstructionCost |
2143 | getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr, |
2144 | bool VariableMask, Align Alignment, |
2145 | TTI::TargetCostKind CostKind, |
2146 | const Instruction *I = nullptr) override { |
2147 | return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask, |
2148 | Alignment, CostKind, I); |
2149 | } |
2150 | InstructionCost getInterleavedMemoryOpCost( |
2151 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
2152 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
2153 | bool UseMaskForCond, bool UseMaskForGaps) override { |
2154 | return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
2155 | Alignment, AddressSpace, CostKind, |
2156 | UseMaskForCond, UseMaskForGaps); |
2157 | } |
2158 | InstructionCost |
2159 | getArithmeticReductionCost(unsigned Opcode, VectorType *Ty, |
2160 | Optional<FastMathFlags> FMF, |
2161 | TTI::TargetCostKind CostKind) override { |
2162 | return Impl.getArithmeticReductionCost(Opcode, Ty, FMF, CostKind); |
2163 | } |
2164 | InstructionCost |
2165 | getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy, bool IsUnsigned, |
2166 | TTI::TargetCostKind CostKind) override { |
2167 | return Impl.getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind); |
2168 | } |
2169 | InstructionCost getExtendedAddReductionCost( |
2170 | bool IsMLA, bool IsUnsigned, Type *ResTy, VectorType *Ty, |
2171 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) override { |
2172 | return Impl.getExtendedAddReductionCost(IsMLA, IsUnsigned, ResTy, Ty, |
2173 | CostKind); |
2174 | } |
2175 | InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2176 | TTI::TargetCostKind CostKind) override { |
2177 | return Impl.getIntrinsicInstrCost(ICA, CostKind); |
2178 | } |
2179 | InstructionCost getCallInstrCost(Function *F, Type *RetTy, |
2180 | ArrayRef<Type *> Tys, |
2181 | TTI::TargetCostKind CostKind) override { |
2182 | return Impl.getCallInstrCost(F, RetTy, Tys, CostKind); |
2183 | } |
2184 | unsigned getNumberOfParts(Type *Tp) override { |
2185 | return Impl.getNumberOfParts(Tp); |
2186 | } |
2187 | InstructionCost getAddressComputationCost(Type *Ty, ScalarEvolution *SE, |
2188 | const SCEV *Ptr) override { |
2189 | return Impl.getAddressComputationCost(Ty, SE, Ptr); |
2190 | } |
2191 | InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override { |
2192 | return Impl.getCostOfKeepingLiveOverCall(Tys); |
2193 | } |
2194 | bool getTgtMemIntrinsic(IntrinsicInst *Inst, |
2195 | MemIntrinsicInfo &Info) override { |
2196 | return Impl.getTgtMemIntrinsic(Inst, Info); |
2197 | } |
2198 | unsigned getAtomicMemIntrinsicMaxElementSize() const override { |
2199 | return Impl.getAtomicMemIntrinsicMaxElementSize(); |
2200 | } |
2201 | Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, |
2202 | Type *ExpectedType) override { |
2203 | return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType); |
2204 | } |
2205 | Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, |
2206 | unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2207 | unsigned SrcAlign, |
2208 | unsigned DestAlign) const override { |
2209 | return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace, |
2210 | DestAddrSpace, SrcAlign, DestAlign); |
2211 | } |
2212 | void getMemcpyLoopResidualLoweringType( |
2213 | SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, |
2214 | unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, |
2215 | unsigned SrcAlign, unsigned DestAlign) const override { |
2216 | Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes, |
2217 | SrcAddrSpace, DestAddrSpace, |
2218 | SrcAlign, DestAlign); |
2219 | } |
2220 | bool areInlineCompatible(const Function *Caller, |
2221 | const Function *Callee) const override { |
2222 | return Impl.areInlineCompatible(Caller, Callee); |
2223 | } |
2224 | bool areFunctionArgsABICompatible( |
2225 | const Function *Caller, const Function *Callee, |
2226 | SmallPtrSetImpl<Argument *> &Args) const override { |
2227 | return Impl.areFunctionArgsABICompatible(Caller, Callee, Args); |
2228 | } |
2229 | bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override { |
2230 | return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout()); |
2231 | } |
2232 | bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override { |
2233 | return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout()); |
2234 | } |
2235 | unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override { |
2236 | return Impl.getLoadStoreVecRegBitWidth(AddrSpace); |
2237 | } |
2238 | bool isLegalToVectorizeLoad(LoadInst *LI) const override { |
2239 | return Impl.isLegalToVectorizeLoad(LI); |
2240 | } |
2241 | bool isLegalToVectorizeStore(StoreInst *SI) const override { |
2242 | return Impl.isLegalToVectorizeStore(SI); |
2243 | } |
2244 | bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, |
2245 | unsigned AddrSpace) const override { |
2246 | return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment, |
2247 | AddrSpace); |
2248 | } |
2249 | bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, |
2250 | unsigned AddrSpace) const override { |
2251 | return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment, |
2252 | AddrSpace); |
2253 | } |
2254 | bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, |
2255 | ElementCount VF) const override { |
2256 | return Impl.isLegalToVectorizeReduction(RdxDesc, VF); |
2257 | } |
2258 | bool isElementTypeLegalForScalableVector(Type *Ty) const override { |
2259 | return Impl.isElementTypeLegalForScalableVector(Ty); |
2260 | } |
2261 | unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, |
2262 | unsigned ChainSizeInBytes, |
2263 | VectorType *VecTy) const override { |
2264 | return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy); |
2265 | } |
2266 | unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, |
2267 | unsigned ChainSizeInBytes, |
2268 | VectorType *VecTy) const override { |
2269 | return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy); |
2270 | } |
2271 | bool preferInLoopReduction(unsigned Opcode, Type *Ty, |
2272 | ReductionFlags Flags) const override { |
2273 | return Impl.preferInLoopReduction(Opcode, Ty, Flags); |
2274 | } |
2275 | bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, |
2276 | ReductionFlags Flags) const override { |
2277 | return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags); |
2278 | } |
2279 | bool shouldExpandReduction(const IntrinsicInst *II) const override { |
2280 | return Impl.shouldExpandReduction(II); |
2281 | } |
2282 | |
2283 | unsigned getGISelRematGlobalCost() const override { |
2284 | return Impl.getGISelRematGlobalCost(); |
2285 | } |
2286 | |
2287 | bool supportsScalableVectors() const override { |
2288 | return Impl.supportsScalableVectors(); |
2289 | } |
2290 | |
2291 | bool hasActiveVectorLength() const override { |
2292 | return Impl.hasActiveVectorLength(); |
2293 | } |
2294 | |
2295 | InstructionCost getInstructionLatency(const Instruction *I) override { |
2296 | return Impl.getInstructionLatency(I); |
2297 | } |
2298 | |
2299 | VPLegalization |
2300 | getVPLegalizationStrategy(const VPIntrinsic &PI) const override { |
2301 | return Impl.getVPLegalizationStrategy(PI); |
2302 | } |
2303 | }; |
2304 | |
2305 | template <typename T> |
2306 | TargetTransformInfo::TargetTransformInfo(T Impl) |
2307 | : TTIImpl(new Model<T>(Impl)) {} |
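// The one-line constructor above completes a classic concept/model
// type-erasure pattern: any concrete TTI implementation is wrapped in a
// Model<T> whose virtual overrides all forward to the stored Impl. A minimal,
// self-contained sketch of the same idiom (Concept/Model/AnyCounter are
// hypothetical names for illustration, not part of LLVM):
//
//   #include <memory>
//   #include <utility>
//
//   class AnyCounter {
//     struct Concept {
//       virtual ~Concept() = default;
//       virtual int next() = 0;
//     };
//     template <typename T> struct Model final : Concept {
//       T Impl;
//       explicit Model(T X) : Impl(std::move(X)) {}
//       int next() override { return Impl.next(); } // forward to wrapped impl
//     };
//     std::unique_ptr<Concept> Ptr;
//
//   public:
//     // Mirrors the TargetTransformInfo constructor above: erase T's type.
//     template <typename T>
//     AnyCounter(T Impl) : Ptr(std::make_unique<Model<T>>(std::move(Impl))) {}
//     int next() { return Ptr->next(); }
//   };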
2308 | |
2309 | |
2310 | |
2311 | |
2312 | |
2313 | /// Analysis pass providing the \c TargetTransformInfo. |
2314 | /// |
2315 | /// The core idea of the TargetIRAnalysis is to expose an interface through |
2316 | /// which LLVM targets can analyze and provide information about the middle |
2317 | /// end's target-independent IR. The result is a per-function TTI object |
2318 | /// built by a target-registered callback when one is available, and a |
2319 | /// conservative baseline implementation otherwise. |
2320 | class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> { |
2321 | public: |
2322 | typedef TargetTransformInfo Result; |
2323 | |
2324 | /// Default construct a target IR analysis. |
2325 | /// |
2326 | /// This will use the module's datalayout to construct a baseline |
2327 | /// conservative TTI result. |
2328 | TargetIRAnalysis(); |
2329 | |
2330 | /// Construct an IR analysis pass around a target-provided TTI callback. |
2331 | /// |
2332 | /// The callback is invoked with each function for which a TTI result is |
2333 | /// needed and must return the TTI object to use for that function. |
2334 | TargetIRAnalysis(std::function<Result(const Function &)> TTICallback); |
2335 | |
2336 | // Value semantics. We spell out the constructors for MSVC. |
2337 | TargetIRAnalysis(const TargetIRAnalysis &Arg) |
2338 | : TTICallback(Arg.TTICallback) {} |
2339 | TargetIRAnalysis(TargetIRAnalysis &&Arg) |
2340 | : TTICallback(std::move(Arg.TTICallback)) {} |
2341 | TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) { |
2342 | TTICallback = RHS.TTICallback; |
2343 | return *this; |
2344 | } |
2345 | TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) { |
2346 | TTICallback = std::move(RHS.TTICallback); |
2347 | return *this; |
2348 | } |
2349 | |
2350 | Result run(const Function &F, FunctionAnalysisManager &); |
2351 | |
2352 | private: |
2353 | friend AnalysisInfoMixin<TargetIRAnalysis>; |
2354 | static AnalysisKey Key; |
2355 | |
2356 | /// The callback used to produce a result. |
2357 | /// |
2358 | /// We use a completely opaque callback so that targets can provide whatever |
2359 | /// mechanism they desire for constructing the TTI for a given function. |
2360 | |
2361 | |
2362 | |
2363 | |
2364 | |
2365 | |
2366 | std::function<Result(const Function &)> TTICallback; |
2367 | |
2368 | /// Helper function used as the callback in the default constructor. |
2369 | static Result getDefaultTTI(const Function &F); |
2370 | }; |
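// Usage sketch for the analysis above under the new pass manager
// (illustrative; `TM` is an assumed TargetMachine pointer, and error handling
// is omitted). With no callback registered, the default-constructed analysis
// falls back to the conservative baseline TTI.
//
//   FunctionAnalysisManager FAM;
//   FAM.registerPass([&] {
//     return TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis();
//   });
//   // ... later, inside a pass:
//   TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);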
2371 | |
2372 | /// Wrapper pass for TargetTransformInfo. |
2373 | /// |
2374 | /// This pass can be constructed from a TTI object which it stores internally |
2375 | /// and is queried by passes. |
2376 | class TargetTransformInfoWrapperPass : public ImmutablePass { |
2377 | TargetIRAnalysis TIRA; |
2378 | Optional<TargetTransformInfo> TTI; |
2379 | |
2380 | virtual void anchor(); |
2381 | |
2382 | public: |
2383 | static char ID; |
2384 | |
2385 | /// We must provide a default constructor for the pass but it should |
2386 | /// never be used. |
2387 | /// |
2388 | /// Use the constructor below or call one of the creation routines. |
2389 | TargetTransformInfoWrapperPass(); |
2390 | |
2391 | explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
2392 | |
2393 | TargetTransformInfo &getTTI(const Function &F); |
2394 | }; |
2395 | |
2396 | /// Create an analysis pass wrapper around a TTI object. |
2397 | /// |
2398 | /// This analysis pass just holds the TTI instance and makes it available to |
2399 | /// clients. |
2400 | ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA); |
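// Legacy pass manager sketch (illustrative, with a hypothetical MyPass): the
// wrapper pass is declared as a required analysis, and a per-function TTI is
// pulled out of it inside runOnFunction.
//
//   void MyPass::getAnalysisUsage(AnalysisUsage &AU) const {
//     AU.addRequired<TargetTransformInfoWrapperPass>();
//   }
//
//   bool MyPass::runOnFunction(Function &F) {
//     TargetTransformInfo &TTI =
//         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//     // ... query TTI ...
//     return false;
//   }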
2401 | |
2402 | } // namespace llvm |
2403 | |
2404 | #endif |
1 | //===- llvm/ADT/SmallVector.h - 'Normally small' vectors -------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the SmallVector class. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #ifndef LLVM_ADT_SMALLVECTOR_H |
14 | #define LLVM_ADT_SMALLVECTOR_H |
15 | |
16 | #include "llvm/ADT/iterator_range.h" |
17 | #include "llvm/Support/Compiler.h" |
18 | #include "llvm/Support/ErrorHandling.h" |
19 | #include "llvm/Support/MemAlloc.h" |
20 | #include "llvm/Support/type_traits.h" |
21 | #include <algorithm> |
22 | #include <cassert> |
23 | #include <cstddef> |
24 | #include <cstdlib> |
25 | #include <cstring> |
26 | #include <functional> |
27 | #include <initializer_list> |
28 | #include <iterator> |
29 | #include <limits> |
30 | #include <memory> |
31 | #include <new> |
32 | #include <type_traits> |
33 | #include <utility> |
34 | |
35 | namespace llvm { |
36 | |
37 | /// This is all the stuff common to all SmallVectors. |
38 | /// |
39 | /// The template parameter specifies the type which should be used to hold the |
40 | /// Size and Capacity of the SmallVector, so it can be adjusted. |
41 | /// Using 32 bit size is desirable to shrink the size of the SmallVector. |
42 | /// Using 64 bit size is desirable for cases like SmallVector<char>, where a |
43 | /// 32 bit size would limit the vector to ~4GB. SmallVectors are used for |
44 | /// buffering bitcode output - which can exceed 4GB. |
45 | template <class Size_T> class SmallVectorBase { |
46 | protected: |
47 | void *BeginX; |
48 | Size_T Size = 0, Capacity; |
49 | |
50 | /// The maximum value of the Size_T used. |
51 | static constexpr size_t SizeTypeMax() { |
52 | return std::numeric_limits<Size_T>::max(); |
53 | } |
54 | |
55 | SmallVectorBase() = delete; |
56 | SmallVectorBase(void *FirstEl, size_t TotalCapacity) |
57 | : BeginX(FirstEl), Capacity(TotalCapacity) {} |
58 | |
59 | /// This is a helper for \a grow() that's out of line to reduce code |
60 | /// duplication. This function will report a fatal error if it can't grow at |
61 | /// least to \p MinSize. |
62 | void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity); |
63 | |
64 | /// This is an implementation of the grow() method which only works |
65 | /// on POD-like data types and is out of line to reduce code duplication. |
66 | /// This function will report a fatal error if it cannot increase capacity. |
67 | void grow_pod(void *FirstEl, size_t MinSize, size_t TSize); |
68 | |
69 | public: |
70 | size_t size() const { return Size; } |
71 | size_t capacity() const { return Capacity; } |
72 | |
73 | LLVM_NODISCARD bool empty() const { return !Size; } |
| 14 | Assuming field 'Size' is not equal to 0 |
| 15 | Returning zero, which participates in a condition later |
74 | |
75 | /// Set the array size to \p N, which the current array must have enough |
76 | /// capacity for. |
77 | /// |
78 | /// This does not construct or destroy any elements in the vector. |
79 | /// |
80 | /// Clients can use this in conjunction with capacity() to write past the end |
81 | /// of the buffer when they know that more elements are available, and only |
82 | /// update the size later. This avoids the cost of value initializing elements |
83 | /// which will only be overwritten. |
84 | void set_size(size_t N) { |
85 | assert(N <= capacity()); |
86 | Size = N; |
87 | } |
88 | }; |
89 | |
90 | template <class T> |
91 | using SmallVectorSizeType = |
92 | typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t, |
93 | uint32_t>::type; |
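// A consequence of the conditional above, assuming an LP64 host where
// sizeof(void *) == 8: sub-4-byte element types get a 64-bit size field so
// that e.g. SmallVector<char> is not capped at ~4GB, while larger element
// types keep a compact 32-bit header.
//
//   static_assert(sizeof(SmallVectorSizeType<char>) == 8, "64-bit Size/Capacity");
//   static_assert(sizeof(SmallVectorSizeType<int>) == 4, "32-bit Size/Capacity");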
94 | |
95 | /// Figure out the offset of the first element. |
96 | template <class T, typename = void> struct SmallVectorAlignmentAndSize { |
97 | alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof( |
98 | SmallVectorBase<SmallVectorSizeType<T>>)]; |
99 | alignas(T) char FirstEl[sizeof(T)]; |
100 | }; |
101 | |
102 | /// This is the part of SmallVectorTemplateBase which does not depend on whether |
103 | /// the type T is a POD. The extra dummy template argument is used by ArrayRef |
104 | /// to avoid unnecessarily requiring T to be complete. |
105 | template <typename T, typename = void> |
106 | class SmallVectorTemplateCommon |
107 | : public SmallVectorBase<SmallVectorSizeType<T>> { |
108 | using Base = SmallVectorBase<SmallVectorSizeType<T>>; |
109 | |
110 | /// Find the address of the first element.  For this pointer math to be valid |
111 | /// with small-size of 0 for T with lots of alignment, it's important that |
112 | /// SmallVectorStorage is properly-aligned even for small-size of 0. |
113 | void *getFirstEl() const { |
114 | return const_cast<void *>(reinterpret_cast<const void *>( |
115 | reinterpret_cast<const char *>(this) + |
116 | offsetof(SmallVectorAlignmentAndSize<T>, FirstEl))); |
117 | } |
118 | |
119 | |
120 | protected: |
121 | SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {} |
122 | |
123 | void grow_pod(size_t MinSize, size_t TSize) { |
124 | Base::grow_pod(getFirstEl(), MinSize, TSize); |
125 | } |
126 | |
127 | /// Return true if this is a smallvector which has not had dynamic |
128 | /// memory allocated for it. |
129 | bool isSmall() const { return this->BeginX == getFirstEl(); } |
130 | |
131 | /// Put this vector in a state of being small. |
132 | void resetToSmall() { |
133 | this->BeginX = getFirstEl(); |
134 | this->Size = this->Capacity = 0; |
135 | } |
136 | |
137 | /// Return true if V is an internal reference to the given range. |
138 | bool isReferenceToRange(const void *V, const void *First, const void *Last) const { |
139 | // Use std::less to avoid UB. |
140 | std::less<> LessThan; |
141 | return !LessThan(V, First) && LessThan(V, Last); |
142 | } |
143 | |
144 | /// Return true if V is an internal reference to this vector. |
145 | bool isReferenceToStorage(const void *V) const { |
146 | return isReferenceToRange(V, this->begin(), this->end()); |
147 | } |
148 | |
149 | /// Return true if First and Last form a valid (possibly empty) range in this |
150 | /// vector's storage. |
151 | bool isRangeInStorage(const void *First, const void *Last) const { |
152 | // Use std::less to avoid UB. |
153 | std::less<> LessThan; |
154 | return !LessThan(First, this->begin()) && !LessThan(Last, First) && |
155 | !LessThan(this->end(), Last); |
156 | } |
157 | |
158 | /// Return true unless Elt will be invalidated by resizing the vector to |
159 | /// NewSize. |
160 | bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { |
161 | |
162 | if (LLVM_LIKELY(!isReferenceToStorage(Elt))) |
163 | return true; |
164 | |
165 | |
166 | if (NewSize <= this->size()) |
167 | return Elt < this->begin() + NewSize; |
168 | |
169 | |
170 | return NewSize <= this->capacity(); |
171 | } |
172 | |
173 | /// Check whether Elt will be invalidated by resizing the vector to NewSize. |
174 | void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) { |
175 | assert(isSafeToReferenceAfterResize(Elt, NewSize) && |
176 | "Attempting to reference an element of the vector in an operation " |
177 | "that invalidates it"); |
178 | } |
179 | |
180 | /// Check whether Elt will be invalidated by increasing the size of the |
181 | /// vector by N. |
182 | void assertSafeToAdd(const void *Elt, size_t N = 1) { |
183 | this->assertSafeToReferenceAfterResize(Elt, this->size() + N); |
184 | } |
185 | |
186 | /// Check whether any part of the range will be invalidated by clearing. |
187 | void assertSafeToReferenceAfterClear(const T *From, const T *To) { |
188 | if (From == To) |
189 | return; |
190 | this->assertSafeToReferenceAfterResize(From, 0); |
191 | this->assertSafeToReferenceAfterResize(To - 1, 0); |
192 | } |
193 | template < |
194 | class ItTy, |
195 | std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, |
196 | bool> = false> |
197 | void assertSafeToReferenceAfterClear(ItTy, ItTy) {} |
198 | |
199 | /// Check whether any part of the range will be invalidated by growing. |
200 | void assertSafeToAddRange(const T *From, const T *To) { |
201 | if (From == To) |
202 | return; |
203 | this->assertSafeToAdd(From, To - From); |
204 | this->assertSafeToAdd(To - 1, To - From); |
205 | } |
206 | template < |
207 | class ItTy, |
208 | std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value, |
209 | bool> = false> |
210 | void assertSafeToAddRange(ItTy, ItTy) {} |
211 | |
212 | /// Reserve enough space to add one element, and return the updated element |
213 | /// pointer in case it was a reference to storage. |
214 | template <class U> |
215 | static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt, |
216 | size_t N) { |
217 | size_t NewSize = This->size() + N; |
218 | if (LLVM_LIKELY(NewSize <= This->capacity())) |
219 | return &Elt; |
220 | |
221 | bool ReferencesStorage = false; |
222 | int64_t Index = -1; |
223 | if (!U::TakesParamByValue) { |
224 | if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))) { |
225 | ReferencesStorage = true; |
226 | Index = &Elt - This->begin(); |
227 | } |
228 | } |
229 | This->grow(NewSize); |
230 | return ReferencesStorage ? This->begin() + Index : &Elt; |
231 | } |
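// What the helper above buys (illustrative): pushing an element of the vector
// into the same vector is safe even when the push reallocates. The parameter's
// address is recomputed against the new buffer instead of being left dangling.
//
//   SmallVector<std::string, 1> V;
//   V.push_back("first");
//   V.push_back(V[0]); // may grow; the copy reads V[0] from the new buffer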
232 | |
233 | public: |
234 | using size_type = size_t; |
235 | using difference_type = ptrdiff_t; |
236 | using value_type = T; |
237 | using iterator = T *; |
238 | using const_iterator = const T *; |
239 | |
240 | using const_reverse_iterator = std::reverse_iterator<const_iterator>; |
241 | using reverse_iterator = std::reverse_iterator<iterator>; |
242 | |
243 | using reference = T &; |
244 | using const_reference = const T &; |
245 | using pointer = T *; |
246 | using const_pointer = const T *; |
247 | |
248 | using Base::capacity; |
249 | using Base::empty; |
250 | using Base::size; |
251 | |
252 | |
253 | iterator begin() { return (iterator)this->BeginX; } |
254 | const_iterator begin() const { return (const_iterator)this->BeginX; } |
255 | iterator end() { return begin() + size(); } |
256 | const_iterator end() const { return begin() + size(); } |
257 | |
258 | |
259 | reverse_iterator rbegin() { return reverse_iterator(end()); } |
260 | const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); } |
261 | reverse_iterator rend() { return reverse_iterator(begin()); } |
262 | const_reverse_iterator rend() const { return const_reverse_iterator(begin()); } |
263 | |
264 | size_type size_in_bytes() const { return size() * sizeof(T); } |
265 | size_type max_size() const { |
266 | return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T)); |
267 | } |
268 | |
269 | size_t capacity_in_bytes() const { return capacity() * sizeof(T); } |
270 | |
271 | |
272 | pointer data() { return pointer(begin()); } |
273 | |
274 | const_pointer data() const { return const_pointer(begin()); } |
275 | |
276 | reference operator[](size_type idx) { |
277 | assert(idx < size()); |
278 | return begin()[idx]; |
279 | } |
280 | const_reference operator[](size_type idx) const { |
281 | assert(idx < size()); |
282 | return begin()[idx]; |
283 | } |
284 | |
285 | reference front() { |
286 | assert(!empty()); |
287 | return begin()[0]; |
288 | } |
289 | const_reference front() const { |
290 | assert(!empty()); |
291 | return begin()[0]; |
292 | } |
293 | |
294 | reference back() { |
295 | assert(!empty()); |
296 | return end()[-1]; |
297 | } |
298 | const_reference back() const { |
299 | assert(!empty()); |
300 | return end()[-1]; |
301 | } |
302 | }; |
303 | |
304 | /// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put |
305 | /// method implementations that are designed to work with non-trivial T's. |
306 | /// |
307 | /// We approximate is_trivially_copyable with trivial move/copy construction |
308 | /// and trivial destruction. While the standard doesn't specify that you're |
309 | /// allowed to copy these types with memcpy, there is no way for the type to |
310 | /// observe this. This implies that memcpy(new, old), destroy(old) is |
311 | /// equivalent to move-construct(new, old), destroy(old) for these types. |
312 | template <typename T, bool = (is_trivially_copy_constructible<T>::value) && |
313 | (is_trivially_move_constructible<T>::value) && |
314 | std::is_trivially_destructible<T>::value> |
315 | class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> { |
316 | friend class SmallVectorTemplateCommon<T>; |
317 | |
318 | protected: |
319 | static constexpr bool TakesParamByValue = false; |
320 | using ValueParamT = const T &; |
321 | |
322 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
323 | |
324 | static void destroy_range(T *S, T *E) { |
325 | while (S != E) { |
326 | --E; |
327 | E->~T(); |
328 | } |
329 | } |
330 | |
331 | /// Move the range [I, E) into the uninitialized memory starting with "Dest", |
332 | /// constructing elements as needed. |
333 | template<typename It1, typename It2> |
334 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
335 | std::uninitialized_copy(std::make_move_iterator(I), |
336 | std::make_move_iterator(E), Dest); |
337 | } |
338 | |
339 | /// Copy the range [I, E) onto the uninitialized memory starting with "Dest", |
340 | /// constructing elements as needed. |
341 | template<typename It1, typename It2> |
342 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
343 | std::uninitialized_copy(I, E, Dest); |
344 | } |
345 | |
346 | /// Grow the allocated memory (without initializing new elements), doubling |
347 | /// the size of the allocated memory. Guarantees space for at least one more |
348 | /// element, or MinSize more elements if specified. |
349 | void grow(size_t MinSize = 0); |
350 | |
351 | /// Create a new allocation big enough for \p MinSize and pass back its size |
352 | /// in \p NewCapacity. This is the first section of \a grow(). |
353 | T *mallocForGrow(size_t MinSize, size_t &NewCapacity) { |
354 | return static_cast<T *>( |
355 | SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow( |
356 | MinSize, sizeof(T), NewCapacity)); |
357 | } |
358 | |
359 | /// Move existing elements over to the new allocation \p NewElts, the middle |
360 | /// section of \a grow(). |
361 | void moveElementsForGrow(T *NewElts); |
362 | |
363 | /// Transfer ownership of the allocation, finishing up \a grow(). |
364 | void takeAllocationForGrow(T *NewElts, size_t NewCapacity); |
365 | |
366 | /// Reserve enough space to add one element, and return the updated element |
367 | /// pointer in case it was a reference to storage. |
368 | const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { |
369 | return this->reserveForParamAndGetAddressImpl(this, Elt, N); |
370 | } |
371 | |
372 | /// Reserve enough space to add one element, and return the updated element |
373 | /// pointer in case it was a reference to storage. |
374 | T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { |
375 | return const_cast<T *>( |
376 | this->reserveForParamAndGetAddressImpl(this, Elt, N)); |
377 | } |
378 | |
379 | static T &&forward_value_param(T &&V) { return std::move(V); } |
380 | static const T &forward_value_param(const T &V) { return V; } |
381 | |
382 | void growAndAssign(size_t NumElts, const T &Elt) { |
383 | |
384 | size_t NewCapacity; |
385 | T *NewElts = mallocForGrow(NumElts, NewCapacity); |
386 | std::uninitialized_fill_n(NewElts, NumElts, Elt); |
387 | this->destroy_range(this->begin(), this->end()); |
388 | takeAllocationForGrow(NewElts, NewCapacity); |
389 | this->set_size(NumElts); |
390 | } |
391 | |
392 | template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { |
393 | |
394 | size_t NewCapacity; |
395 | T *NewElts = mallocForGrow(0, NewCapacity); |
396 | ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...); |
397 | moveElementsForGrow(NewElts); |
398 | takeAllocationForGrow(NewElts, NewCapacity); |
399 | this->set_size(this->size() + 1); |
400 | return this->back(); |
401 | } |
402 | |
403 | public: |
404 | void push_back(const T &Elt) { |
405 | const T *EltPtr = reserveForParamAndGetAddress(Elt); |
406 | ::new ((void *)this->end()) T(*EltPtr); |
407 | this->set_size(this->size() + 1); |
408 | } |
409 | |
410 | void push_back(T &&Elt) { |
411 | T *EltPtr = reserveForParamAndGetAddress(Elt); |
412 | ::new ((void *)this->end()) T(::std::move(*EltPtr)); |
413 | this->set_size(this->size() + 1); |
414 | } |
415 | |
416 | void pop_back() { |
417 | this->set_size(this->size() - 1); |
418 | this->end()->~T(); |
419 | } |
420 | }; |
421 | |
422 | |
423 | template <typename T, bool TriviallyCopyable> |
424 | void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) { |
425 | size_t NewCapacity; |
426 | T *NewElts = mallocForGrow(MinSize, NewCapacity); |
427 | moveElementsForGrow(NewElts); |
428 | takeAllocationForGrow(NewElts, NewCapacity); |
429 | } |
430 | |
431 | |
432 | template <typename T, bool TriviallyCopyable> |
433 | void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow( |
434 | T *NewElts) { |
435 | |
436 | this->uninitialized_move(this->begin(), this->end(), NewElts); |
437 | |
438 | |
439 | destroy_range(this->begin(), this->end()); |
440 | } |
441 | |
442 | |
443 | template <typename T, bool TriviallyCopyable> |
444 | void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow( |
445 | T *NewElts, size_t NewCapacity) { |
446 | |
447 | if (!this->isSmall()) |
448 | free(this->begin()); |
449 | |
450 | this->BeginX = NewElts; |
451 | this->Capacity = NewCapacity; |
452 | } |
453 | |
454 | /// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put |
455 | /// method implementations that are designed to work with trivially copyable |
456 | /// T's. This allows using memcpy in place of copy/move construction and |
457 | /// skipping destruction. |
458 | template <typename T> |
459 | class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> { |
460 | friend class SmallVectorTemplateCommon<T>; |
461 | |
462 | protected: |
463 | /// True if it's cheap enough to take parameters by value. Doing so avoids |
464 | /// overhead related to mitigations for reference invalidation. |
465 | static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *); |
466 | |
467 | /// Either const T& or T, depending on whether it's cheap enough to take |
468 | /// parameters by value. |
469 | using ValueParamT = |
470 | typename std::conditional<TakesParamByValue, T, const T &>::type; |
471 | |
472 | SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {} |
473 | |
474 | // No need to do a destroy loop for POD's. |
475 | static void destroy_range(T *, T *) {} |
476 | |
477 | |
478 | |
479 | template<typename It1, typename It2> |
480 | static void uninitialized_move(It1 I, It1 E, It2 Dest) { |
481 | |
482 | uninitialized_copy(I, E, Dest); |
483 | } |
484 | |
485 | |
486 | |
487 | template<typename It1, typename It2> |
488 | static void uninitialized_copy(It1 I, It1 E, It2 Dest) { |
489 | |
490 | std::uninitialized_copy(I, E, Dest); |
491 | } |
492 | |
493 | |
494 | |
495 | template <typename T1, typename T2> |
496 | static void uninitialized_copy( |
497 | T1 *I, T1 *E, T2 *Dest, |
498 | std::enable_if_t<std::is_same<typename std::remove_const<T1>::type, |
499 | T2>::value> * = nullptr) { |
500 | // Use memcpy for PODs iterated by pointers (which includes SmallVector |
501 | // iterators): std::uninitialized_copy optimizes to memmove, but we can |
502 | // use memcpy here. Note that I and E are iterators and thus might be |
503 | // invalid for memcpy if they are equal. |
504 | if (I != E) |
505 | memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T)); |
506 | } |
507 | |
508 | |
509 | |
510 | void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); } |
511 | |
512 | |
513 | |
514 | const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) { |
515 | return this->reserveForParamAndGetAddressImpl(this, Elt, N); |
516 | } |
517 | |
518 | |
519 | |
520 | T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) { |
521 | return const_cast<T *>( |
522 | this->reserveForParamAndGetAddressImpl(this, Elt, N)); |
523 | } |
524 | |
525 | |
526 | static ValueParamT forward_value_param(ValueParamT V) { return V; } |
527 | |
528 | void growAndAssign(size_t NumElts, T Elt) { |
529 | |
530 | |
531 | this->set_size(0); |
532 | this->grow(NumElts); |
533 | std::uninitialized_fill_n(this->begin(), NumElts, Elt); |
534 | this->set_size(NumElts); |
535 | } |
536 | |
537 | template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) { |
538 | |
539 | |
540 | |
541 | push_back(T(std::forward<ArgTypes>(Args)...)); |
542 | return this->back(); |
543 | } |
544 | |
545 | public: |
546 | void push_back(ValueParamT Elt) { |
547 | const T *EltPtr = reserveForParamAndGetAddress(Elt); |
548 | memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T)); |
549 | this->set_size(this->size() + 1); |
550 | } |
551 | |
552 | void pop_back() { this->set_size(this->size() - 1); } |
553 | }; |
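// Which specialization a type lands in is decided by the traits in the primary
// template's bool parameter, which together approximate
// std::is_trivially_copyable. A quick illustrative check:
//
//   struct Pod { int X; };
//   struct NonPod { std::string S; };
//   static_assert(std::is_trivially_copyable<Pod>::value,
//                 "takes the memcpy-based specialization above");
//   static_assert(!std::is_trivially_copyable<NonPod>::value,
//                 "takes the construct/destroy implementation");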
554 | |
555 | /// This class consists of common code factored out of the SmallVector class to |
556 | /// reduce code duplication based on the SmallVector 'N' template parameter. |
557 | template <typename T> |
558 | class SmallVectorImpl : public SmallVectorTemplateBase<T> { |
559 | using SuperClass = SmallVectorTemplateBase<T>; |
560 | |
561 | public: |
562 | using iterator = typename SuperClass::iterator; |
563 | using const_iterator = typename SuperClass::const_iterator; |
564 | using reference = typename SuperClass::reference; |
565 | using size_type = typename SuperClass::size_type; |
566 | |
567 | protected: |
568 | using SmallVectorTemplateBase<T>::TakesParamByValue; |
569 | using ValueParamT = typename SuperClass::ValueParamT; |
570 | |
571 | |
572 | explicit SmallVectorImpl(unsigned N) |
573 | : SmallVectorTemplateBase<T>(N) {} |
574 | |
575 | public: |
576 | SmallVectorImpl(const SmallVectorImpl &) = delete; |
577 | |
578 | ~SmallVectorImpl() { |
579 | |
580 | |
581 | if (!this->isSmall()) |
582 | free(this->begin()); |
583 | } |
584 | |
585 | void clear() { |
586 | this->destroy_range(this->begin(), this->end()); |
587 | this->Size = 0; |
588 | } |
589 | |
590 | private: |
591 | template <bool ForOverwrite> void resizeImpl(size_type N) { |
592 | if (N < this->size()) { |
593 | this->pop_back_n(this->size() - N); |
594 | } else if (N > this->size()) { |
595 | this->reserve(N); |
596 | for (auto I = this->end(), E = this->begin() + N; I != E; ++I) |
597 | if (ForOverwrite) |
598 | new (&*I) T; |
599 | else |
600 | new (&*I) T(); |
601 | this->set_size(N); |
602 | } |
603 | } |
604 | |
605 | public: |
606 | void resize(size_type N) { resizeImpl<false>(N); } |
607 | |
608 | |
609 | void resize_for_overwrite(size_type N) { resizeImpl<true>(N); } |
610 | |
611 | void resize(size_type N, ValueParamT NV) { |
612 | if (N == this->size()) |
613 | return; |
614 | |
615 | if (N < this->size()) { |
616 | this->pop_back_n(this->size() - N); |
617 | return; |
618 | } |
619 | |
620 | |
621 | this->append(N - this->size(), NV); |
622 | } |
623 | |
624 | void reserve(size_type N) { |
625 | if (this->capacity() < N) |
626 | this->grow(N); |
627 | } |
628 | |
629 | void pop_back_n(size_type NumItems) { |
630 | assert(this->size() >= NumItems); |
631 | this->destroy_range(this->end() - NumItems, this->end()); |
632 | this->set_size(this->size() - NumItems); |
633 | } |
634 | |
635 | LLVM_NODISCARD T pop_back_val() { |
636 | T Result = ::std::move(this->back()); |
637 | this->pop_back(); |
638 | return Result; |
639 | } |
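// pop_back_val() enables the pervasive LLVM worklist idiom: move the last
// element out and shrink by one in a single call. Illustrative sketch:
//
//   SmallVector<int, 8> Worklist = {1, 2, 3};
//   while (!Worklist.empty()) {
//     int Item = Worklist.pop_back_val();
//     // ... process Item, possibly pushing follow-up work ...
//   }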
640 | |
641 | void swap(SmallVectorImpl &RHS); |
642 | |
643 | |
644 | template <typename in_iter, |
645 | typename = std::enable_if_t<std::is_convertible< |
646 | typename std::iterator_traits<in_iter>::iterator_category, |
647 | std::input_iterator_tag>::value>> |
648 | void append(in_iter in_start, in_iter in_end) { |
649 | this->assertSafeToAddRange(in_start, in_end); |
650 | size_type NumInputs = std::distance(in_start, in_end); |
651 | this->reserve(this->size() + NumInputs); |
652 | this->uninitialized_copy(in_start, in_end, this->end()); |
653 | this->set_size(this->size() + NumInputs); |
654 | } |
655 | |
656 | |
657 | void append(size_type NumInputs, ValueParamT Elt) { |
658 | const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs); |
659 | std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr); |
660 | this->set_size(this->size() + NumInputs); |
661 | } |
662 | |
663 | void append(std::initializer_list<T> IL) { |
664 | append(IL.begin(), IL.end()); |
665 | } |
666 | |
667 | void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); } |
668 | |
669 | void assign(size_type NumElts, ValueParamT Elt) { |
670 | |
671 | if (NumElts > this->capacity()) { |
672 | this->growAndAssign(NumElts, Elt); |
673 | return; |
674 | } |
675 | |
676 | |
677 | std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt); |
678 | if (NumElts > this->size()) |
679 | std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt); |
680 | else if (NumElts < this->size()) |
681 | this->destroy_range(this->begin() + NumElts, this->end()); |
682 | this->set_size(NumElts); |
683 | } |
684 | |
685 | |
686 | |
687 | |
688 | template <typename in_iter, |
689 | typename = std::enable_if_t<std::is_convertible< |
690 | typename std::iterator_traits<in_iter>::iterator_category, |
691 | std::input_iterator_tag>::value>> |
692 | void assign(in_iter in_start, in_iter in_end) { |
693 | this->assertSafeToReferenceAfterClear(in_start, in_end); |
694 | clear(); |
695 | append(in_start, in_end); |
696 | } |
697 | |
698 | void assign(std::initializer_list<T> IL) { |
699 | clear(); |
700 | append(IL); |
701 | } |
702 | |
703 | void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); } |
704 | |
705 | iterator erase(const_iterator CI) { |
706 | |
707 | iterator I = const_cast<iterator>(CI); |
708 | |
709 | assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds."); |
710 | |
711 | iterator N = I; |
712 | |
713 | std::move(I+1, this->end(), I); |
714 | |
715 | this->pop_back(); |
716 | return N; |
717 | } |
718 | |
719 | iterator erase(const_iterator CS, const_iterator CE) { |
720 | |
721 | iterator S = const_cast<iterator>(CS); |
722 | iterator E = const_cast<iterator>(CE); |
723 | |
724 | assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds."); |
725 | |
726 | iterator N = S; |
727 | |
728 | iterator I = std::move(E, this->end(), S); |
729 | |
730 | this->destroy_range(I, this->end()); |
731 | this->set_size(I - this->begin()); |
732 | return N; |
733 | } |
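// Both erase overloads accept const_iterators but return a mutable iterator to
// the element following the erased range, so the usual erase-or-advance loop
// works as it does for std::vector (illustrative; shouldRemove is a
// hypothetical predicate):
//
//   for (auto *I = V.begin(); I != V.end();) {
//     if (shouldRemove(*I))
//       I = V.erase(I); // elements shift down; I now names the next element
//     else
//       ++I;
//   }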
734 | |
735 | private: |
736 | template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) { |
737 | |
738 | static_assert( |
739 | std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>, |
740 | T>::value, |
741 | "ArgType must be derived from T!"); |
742 | |
743 | if (I == this->end()) { |
744 | this->push_back(::std::forward<ArgType>(Elt)); |
745 | return this->end()-1; |
746 | } |
747 | |
748 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
749 | |
750 | |
751 | size_t Index = I - this->begin(); |
752 | std::remove_reference_t<ArgType> *EltPtr = |
753 | this->reserveForParamAndGetAddress(Elt); |
754 | I = this->begin() + Index; |
755 | |
756 | ::new ((void*) this->end()) T(::std::move(this->back())); |
757 | |
758 | std::move_backward(I, this->end()-1, this->end()); |
759 | this->set_size(this->size() + 1); |
760 | |
761 | |
762 | |
763 | static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value, |
764 | "ArgType must be 'T' when taking by value!"); |
765 | if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end())) |
766 | ++EltPtr; |
767 | |
768 | *I = ::std::forward<ArgType>(*EltPtr); |
769 | return I; |
770 | } |
771 | |
772 | public: |
773 | iterator insert(iterator I, T &&Elt) { |
774 | return insert_one_impl(I, this->forward_value_param(std::move(Elt))); |
775 | } |
776 | |
777 | iterator insert(iterator I, const T &Elt) { |
778 | return insert_one_impl(I, this->forward_value_param(Elt)); |
779 | } |
780 | |
781 | iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) { |
782 | |
783 | size_t InsertElt = I - this->begin(); |
784 | |
785 | if (I == this->end()) { |
786 | append(NumToInsert, Elt); |
787 | return this->begin()+InsertElt; |
788 | } |
789 | |
790 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
791 | |
792 | |
793 | |
794 | const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert); |
795 | |
796 | |
797 | I = this->begin()+InsertElt; |
798 | |
799 | |
800 | |
801 | |
802 | |
803 | if (size_t(this->end()-I) >= NumToInsert) { |
804 | T *OldEnd = this->end(); |
805 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
806 | std::move_iterator<iterator>(this->end())); |
807 | |
808 | |
809 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
810 | |
811 | |
812 | |
813 | if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) |
814 | EltPtr += NumToInsert; |
815 | |
816 | std::fill_n(I, NumToInsert, *EltPtr); |
817 | return I; |
818 | } |
819 | |
820 | |
821 | |
822 | |
823 | |
824 | T *OldEnd = this->end(); |
825 | this->set_size(this->size() + NumToInsert); |
826 | size_t NumOverwritten = OldEnd-I; |
827 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
828 | |
829 | |
830 | |
831 | if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end()) |
832 | EltPtr += NumToInsert; |
833 | |
834 | |
835 | std::fill_n(I, NumOverwritten, *EltPtr); |
836 | |
837 | |
838 | std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr); |
839 | return I; |
840 | } |
841 | |
842 | template <typename ItTy, |
843 | typename = std::enable_if_t<std::is_convertible< |
844 | typename std::iterator_traits<ItTy>::iterator_category, |
845 | std::input_iterator_tag>::value>> |
846 | iterator insert(iterator I, ItTy From, ItTy To) { |
847 | |
848 | size_t InsertElt = I - this->begin(); |
849 | |
850 | if (I == this->end()) { |
851 | append(From, To); |
852 | return this->begin()+InsertElt; |
853 | } |
854 | |
855 | assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds."); |
856 | |
857 | |
858 | this->assertSafeToAddRange(From, To); |
859 | |
860 | size_t NumToInsert = std::distance(From, To); |
861 | |
862 | |
863 | reserve(this->size() + NumToInsert); |
864 | |
865 | |
866 | I = this->begin()+InsertElt; |
867 | |
868 | |
869 | |
870 | |
871 | |
872 | if (size_t(this->end()-I) >= NumToInsert) { |
873 | T *OldEnd = this->end(); |
874 | append(std::move_iterator<iterator>(this->end() - NumToInsert), |
875 | std::move_iterator<iterator>(this->end())); |
876 | |
877 | |
878 | std::move_backward(I, OldEnd-NumToInsert, OldEnd); |
879 | |
880 | std::copy(From, To, I); |
881 | return I; |
882 | } |
883 | |
884 | |
885 | |
886 | |
887 | |
888 | T *OldEnd = this->end(); |
889 | this->set_size(this->size() + NumToInsert); |
890 | size_t NumOverwritten = OldEnd-I; |
891 | this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten); |
892 | |
893 | |
894 | for (T *J = I; NumOverwritten > 0; --NumOverwritten) { |
895 | *J = *From; |
896 | ++J; ++From; |
897 | } |
898 | |
899 | |
900 | this->uninitialized_copy(From, To, OldEnd); |
901 | return I; |
902 | } |
903 | |
904 | void insert(iterator I, std::initializer_list<T> IL) { |
905 | insert(I, IL.begin(), IL.end()); |
906 | } |
907 | |
908 | template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) { |
909 | if (LLVM_UNLIKELY(this->size() >= this->capacity())) |
910 | return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...); |
911 | |
912 | ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...); |
913 | this->set_size(this->size() + 1); |
914 | return this->back(); |
915 | } |
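// emplace_back constructs directly in the vector's storage, avoiding the
// temporary that push_back would copy or move from. Illustrative:
//
//   SmallVector<std::pair<int, std::string>, 4> V;
//   V.emplace_back(42, "answer"); // pair constructed in place at end()
//   V.push_back({7, "seven"});    // temporary pair built, then moved in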
916 | |
917 | SmallVectorImpl &operator=(const SmallVectorImpl &RHS); |
918 | |
919 | SmallVectorImpl &operator=(SmallVectorImpl &&RHS); |
920 | |
921 | bool operator==(const SmallVectorImpl &RHS) const { |
922 | if (this->size() != RHS.size()) return false; |
923 | return std::equal(this->begin(), this->end(), RHS.begin()); |
924 | } |
925 | bool operator!=(const SmallVectorImpl &RHS) const { |
926 | return !(*this == RHS); |
927 | } |
928 | |
929 | bool operator<(const SmallVectorImpl &RHS) const { |
930 | return std::lexicographical_compare(this->begin(), this->end(), |
931 | RHS.begin(), RHS.end()); |
932 | } |
933 | }; |
934 | |
935 | template <typename T> |
936 | void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) { |
937 | if (this == &RHS) return; |
938 | |
939 | |
940 | if (!this->isSmall() && !RHS.isSmall()) { |
941 | std::swap(this->BeginX, RHS.BeginX); |
942 | std::swap(this->Size, RHS.Size); |
943 | std::swap(this->Capacity, RHS.Capacity); |
944 | return; |
945 | } |
946 | this->reserve(RHS.size()); |
947 | RHS.reserve(this->size()); |
948 | |
949 | |
950 | size_t NumShared = this->size(); |
951 | if (NumShared > RHS.size()) NumShared = RHS.size(); |
952 | for (size_type i = 0; i != NumShared; ++i) |
953 | std::swap((*this)[i], RHS[i]); |
954 | |
955 | |
956 | if (this->size() > RHS.size()) { |
957 | size_t EltDiff = this->size() - RHS.size(); |
958 | this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end()); |
959 | RHS.set_size(RHS.size() + EltDiff); |
960 | this->destroy_range(this->begin()+NumShared, this->end()); |
961 | this->set_size(NumShared); |
962 | } else if (RHS.size() > this->size()) { |
963 | size_t EltDiff = RHS.size() - this->size(); |
964 | this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end()); |
965 | this->set_size(this->size() + EltDiff); |
966 | this->destroy_range(RHS.begin()+NumShared, RHS.end()); |
967 | RHS.set_size(NumShared); |
968 | } |
969 | } |
970 | |
971 | template <typename T> |
972 | SmallVectorImpl<T> &SmallVectorImpl<T>:: |
973 | operator=(const SmallVectorImpl<T> &RHS) { |
974 | |
975 | if (this == &RHS) return *this; |
976 | |
977 | |
978 | |
979 | size_t RHSSize = RHS.size(); |
980 | size_t CurSize = this->size(); |
981 | if (CurSize >= RHSSize) { |
982 | |
983 | iterator NewEnd; |
984 | if (RHSSize) |
985 | NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin()); |
986 | else |
987 | NewEnd = this->begin(); |
988 | |
989 | |
990 | this->destroy_range(NewEnd, this->end()); |
991 | |
992 | |
993 | this->set_size(RHSSize); |
994 | return *this; |
995 | } |
996 | |
997 | |
998 | |
999 | |
1000 | if (this->capacity() < RHSSize) { |
1001 | |
1002 | this->clear(); |
1003 | CurSize = 0; |
1004 | this->grow(RHSSize); |
1005 | } else if (CurSize) { |
1006 | |
1007 | std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
1008 | } |
1009 | |
1010 | |
1011 | this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(), |
1012 | this->begin()+CurSize); |
1013 | |
1014 | |
1015 | this->set_size(RHSSize); |
1016 | return *this; |
1017 | } |
1018 | |
1019 | template <typename T> |
1020 | SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) { |
1021 | |
1022 | if (this == &RHS) return *this; |
1023 | |
1024 | |
1025 | if (!RHS.isSmall()) { |
1026 | this->destroy_range(this->begin(), this->end()); |
1027 | if (!this->isSmall()) free(this->begin()); |
1028 | this->BeginX = RHS.BeginX; |
1029 | this->Size = RHS.Size; |
1030 | this->Capacity = RHS.Capacity; |
1031 | RHS.resetToSmall(); |
1032 | return *this; |
1033 | } |
1034 | |
1035 | |
1036 | |
1037 | size_t RHSSize = RHS.size(); |
1038 | size_t CurSize = this->size(); |
1039 | if (CurSize >= RHSSize) { |
1040 | |
1041 | iterator NewEnd = this->begin(); |
1042 | if (RHSSize) |
1043 | NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd); |
1044 | |
1045 | |
1046 | this->destroy_range(NewEnd, this->end()); |
1047 | this->set_size(RHSSize); |
1048 | |
1049 | |
1050 | RHS.clear(); |
1051 | |
1052 | return *this; |
1053 | } |
1054 | |
1055 | |
1056 | |
1057 | |
1058 | |
1059 | if (this->capacity() < RHSSize) { |
1060 | |
1061 | this->clear(); |
1062 | CurSize = 0; |
1063 | this->grow(RHSSize); |
1064 | } else if (CurSize) { |
1065 | |
1066 | std::move(RHS.begin(), RHS.begin()+CurSize, this->begin()); |
1067 | } |
1068 | |
1069 | |
1070 | this->uninitialized_move(RHS.begin()+CurSize, RHS.end(), |
1071 | this->begin()+CurSize); |
1072 | |
1073 | |
1074 | this->set_size(RHSSize); |
1075 | |
1076 | RHS.clear(); |
1077 | return *this; |
1078 | } |
1079 | |
1080 | /// Storage for the SmallVector elements.  This is specialized for the N=0 |
1081 | /// case to avoid allocating unnecessary storage. |
1082 | template <typename T, unsigned N> |
1083 | struct SmallVectorStorage { |
1084 | alignas(T) char InlineElts[N * sizeof(T)]; |
1085 | }; |
1086 | |
1087 | /// We need the storage to be properly aligned even for small-size of 0 so |
1088 | /// that the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is |
1089 | /// well-defined. |
1090 | template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {}; |
1091 | |
1092 | /// Forward declaration of SmallVector so that |
1093 | /// calculateSmallVectorDefaultInlinedElements can reference |
1094 | /// `sizeof(SmallVector<T, 0>)`. |
1095 | template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector; |
1096 | |
1097 | /// Helper class for calculating the default number of inline elements for |
1098 | /// `SmallVector<T>`. |
1099 | /// |
1100 | /// This should be migrated to a constexpr function when our minimum |
1101 | /// compiler support is enough for multi-statement constexpr functions. |
1102 | template <typename T> struct CalculateSmallVectorDefaultInlinedElements { |
1103 | // Parameter controlling the default number of inlined elements |
1104 | // for `SmallVector<T>`. |
1105 | // |
1106 | // The default number of inlined elements ensures that `sizeof(SmallVector<T>)` |
1107 | // fits in `kPreferredSmallVectorSizeof`, i.e. 64 bytes, a typical cache-line |
1108 | // size on common 64-bit targets. |
1109 | |
1110 | static constexpr size_t kPreferredSmallVectorSizeof = 64; |
1111 | |
1112 | |
1113 | |
1114 | |
1115 | |
1116 | |
1117 | |
1118 | |
1119 | |
1120 | |
1121 | |
1122 | |
1123 | |
1124 | |
1125 | |
1126 | |
1127 | |
1128 | |
1129 | |
1130 | |
1131 | |
1132 | |
1133 | |
1134 | static_assert( |
1135 | sizeof(T) <= 256, |
1136 | "You are trying to use a default number of inlined elements for " |
1137 | "`SmallVector<T>` but `sizeof(T)` is really big! Please use an " |
1138 | "explicit number of inlined elements with `SmallVector<T, N>` to make " |
1139 | "sure you really want that much inline storage."); |
1140 | |
1141 | // Discount the size of the header itself when calculating the maximum |
1142 | // inline bytes. |
1143 | static constexpr size_t PreferredInlineBytes = |
1144 | kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>); |
1145 | static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T); |
1146 | static constexpr size_t value = |
1147 | NumElementsThatFit == 0 ? 1 : NumElementsThatFit; |
1148 | }; |
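// Worked example of the calculation above, assuming a typical 64-bit target
// where sizeof(SmallVector<T, 0>) is 16 bytes (one pointer plus two 32-bit
// counters): PreferredInlineBytes = 64 - 16 = 48, so SmallVector<int>
// defaults to 48 / 4 = 12 inline elements, while any T bigger than 48 bytes
// still gets the guaranteed minimum of one.
//
//   static_assert(CalculateSmallVectorDefaultInlinedElements<int>::value == 12,
//                 "holds when sizeof(SmallVector<int, 0>) == 16");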
1149 | |
1150 | |
1151 | /// This is a 'vector' (really, a variable-sized array), optimized |
1152 | /// for the case when the array is small.  It contains some number of elements |
1153 | /// in-place, which allows it to avoid heap allocation when the actual number of |
1154 | /// elements is below that threshold.  This allows normal "small" cases to be |
1155 | /// fast without losing generality for large inputs. |
1156 | /// |
1157 | /// \note |
1158 | /// In the absence of a well-motivated choice for the number of inlined |
1159 | /// elements \p N, it is recommended to use \c SmallVector<T> (that is, |
1160 | /// omitting the \p N). This will choose a default number of inlined elements |
1161 | /// reasonable for allocation on the stack (for example, trying to keep \c |
1162 | /// sizeof(SmallVector<T>) around 64 bytes). |
1163 | /// |
1164 | /// \warning This does not attempt to be exception safe. |
1165 | |
1166 | template <typename T, |
1167 | unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value> |
1168 | class LLVM_GSL_OWNER SmallVector : public SmallVectorImpl<T>, |
1169 | SmallVectorStorage<T, N> { |
1170 | public: |
1171 | SmallVector() : SmallVectorImpl<T>(N) {} |
1172 | |
1173 | ~SmallVector() { |
1174 | |
1175 | this->destroy_range(this->begin(), this->end()); |
1176 | } |
1177 | |
1178 | explicit SmallVector(size_t Size, const T &Value = T()) |
1179 | : SmallVectorImpl<T>(N) { |
1180 | this->assign(Size, Value); |
1181 | } |
1182 | |
1183 | template <typename ItTy, |
1184 | typename = std::enable_if_t<std::is_convertible< |
1185 | typename std::iterator_traits<ItTy>::iterator_category, |
1186 | std::input_iterator_tag>::value>> |
1187 | SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) { |
1188 | this->append(S, E); |
1189 | } |
1190 | |
1191 | template <typename RangeTy> |
1192 | explicit SmallVector(const iterator_range<RangeTy> &R) |
1193 | : SmallVectorImpl<T>(N) { |
1194 | this->append(R.begin(), R.end()); |
1195 | } |
1196 | |
1197 | SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) { |
1198 | this->assign(IL); |
1199 | } |
1200 | |
1201 | SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) { |
1202 | if (!RHS.empty()) |
1203 | SmallVectorImpl<T>::operator=(RHS); |
1204 | } |
1205 | |
1206 | SmallVector &operator=(const SmallVector &RHS) { |
1207 | SmallVectorImpl<T>::operator=(RHS); |
1208 | return *this; |
1209 | } |
1210 | |
1211 | SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) { |
1212 | if (!RHS.empty()) |
1213 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1214 | } |
1215 | |
1216 | SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) { |
1217 | if (!RHS.empty()) |
1218 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1219 | } |
1220 | |
1221 | SmallVector &operator=(SmallVector &&RHS) { |
1222 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1223 | return *this; |
1224 | } |
1225 | |
1226 | SmallVector &operator=(SmallVectorImpl<T> &&RHS) { |
1227 | SmallVectorImpl<T>::operator=(::std::move(RHS)); |
1228 | return *this; |
1229 | } |
1230 | |
1231 | SmallVector &operator=(std::initializer_list<T> IL) { |
1232 | this->assign(IL); |
1233 | return *this; |
1234 | } |
1235 | }; |
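// Typical construction patterns for the class above (illustrative):
//
//   SmallVector<unsigned, 4> A;             // explicit inline capacity of 4
//   SmallVector<unsigned> B;                // default N from the helper above
//   SmallVector<unsigned, 4> C(3, 0xFFu);   // three elements, all 0xFF
//   SmallVector<unsigned, 4> D = {1, 2, 3}; // initializer list
//   A.append(D.begin(), D.end());           // spills to the heap when needed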
1236 | |
1237 | template <typename T, unsigned N> |
1238 | inline size_t capacity_in_bytes(const SmallVector<T, N> &X) { |
1239 | return X.capacity_in_bytes(); |
1240 | } |
1241 | |
1242 | /// Given a range of type R, iterate the entire range and return a |
1243 | /// SmallVector with elements of the vector.  This is useful, for example, |
1244 | /// when you want to iterate a range and then sort the results. |
1245 | template <unsigned Size, typename R> |
1246 | SmallVector<typename std::remove_const<typename std::remove_reference< |
1247 | decltype(*std::begin(std::declval<R &>()))>::type>::type, |
1248 | Size> |
1249 | to_vector(R &&Range) { |
1250 | return {std::begin(Range), std::end(Range)}; |
1251 | } |
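// Usage sketch for to_vector (illustrative): materialize any range into a
// SmallVector, e.g. to sort a sequence that is otherwise read-only.
//
//   const int Raw[] = {3, 1, 2};
//   auto Sorted = to_vector<8>(Raw); // SmallVector<int, 8>
//   std::sort(Sorted.begin(), Sorted.end());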
1252 | |
1253 | } // end namespace llvm |
1254 | |
1255 | namespace std { |
1256 | |
1257 | /// Implement std::swap in terms of SmallVector swap. |
1258 | template<typename T> |
1259 | inline void |
1260 | swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) { |
1261 | LHS.swap(RHS); |
1262 | } |
1263 | |
1264 | /// Implement std::swap in terms of SmallVector swap. |
1265 | template<typename T, unsigned N> |
1266 | inline void |
1267 | swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) { |
1268 | LHS.swap(RHS); |
1269 | } |
1270 | |
1271 | } // end namespace std |
1272 | |
1273 | #endif // LLVM_ADT_SMALLVECTOR_H |