clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/X86 -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/include -D NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem 
/usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/build-llvm/lib/Target/X86 -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-09-04-040900-46481-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210903100615+fd66b44ec19e/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | |
32 | |
33 | |
34 | |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | #include "X86TargetTransformInfo.h" |
42 | #include "llvm/Analysis/TargetTransformInfo.h" |
43 | #include "llvm/CodeGen/BasicTTIImpl.h" |
44 | #include "llvm/CodeGen/CostTable.h" |
45 | #include "llvm/CodeGen/TargetLowering.h" |
46 | #include "llvm/IR/IntrinsicInst.h" |
47 | #include "llvm/Support/Debug.h" |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "x86tti" |
52 | |
53 | |
54 | |
55 | |
56 | |
57 | |
58 | |
59 | TargetTransformInfo::PopcntSupportKind |
60 | X86TTIImpl::getPopcntSupport(unsigned TyWidth) { |
61 | // Classifies popcount support from the subtarget's POPCNT flag; TyWidth is only sanity-checked. |
62 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); |
63 | if (ST->hasPOPCNT()) |
64 | return TTI::PSK_FastHardware; |
65 | return TTI::PSK_Software; |
66 | } |
67 | |
68 | llvm::Optional<unsigned> X86TTIImpl::getCacheSize( |
69 | TargetTransformInfo::CacheLevel Level) const { |
70 | // Reports one fixed capacity per data-cache level. The subtarget is not |
71 | // consulted, so every X86 configuration gets the same answer. |
72 | // NOTE(review): the values are presumably byte counts - the unit is not |
73 | // stated in this function; confirm against the TTI interface. |
74 | using CacheLevel = TargetTransformInfo::CacheLevel; |
75 | constexpr unsigned KiB = 1024; |
76 | |
77 | switch (Level) { |
78 | case CacheLevel::L1D: |
79 | return 32 * KiB; |
80 | case CacheLevel::L2D: |
81 | return 256 * KiB; |
82 | } |
83 | |
84 | // Only reached when Level matches neither case label above. |
85 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); |
86 | |
87 | |
88 | |
89 | |
90 | |
91 | |
92 | |
93 | |
94 | |
95 | |
96 | } |
97 | |
98 | llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity( |
99 | TargetTransformInfo::CacheLevel Level) const { |
100 | // Reports the same fixed associativity for both data-cache levels listed |
101 | // below. The subtarget is not consulted. |
102 | using CacheLevel = TargetTransformInfo::CacheLevel; |
103 | constexpr unsigned Associativity = 8; |
104 | |
105 | switch (Level) { |
106 | case CacheLevel::L1D: |
107 | case CacheLevel::L2D: |
108 | // Stacked labels: both levels share one return. |
109 | return Associativity; |
110 | } |
111 | |
112 | // Only reached when Level matches neither case label above. |
113 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); |
114 | |
115 | |
116 | |
117 | } |
118 | |
119 | unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { |
120 | // ClassID 1 is handled as the vector register class; every other ID takes |
121 | // the non-vector path. |
122 | const bool IsVectorClass = ClassID == 1; |
123 | if (IsVectorClass && !ST->hasSSE1()) |
124 | return 0; // vector class requested but SSE1 is unavailable |
125 | if (!ST->is64Bit()) |
126 | return 8; |
127 | // 64-bit: 32 for the vector class when AVX-512 is present, 16 otherwise. |
128 | const bool HasAVX512Vectors = IsVectorClass && ST->hasAVX512(); |
129 | return HasAVX512Vectors ? 32 : 16; |
130 | } |
131 | |
132 | TypeSize |
133 | X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
134 | const unsigned PreferredBits = ST->getPreferVectorWidth(); // gates each vector width below |
135 | unsigned VecBits = 0; // remains 0 when no vector tier qualifies |
136 | switch (K) { |
137 | case TargetTransformInfo::RGK_Scalar: |
138 | return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); |
139 | case TargetTransformInfo::RGK_FixedWidthVector: |
140 | if (ST->hasAVX512() && PreferredBits >= 512) |
141 | VecBits = 512; |
142 | else if (ST->hasAVX() && PreferredBits >= 256) |
143 | VecBits = 256; |
144 | else if (ST->hasSSE1() && PreferredBits >= 128) |
145 | VecBits = 128; |
146 | return TypeSize::getFixed(VecBits); |
147 | case TargetTransformInfo::RGK_ScalableVector: |
148 | return TypeSize::getScalable(0); |
149 | } |
150 | llvm_unreachable("Unsupported register kind"); |
151 | } |
152 | |
153 | unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const { |
154 | const TypeSize FixedVecWidth = getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector); // unnamed argument is ignored |
155 | return FixedVecWidth.getFixedSize(); |
156 | } |
157 | |
158 | unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { |
159 | // Picks an interleave factor from VF and two subtarget queries. |
160 | // NOTE(review): VF is presumably the vectorization factor - confirm with callers. |
161 | |
162 | // VF == 1 and Atom both yield 1. The short-circuit keeps the original order: |
163 | // isAtom() is only queried when VF != 1. |
164 | const bool KeepSingle = VF == 1 || ST->isAtom(); |
165 | if (KeepSingle) |
166 | return 1; |
167 | |
168 | // Otherwise AVX subtargets report 4 and all remaining ones 2. |
169 | const unsigned Factor = ST->hasAVX() ? 4 : 2; |
170 | return Factor; |
171 | |
172 | |
173 | |
174 | } |
175 | |
176 | InstructionCost X86TTIImpl::getArithmeticInstrCost( |
177 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
178 | TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, |
179 | TTI::OperandValueProperties Opd1PropInfo, |
180 | TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, |
181 | const Instruction *CxtI) { |
182 | |
183 | if (CostKind != TTI::TCK_RecipThroughput) |
184 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, |
185 | Op2Info, Opd1PropInfo, |
186 | Opd2PropInfo, Args, CxtI); |
187 | |
188 | |
189 | if (Opcode == Instruction::Mul && Ty->isVectorTy() && |
190 | Ty->getScalarSizeInBits() == 8) { |
191 | Type *WideVecTy = |
192 | VectorType::getExtendedElementVectorType(cast<VectorType>(Ty)); |
193 | return getCastInstrCost(Instruction::ZExt, WideVecTy, Ty, |
194 | TargetTransformInfo::CastContextHint::None, |
195 | CostKind) + |
196 | getCastInstrCost(Instruction::Trunc, Ty, WideVecTy, |
197 | TargetTransformInfo::CastContextHint::None, |
198 | CostKind) + |
199 | getArithmeticInstrCost(Opcode, WideVecTy, CostKind, Op1Info, Op2Info, |
200 | Opd1PropInfo, Opd2PropInfo); |
201 | } |
202 | |
203 | |
204 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
205 | |
206 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
207 | assert(ISD && "Invalid opcode"); |
208 | |
209 | if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || |
210 | ISD == ISD::UREM) && |
211 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
212 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
213 | Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) { |
214 | if (ISD == ISD::SDIV || ISD == ISD::SREM) { |
215 | |
216 | |
217 | |
218 | |
219 | InstructionCost Cost = |
220 | 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info, |
221 | Op2Info, TargetTransformInfo::OP_None, |
222 | TargetTransformInfo::OP_None); |
223 | Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info, |
224 | Op2Info, TargetTransformInfo::OP_None, |
225 | TargetTransformInfo::OP_None); |
226 | Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info, |
227 | Op2Info, TargetTransformInfo::OP_None, |
228 | TargetTransformInfo::OP_None); |
229 | |
230 | if (ISD == ISD::SREM) { |
231 | |
232 | Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info, |
233 | Op2Info); |
234 | Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info, |
235 | Op2Info); |
236 | } |
237 | |
238 | return Cost; |
239 | } |
240 | |
241 | |
242 | if (ISD == ISD::UDIV) |
243 | return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info, |
244 | Op2Info, TargetTransformInfo::OP_None, |
245 | TargetTransformInfo::OP_None); |
246 | |
247 | else |
248 | return getArithmeticInstrCost(Instruction::And, Ty, CostKind, Op1Info, |
249 | Op2Info, TargetTransformInfo::OP_None, |
250 | TargetTransformInfo::OP_None); |
251 | } |
252 | |
253 | static const CostTblEntry GLMCostTable[] = { |
254 | { ISD::FDIV, MVT::f32, 18 }, |
255 | { ISD::FDIV, MVT::v4f32, 35 }, |
256 | { ISD::FDIV, MVT::f64, 33 }, |
257 | { ISD::FDIV, MVT::v2f64, 65 }, |
258 | }; |
259 | |
260 | if (ST->useGLMDivSqrtCosts()) |
261 | if (const auto *Entry = CostTableLookup(GLMCostTable, ISD, |
262 | LT.second)) |
263 | return LT.first * Entry->Cost; |
264 | |
265 | static const CostTblEntry SLMCostTable[] = { |
266 | { ISD::MUL, MVT::v4i32, 11 }, |
267 | { ISD::MUL, MVT::v8i16, 2 }, |
268 | { ISD::FMUL, MVT::f64, 2 }, |
269 | { ISD::FMUL, MVT::v2f64, 4 }, |
270 | { ISD::FMUL, MVT::v4f32, 2 }, |
271 | { ISD::FDIV, MVT::f32, 17 }, |
272 | { ISD::FDIV, MVT::v4f32, 39 }, |
273 | { ISD::FDIV, MVT::f64, 32 }, |
274 | { ISD::FDIV, MVT::v2f64, 69 }, |
275 | { ISD::FADD, MVT::v2f64, 2 }, |
276 | { ISD::FSUB, MVT::v2f64, 2 }, |
277 | |
278 | |
279 | |
280 | |
281 | |
282 | { ISD::MUL, MVT::v2i64, 17 }, |
283 | |
284 | { ISD::ADD, MVT::v2i64, 4 }, |
285 | { ISD::SUB, MVT::v2i64, 4 }, |
286 | }; |
287 | |
288 | if (ST->isSLM()) { |
289 | if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) { |
290 | |
291 | bool Op1Signed = false; |
292 | unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed); |
293 | bool Op2Signed = false; |
294 | unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed); |
295 | |
296 | bool SignedMode = Op1Signed || Op2Signed; |
297 | unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize); |
298 | |
299 | if (OpMinSize <= 7) |
300 | return LT.first * 3; |
301 | if (!SignedMode && OpMinSize <= 8) |
302 | return LT.first * 3; |
303 | if (OpMinSize <= 15) |
304 | return LT.first * 5; |
305 | if (!SignedMode && OpMinSize <= 16) |
306 | return LT.first * 5; |
307 | } |
308 | |
309 | if (const auto *Entry = CostTableLookup(SLMCostTable, ISD, |
310 | LT.second)) { |
311 | return LT.first * Entry->Cost; |
312 | } |
313 | } |
314 | |
315 | static const CostTblEntry AVX512BWUniformConstCostTable[] = { |
316 | { ISD::SHL, MVT::v64i8, 2 }, |
317 | { ISD::SRL, MVT::v64i8, 2 }, |
318 | { ISD::SRA, MVT::v64i8, 4 }, |
319 | }; |
320 | |
321 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
322 | ST->hasBWI()) { |
323 | if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD, |
324 | LT.second)) |
325 | return LT.first * Entry->Cost; |
326 | } |
327 | |
328 | static const CostTblEntry AVX512UniformConstCostTable[] = { |
329 | { ISD::SRA, MVT::v2i64, 1 }, |
330 | { ISD::SRA, MVT::v4i64, 1 }, |
331 | { ISD::SRA, MVT::v8i64, 1 }, |
332 | |
333 | { ISD::SHL, MVT::v64i8, 4 }, |
334 | { ISD::SRL, MVT::v64i8, 4 }, |
335 | { ISD::SRA, MVT::v64i8, 8 }, |
336 | |
337 | { ISD::SDIV, MVT::v16i32, 6 }, |
338 | { ISD::SREM, MVT::v16i32, 8 }, |
339 | { ISD::UDIV, MVT::v16i32, 5 }, |
340 | { ISD::UREM, MVT::v16i32, 7 }, |
341 | }; |
342 | |
343 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
344 | ST->hasAVX512()) { |
345 | if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD, |
346 | LT.second)) |
347 | return LT.first * Entry->Cost; |
348 | } |
349 | |
350 | static const CostTblEntry AVX2UniformConstCostTable[] = { |
351 | { ISD::SHL, MVT::v32i8, 2 }, |
352 | { ISD::SRL, MVT::v32i8, 2 }, |
353 | { ISD::SRA, MVT::v32i8, 4 }, |
354 | |
355 | { ISD::SRA, MVT::v4i64, 4 }, |
356 | |
357 | { ISD::SDIV, MVT::v8i32, 6 }, |
358 | { ISD::SREM, MVT::v8i32, 8 }, |
359 | { ISD::UDIV, MVT::v8i32, 5 }, |
360 | { ISD::UREM, MVT::v8i32, 7 }, |
361 | }; |
362 | |
363 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
364 | ST->hasAVX2()) { |
365 | if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD, |
366 | LT.second)) |
367 | return LT.first * Entry->Cost; |
368 | } |
369 | |
370 | static const CostTblEntry SSE2UniformConstCostTable[] = { |
371 | { ISD::SHL, MVT::v16i8, 2 }, |
372 | { ISD::SRL, MVT::v16i8, 2 }, |
373 | { ISD::SRA, MVT::v16i8, 4 }, |
374 | |
375 | { ISD::SHL, MVT::v32i8, 4+2 }, |
376 | { ISD::SRL, MVT::v32i8, 4+2 }, |
377 | { ISD::SRA, MVT::v32i8, 8+2 }, |
378 | |
379 | { ISD::SDIV, MVT::v8i32, 12+2 }, |
380 | { ISD::SREM, MVT::v8i32, 16+2 }, |
381 | { ISD::SDIV, MVT::v4i32, 6 }, |
382 | { ISD::SREM, MVT::v4i32, 8 }, |
383 | { ISD::UDIV, MVT::v8i32, 10+2 }, |
384 | { ISD::UREM, MVT::v8i32, 14+2 }, |
385 | { ISD::UDIV, MVT::v4i32, 5 }, |
386 | { ISD::UREM, MVT::v4i32, 7 }, |
387 | }; |
388 | |
389 | |
390 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
391 | ST->hasSSE2() && !ST->hasXOP()) { |
392 | if (const auto *Entry = |
393 | CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) |
394 | return LT.first * Entry->Cost; |
395 | } |
396 | |
397 | static const CostTblEntry AVX512BWConstCostTable[] = { |
398 | { ISD::SDIV, MVT::v64i8, 14 }, |
399 | { ISD::SREM, MVT::v64i8, 16 }, |
400 | { ISD::UDIV, MVT::v64i8, 14 }, |
401 | { ISD::UREM, MVT::v64i8, 16 }, |
402 | { ISD::SDIV, MVT::v32i16, 6 }, |
403 | { ISD::SREM, MVT::v32i16, 8 }, |
404 | { ISD::UDIV, MVT::v32i16, 6 }, |
405 | { ISD::UREM, MVT::v32i16, 8 }, |
406 | }; |
407 | |
408 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
409 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
410 | ST->hasBWI()) { |
411 | if (const auto *Entry = |
412 | CostTableLookup(AVX512BWConstCostTable, ISD, LT.second)) |
413 | return LT.first * Entry->Cost; |
414 | } |
415 | |
416 | static const CostTblEntry AVX512ConstCostTable[] = { |
417 | { ISD::SDIV, MVT::v16i32, 15 }, |
418 | { ISD::SREM, MVT::v16i32, 17 }, |
419 | { ISD::UDIV, MVT::v16i32, 15 }, |
420 | { ISD::UREM, MVT::v16i32, 17 }, |
421 | { ISD::SDIV, MVT::v64i8, 28 }, |
422 | { ISD::SREM, MVT::v64i8, 32 }, |
423 | { ISD::UDIV, MVT::v64i8, 28 }, |
424 | { ISD::UREM, MVT::v64i8, 32 }, |
425 | { ISD::SDIV, MVT::v32i16, 12 }, |
426 | { ISD::SREM, MVT::v32i16, 16 }, |
427 | { ISD::UDIV, MVT::v32i16, 12 }, |
428 | { ISD::UREM, MVT::v32i16, 16 }, |
429 | }; |
430 | |
431 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
432 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
433 | ST->hasAVX512()) { |
434 | if (const auto *Entry = |
435 | CostTableLookup(AVX512ConstCostTable, ISD, LT.second)) |
436 | return LT.first * Entry->Cost; |
437 | } |
438 | |
439 | static const CostTblEntry AVX2ConstCostTable[] = { |
440 | { ISD::SDIV, MVT::v32i8, 14 }, |
441 | { ISD::SREM, MVT::v32i8, 16 }, |
442 | { ISD::UDIV, MVT::v32i8, 14 }, |
443 | { ISD::UREM, MVT::v32i8, 16 }, |
444 | { ISD::SDIV, MVT::v16i16, 6 }, |
445 | { ISD::SREM, MVT::v16i16, 8 }, |
446 | { ISD::UDIV, MVT::v16i16, 6 }, |
447 | { ISD::UREM, MVT::v16i16, 8 }, |
448 | { ISD::SDIV, MVT::v8i32, 15 }, |
449 | { ISD::SREM, MVT::v8i32, 19 }, |
450 | { ISD::UDIV, MVT::v8i32, 15 }, |
451 | { ISD::UREM, MVT::v8i32, 19 }, |
452 | }; |
453 | |
454 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
455 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
456 | ST->hasAVX2()) { |
457 | if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second)) |
458 | return LT.first * Entry->Cost; |
459 | } |
460 | |
461 | static const CostTblEntry SSE2ConstCostTable[] = { |
462 | { ISD::SDIV, MVT::v32i8, 28+2 }, |
463 | { ISD::SREM, MVT::v32i8, 32+2 }, |
464 | { ISD::SDIV, MVT::v16i8, 14 }, |
465 | { ISD::SREM, MVT::v16i8, 16 }, |
466 | { ISD::UDIV, MVT::v32i8, 28+2 }, |
467 | { ISD::UREM, MVT::v32i8, 32+2 }, |
468 | { ISD::UDIV, MVT::v16i8, 14 }, |
469 | { ISD::UREM, MVT::v16i8, 16 }, |
470 | { ISD::SDIV, MVT::v16i16, 12+2 }, |
471 | { ISD::SREM, MVT::v16i16, 16+2 }, |
472 | { ISD::SDIV, MVT::v8i16, 6 }, |
473 | { ISD::SREM, MVT::v8i16, 8 }, |
474 | { ISD::UDIV, MVT::v16i16, 12+2 }, |
475 | { ISD::UREM, MVT::v16i16, 16+2 }, |
476 | { ISD::UDIV, MVT::v8i16, 6 }, |
477 | { ISD::UREM, MVT::v8i16, 8 }, |
478 | { ISD::SDIV, MVT::v8i32, 38+2 }, |
479 | { ISD::SREM, MVT::v8i32, 48+2 }, |
480 | { ISD::SDIV, MVT::v4i32, 19 }, |
481 | { ISD::SREM, MVT::v4i32, 24 }, |
482 | { ISD::UDIV, MVT::v8i32, 30+2 }, |
483 | { ISD::UREM, MVT::v8i32, 40+2 }, |
484 | { ISD::UDIV, MVT::v4i32, 15 }, |
485 | { ISD::UREM, MVT::v4i32, 20 }, |
486 | }; |
487 | |
488 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
489 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
490 | ST->hasSSE2()) { |
491 | |
492 | if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) |
493 | return LT.first * 32; |
494 | if (ISD == ISD::SREM && LT.second == MVT::v8i32 && ST->hasAVX()) |
495 | return LT.first * 38; |
496 | if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) |
497 | return LT.first * 15; |
498 | if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41()) |
499 | return LT.first * 20; |
500 | |
501 | if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second)) |
502 | return LT.first * Entry->Cost; |
503 | } |
504 | |
505 | static const CostTblEntry AVX512BWShiftCostTable[] = { |
506 | { ISD::SHL, MVT::v16i8, 4 }, |
507 | { ISD::SRL, MVT::v16i8, 4 }, |
508 | { ISD::SRA, MVT::v16i8, 4 }, |
509 | { ISD::SHL, MVT::v32i8, 4 }, |
510 | { ISD::SRL, MVT::v32i8, 4 }, |
511 | { ISD::SRA, MVT::v32i8, 6 }, |
512 | { ISD::SHL, MVT::v64i8, 6 }, |
513 | { ISD::SRL, MVT::v64i8, 7 }, |
514 | { ISD::SRA, MVT::v64i8, 15 }, |
515 | |
516 | { ISD::SHL, MVT::v8i16, 1 }, |
517 | { ISD::SRL, MVT::v8i16, 1 }, |
518 | { ISD::SRA, MVT::v8i16, 1 }, |
519 | { ISD::SHL, MVT::v16i16, 1 }, |
520 | { ISD::SRL, MVT::v16i16, 1 }, |
521 | { ISD::SRA, MVT::v16i16, 1 }, |
522 | { ISD::SHL, MVT::v32i16, 1 }, |
523 | { ISD::SRL, MVT::v32i16, 1 }, |
524 | { ISD::SRA, MVT::v32i16, 1 }, |
525 | }; |
526 | |
527 | if (ST->hasBWI()) |
528 | if (const auto *Entry = CostTableLookup(AVX512BWShiftCostTable, ISD, LT.second)) |
529 | return LT.first * Entry->Cost; |
530 | |
531 | static const CostTblEntry AVX2UniformCostTable[] = { |
532 | |
533 | { ISD::SHL, MVT::v16i16, 1 }, |
534 | { ISD::SRL, MVT::v16i16, 1 }, |
535 | { ISD::SRA, MVT::v16i16, 1 }, |
536 | { ISD::SHL, MVT::v32i16, 2 }, |
537 | { ISD::SRL, MVT::v32i16, 2 }, |
538 | { ISD::SRA, MVT::v32i16, 2 }, |
539 | |
540 | { ISD::SHL, MVT::v8i32, 1 }, |
541 | { ISD::SRL, MVT::v8i32, 1 }, |
542 | { ISD::SRA, MVT::v8i32, 1 }, |
543 | { ISD::SHL, MVT::v4i64, 1 }, |
544 | { ISD::SRL, MVT::v4i64, 1 }, |
545 | }; |
546 | |
547 | if (ST->hasAVX2() && |
548 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
549 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
550 | if (const auto *Entry = |
551 | CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) |
552 | return LT.first * Entry->Cost; |
553 | } |
554 | |
555 | static const CostTblEntry SSE2UniformCostTable[] = { |
556 | |
557 | { ISD::SHL, MVT::v8i16, 1 }, |
558 | { ISD::SHL, MVT::v4i32, 1 }, |
559 | { ISD::SHL, MVT::v2i64, 1 }, |
560 | |
561 | { ISD::SRL, MVT::v8i16, 1 }, |
562 | { ISD::SRL, MVT::v4i32, 1 }, |
563 | { ISD::SRL, MVT::v2i64, 1 }, |
564 | |
565 | { ISD::SRA, MVT::v8i16, 1 }, |
566 | { ISD::SRA, MVT::v4i32, 1 }, |
567 | }; |
568 | |
569 | if (ST->hasSSE2() && |
570 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
571 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
572 | if (const auto *Entry = |
573 | CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) |
574 | return LT.first * Entry->Cost; |
575 | } |
576 | |
577 | static const CostTblEntry AVX512DQCostTable[] = { |
578 | { ISD::MUL, MVT::v2i64, 2 }, |
579 | { ISD::MUL, MVT::v4i64, 2 }, |
580 | { ISD::MUL, MVT::v8i64, 2 } |
581 | }; |
582 | |
583 | |
584 | if (ST->hasDQI()) |
585 | if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second)) |
586 | return LT.first * Entry->Cost; |
587 | |
588 | static const CostTblEntry AVX512BWCostTable[] = { |
589 | { ISD::SHL, MVT::v64i8, 11 }, |
590 | { ISD::SRL, MVT::v64i8, 11 }, |
591 | { ISD::SRA, MVT::v64i8, 24 }, |
592 | }; |
593 | |
594 | |
595 | if (ST->hasBWI()) |
596 | if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second)) |
597 | return LT.first * Entry->Cost; |
598 | |
599 | static const CostTblEntry AVX512CostTable[] = { |
600 | { ISD::SHL, MVT::v4i32, 1 }, |
601 | { ISD::SRL, MVT::v4i32, 1 }, |
602 | { ISD::SRA, MVT::v4i32, 1 }, |
603 | { ISD::SHL, MVT::v8i32, 1 }, |
604 | { ISD::SRL, MVT::v8i32, 1 }, |
605 | { ISD::SRA, MVT::v8i32, 1 }, |
606 | { ISD::SHL, MVT::v16i32, 1 }, |
607 | { ISD::SRL, MVT::v16i32, 1 }, |
608 | { ISD::SRA, MVT::v16i32, 1 }, |
609 | |
610 | { ISD::SHL, MVT::v2i64, 1 }, |
611 | { ISD::SRL, MVT::v2i64, 1 }, |
612 | { ISD::SHL, MVT::v4i64, 1 }, |
613 | { ISD::SRL, MVT::v4i64, 1 }, |
614 | { ISD::SHL, MVT::v8i64, 1 }, |
615 | { ISD::SRL, MVT::v8i64, 1 }, |
616 | |
617 | { ISD::SRA, MVT::v2i64, 1 }, |
618 | { ISD::SRA, MVT::v4i64, 1 }, |
619 | { ISD::SRA, MVT::v8i64, 1 }, |
620 | |
621 | { ISD::MUL, MVT::v16i32, 1 }, |
622 | { ISD::MUL, MVT::v8i32, 1 }, |
623 | { ISD::MUL, MVT::v4i32, 1 }, |
624 | { ISD::MUL, MVT::v8i64, 6 }, |
625 | |
626 | { ISD::FNEG, MVT::v8f64, 1 }, |
627 | { ISD::FADD, MVT::v8f64, 1 }, |
628 | { ISD::FSUB, MVT::v8f64, 1 }, |
629 | { ISD::FMUL, MVT::v8f64, 1 }, |
630 | { ISD::FDIV, MVT::f64, 4 }, |
631 | { ISD::FDIV, MVT::v2f64, 4 }, |
632 | { ISD::FDIV, MVT::v4f64, 8 }, |
633 | { ISD::FDIV, MVT::v8f64, 16 }, |
634 | |
635 | { ISD::FNEG, MVT::v16f32, 1 }, |
636 | { ISD::FADD, MVT::v16f32, 1 }, |
637 | { ISD::FSUB, MVT::v16f32, 1 }, |
638 | { ISD::FMUL, MVT::v16f32, 1 }, |
639 | { ISD::FDIV, MVT::f32, 3 }, |
640 | { ISD::FDIV, MVT::v4f32, 3 }, |
641 | { ISD::FDIV, MVT::v8f32, 5 }, |
642 | { ISD::FDIV, MVT::v16f32, 10 }, |
643 | }; |
644 | |
645 | if (ST->hasAVX512()) |
646 | if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) |
647 | return LT.first * Entry->Cost; |
648 | |
649 | static const CostTblEntry AVX2ShiftCostTable[] = { |
650 | |
651 | |
652 | { ISD::SHL, MVT::v4i32, 2 }, |
653 | { ISD::SRL, MVT::v4i32, 2 }, |
654 | { ISD::SRA, MVT::v4i32, 2 }, |
655 | { ISD::SHL, MVT::v8i32, 2 }, |
656 | { ISD::SRL, MVT::v8i32, 2 }, |
657 | { ISD::SRA, MVT::v8i32, 2 }, |
658 | { ISD::SHL, MVT::v2i64, 1 }, |
659 | { ISD::SRL, MVT::v2i64, 1 }, |
660 | { ISD::SHL, MVT::v4i64, 1 }, |
661 | { ISD::SRL, MVT::v4i64, 1 }, |
662 | }; |
663 | |
664 | if (ST->hasAVX512()) { |
665 | if (ISD == ISD::SHL && LT.second == MVT::v32i16 && |
666 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
667 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
668 | |
669 | |
670 | return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, |
671 | Op1Info, Op2Info, |
672 | TargetTransformInfo::OP_None, |
673 | TargetTransformInfo::OP_None); |
674 | } |
675 | |
676 | |
677 | if (ST->hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) { |
678 | if (ISD == ISD::SHL && LT.second == MVT::v16i16 && |
679 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
680 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
681 | |
682 | |
683 | return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, |
684 | Op1Info, Op2Info, |
685 | TargetTransformInfo::OP_None, |
686 | TargetTransformInfo::OP_None); |
687 | |
688 | if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second)) |
689 | return LT.first * Entry->Cost; |
690 | } |
691 | |
692 | static const CostTblEntry XOPShiftCostTable[] = { |
693 | |
694 | { ISD::SHL, MVT::v16i8, 1 }, |
695 | { ISD::SRL, MVT::v16i8, 2 }, |
696 | { ISD::SRA, MVT::v16i8, 2 }, |
697 | { ISD::SHL, MVT::v8i16, 1 }, |
698 | { ISD::SRL, MVT::v8i16, 2 }, |
699 | { ISD::SRA, MVT::v8i16, 2 }, |
700 | { ISD::SHL, MVT::v4i32, 1 }, |
701 | { ISD::SRL, MVT::v4i32, 2 }, |
702 | { ISD::SRA, MVT::v4i32, 2 }, |
703 | { ISD::SHL, MVT::v2i64, 1 }, |
704 | { ISD::SRL, MVT::v2i64, 2 }, |
705 | { ISD::SRA, MVT::v2i64, 2 }, |
706 | |
707 | { ISD::SHL, MVT::v32i8, 2+2 }, |
708 | { ISD::SRL, MVT::v32i8, 4+2 }, |
709 | { ISD::SRA, MVT::v32i8, 4+2 }, |
710 | { ISD::SHL, MVT::v16i16, 2+2 }, |
711 | { ISD::SRL, MVT::v16i16, 4+2 }, |
712 | { ISD::SRA, MVT::v16i16, 4+2 }, |
713 | { ISD::SHL, MVT::v8i32, 2+2 }, |
714 | { ISD::SRL, MVT::v8i32, 4+2 }, |
715 | { ISD::SRA, MVT::v8i32, 4+2 }, |
716 | { ISD::SHL, MVT::v4i64, 2+2 }, |
717 | { ISD::SRL, MVT::v4i64, 4+2 }, |
718 | { ISD::SRA, MVT::v4i64, 4+2 }, |
719 | }; |
720 | |
721 | |
722 | if (ST->hasXOP()) { |
723 | |
724 | |
725 | int ShiftISD = ISD; |
726 | if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) && |
727 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
728 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
729 | ShiftISD = ISD::SHL; |
730 | if (const auto *Entry = |
731 | CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second)) |
732 | return LT.first * Entry->Cost; |
733 | } |
734 | |
735 | static const CostTblEntry SSE2UniformShiftCostTable[] = { |
736 | |
737 | { ISD::SHL, MVT::v16i16, 2+2 }, |
738 | { ISD::SHL, MVT::v8i32, 2+2 }, |
739 | { ISD::SHL, MVT::v4i64, 2+2 }, |
740 | |
741 | { ISD::SRL, MVT::v16i16, 2+2 }, |
742 | { ISD::SRL, MVT::v8i32, 2+2 }, |
743 | { ISD::SRL, MVT::v4i64, 2+2 }, |
744 | |
745 | { ISD::SRA, MVT::v16i16, 2+2 }, |
746 | { ISD::SRA, MVT::v8i32, 2+2 }, |
747 | { ISD::SRA, MVT::v2i64, 4 }, |
748 | { ISD::SRA, MVT::v4i64, 8+2 }, |
749 | }; |
750 | |
751 | if (ST->hasSSE2() && |
752 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
753 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
754 | |
755 | |
756 | if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2()) |
757 | return LT.first * 4; |
758 | |
759 | if (const auto *Entry = |
760 | CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) |
761 | return LT.first * Entry->Cost; |
762 | } |
763 | |
764 | if (ISD == ISD::SHL && |
765 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { |
766 | MVT VT = LT.second; |
767 | |
768 | |
769 | if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) || |
770 | ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX())) |
771 | ISD = ISD::MUL; |
772 | } |
773 | |
774 | static const CostTblEntry AVX2CostTable[] = { |
775 | { ISD::SHL, MVT::v16i8, 6 }, |
776 | { ISD::SHL, MVT::v32i8, 6 }, |
777 | { ISD::SHL, MVT::v64i8, 12 }, |
778 | { ISD::SHL, MVT::v8i16, 5 }, |
779 | { ISD::SHL, MVT::v16i16, 7 }, |
780 | { ISD::SHL, MVT::v32i16, 14 }, |
781 | |
782 | { ISD::SRL, MVT::v16i8, 6 }, |
783 | { ISD::SRL, MVT::v32i8, 6 }, |
784 | { ISD::SRL, MVT::v64i8, 12 }, |
785 | { ISD::SRL, MVT::v8i16, 5 }, |
786 | { ISD::SRL, MVT::v16i16, 7 }, |
787 | { ISD::SRL, MVT::v32i16, 14 }, |
788 | |
789 | { ISD::SRA, MVT::v16i8, 17 }, |
790 | { ISD::SRA, MVT::v32i8, 17 }, |
791 | { ISD::SRA, MVT::v64i8, 34 }, |
792 | { ISD::SRA, MVT::v8i16, 5 }, |
793 | { ISD::SRA, MVT::v16i16, 7 }, |
794 | { ISD::SRA, MVT::v32i16, 14 }, |
795 | { ISD::SRA, MVT::v2i64, 2 }, |
796 | { ISD::SRA, MVT::v4i64, 2 }, |
797 | |
798 | { ISD::SUB, MVT::v32i8, 1 }, |
799 | { ISD::ADD, MVT::v32i8, 1 }, |
800 | { ISD::SUB, MVT::v16i16, 1 }, |
801 | { ISD::ADD, MVT::v16i16, 1 }, |
802 | { ISD::SUB, MVT::v8i32, 1 }, |
803 | { ISD::ADD, MVT::v8i32, 1 }, |
804 | { ISD::SUB, MVT::v4i64, 1 }, |
805 | { ISD::ADD, MVT::v4i64, 1 }, |
806 | |
807 | { ISD::MUL, MVT::v16i16, 1 }, |
808 | { ISD::MUL, MVT::v8i32, 2 }, |
809 | { ISD::MUL, MVT::v4i64, 6 }, |
810 | |
811 | { ISD::FNEG, MVT::v4f64, 1 }, |
812 | { ISD::FNEG, MVT::v8f32, 1 }, |
813 | { ISD::FADD, MVT::v4f64, 1 }, |
814 | { ISD::FADD, MVT::v8f32, 1 }, |
815 | { ISD::FSUB, MVT::v4f64, 1 }, |
816 | { ISD::FSUB, MVT::v8f32, 1 }, |
817 | { ISD::FMUL, MVT::f64, 1 }, |
818 | { ISD::FMUL, MVT::v2f64, 1 }, |
819 | { ISD::FMUL, MVT::v4f64, 1 }, |
820 | { ISD::FMUL, MVT::v8f32, 1 }, |
821 | |
822 | { ISD::FDIV, MVT::f32, 7 }, |
823 | { ISD::FDIV, MVT::v4f32, 7 }, |
824 | { ISD::FDIV, MVT::v8f32, 14 }, |
825 | { ISD::FDIV, MVT::f64, 14 }, |
826 | { ISD::FDIV, MVT::v2f64, 14 }, |
827 | { ISD::FDIV, MVT::v4f64, 28 }, |
828 | }; |
829 | |
830 | |
831 | if (ST->hasAVX2()) |
832 | if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) |
833 | return LT.first * Entry->Cost; |
834 | |
835 | static const CostTblEntry AVX1CostTable[] = { |
836 | |
837 | |
838 | |
839 | { ISD::MUL, MVT::v16i16, 4 }, |
840 | { ISD::MUL, MVT::v8i32, 5 }, |
841 | { ISD::MUL, MVT::v4i64, 12 }, |
842 | |
843 | { ISD::SUB, MVT::v32i8, 4 }, |
844 | { ISD::ADD, MVT::v32i8, 4 }, |
845 | { ISD::SUB, MVT::v16i16, 4 }, |
846 | { ISD::ADD, MVT::v16i16, 4 }, |
847 | { ISD::SUB, MVT::v8i32, 4 }, |
848 | { ISD::ADD, MVT::v8i32, 4 }, |
849 | { ISD::SUB, MVT::v4i64, 4 }, |
850 | { ISD::ADD, MVT::v4i64, 4 }, |
851 | |
852 | { ISD::SHL, MVT::v32i8, 22 }, |
853 | { ISD::SHL, MVT::v8i16, 6 }, |
854 | { ISD::SHL, MVT::v16i16, 13 }, |
855 | { ISD::SHL, MVT::v4i32, 3 }, |
856 | { ISD::SHL, MVT::v8i32, 9 }, |
857 | { ISD::SHL, MVT::v2i64, 2 }, |
858 | { ISD::SHL, MVT::v4i64, 6 }, |
859 | |
860 | { ISD::SRL, MVT::v32i8, 23 }, |
861 | { ISD::SRL, MVT::v16i16, 28 }, |
862 | { ISD::SRL, MVT::v4i32, 6 }, |
863 | { ISD::SRL, MVT::v8i32, 14 }, |
864 | { ISD::SRL, MVT::v2i64, 2 }, |
865 | { ISD::SRL, MVT::v4i64, 6 }, |
866 | |
867 | { ISD::SRA, MVT::v32i8, 44 }, |
868 | { ISD::SRA, MVT::v16i16, 28 }, |
869 | { ISD::SRA, MVT::v4i32, 6 }, |
870 | { ISD::SRA, MVT::v8i32, 14 }, |
871 | { ISD::SRA, MVT::v2i64, 5 }, |
872 | { ISD::SRA, MVT::v4i64, 12 }, |
873 | |
874 | { ISD::FNEG, MVT::v4f64, 2 }, |
875 | { ISD::FNEG, MVT::v8f32, 2 }, |
876 | |
877 | { ISD::FMUL, MVT::f64, 2 }, |
878 | { ISD::FMUL, MVT::v2f64, 2 }, |
879 | { ISD::FMUL, MVT::v4f64, 4 }, |
880 | |
881 | { ISD::FDIV, MVT::f32, 14 }, |
882 | { ISD::FDIV, MVT::v4f32, 14 }, |
883 | { ISD::FDIV, MVT::v8f32, 28 }, |
884 | { ISD::FDIV, MVT::f64, 22 }, |
885 | { ISD::FDIV, MVT::v2f64, 22 }, |
886 | { ISD::FDIV, MVT::v4f64, 44 }, |
887 | }; |
888 | |
889 | if (ST->hasAVX()) |
890 | if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) |
891 | return LT.first * Entry->Cost; |
892 | |
893 | static const CostTblEntry SSE42CostTable[] = { |
894 | { ISD::FADD, MVT::f64, 1 }, |
895 | { ISD::FADD, MVT::f32, 1 }, |
896 | { ISD::FADD, MVT::v2f64, 1 }, |
897 | { ISD::FADD, MVT::v4f32, 1 }, |
898 | |
899 | { ISD::FSUB, MVT::f64, 1 }, |
900 | { ISD::FSUB, MVT::f32 , 1 }, |
901 | { ISD::FSUB, MVT::v2f64, 1 }, |
902 | { ISD::FSUB, MVT::v4f32, 1 }, |
903 | |
904 | { ISD::FMUL, MVT::f64, 1 }, |
905 | { ISD::FMUL, MVT::f32, 1 }, |
906 | { ISD::FMUL, MVT::v2f64, 1 }, |
907 | { ISD::FMUL, MVT::v4f32, 1 }, |
908 | |
909 | { ISD::FDIV, MVT::f32, 14 }, |
910 | { ISD::FDIV, MVT::v4f32, 14 }, |
911 | { ISD::FDIV, MVT::f64, 22 }, |
912 | { ISD::FDIV, MVT::v2f64, 22 }, |
913 | |
914 | { ISD::MUL, MVT::v2i64, 6 } |
915 | }; |
916 | |
917 | if (ST->hasSSE42()) |
918 | if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second)) |
919 | return LT.first * Entry->Cost; |
920 | |
921 | static const CostTblEntry SSE41CostTable[] = { |
922 | { ISD::SHL, MVT::v16i8, 10 }, |
923 | { ISD::SHL, MVT::v8i16, 11 }, |
924 | { ISD::SHL, MVT::v4i32, 4 }, |
925 | |
926 | { ISD::SRL, MVT::v16i8, 11 }, |
927 | { ISD::SRL, MVT::v8i16, 13 }, |
928 | { ISD::SRL, MVT::v4i32, 16 }, |
929 | |
930 | { ISD::SRA, MVT::v16i8, 21 }, |
931 | { ISD::SRA, MVT::v8i16, 13 }, |
932 | |
933 | { ISD::MUL, MVT::v4i32, 2 } |
934 | }; |
935 | |
936 | if (ST->hasSSE41()) |
937 | if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second)) |
938 | return LT.first * Entry->Cost; |
939 | |
940 | static const CostTblEntry SSE2CostTable[] = { |
941 | |
942 | |
943 | { ISD::SHL, MVT::v16i8, 13 }, |
944 | { ISD::SHL, MVT::v8i16, 25 }, |
945 | { ISD::SHL, MVT::v4i32, 16 }, |
946 | { ISD::SHL, MVT::v2i64, 4 }, |
947 | |
948 | { ISD::SRL, MVT::v16i8, 14 }, |
949 | { ISD::SRL, MVT::v8i16, 16 }, |
950 | { ISD::SRL, MVT::v4i32, 12 }, |
951 | { ISD::SRL, MVT::v2i64, 4 }, |
952 | |
953 | { ISD::SRA, MVT::v16i8, 27 }, |
954 | { ISD::SRA, MVT::v8i16, 16 }, |
955 | { ISD::SRA, MVT::v4i32, 12 }, |
956 | { ISD::SRA, MVT::v2i64, 8 }, |
957 | |
958 | { ISD::MUL, MVT::v8i16, 1 }, |
959 | { ISD::MUL, MVT::v4i32, 6 }, |
960 | { ISD::MUL, MVT::v2i64, 8 }, |
961 | |
962 | { ISD::FDIV, MVT::f32, 23 }, |
963 | { ISD::FDIV, MVT::v4f32, 39 }, |
964 | { ISD::FDIV, MVT::f64, 38 }, |
965 | { ISD::FDIV, MVT::v2f64, 69 }, |
966 | |
967 | { ISD::FNEG, MVT::f32, 1 }, |
968 | { ISD::FNEG, MVT::f64, 1 }, |
969 | { ISD::FNEG, MVT::v4f32, 1 }, |
970 | { ISD::FNEG, MVT::v2f64, 1 }, |
971 | |
972 | { ISD::FADD, MVT::f32, 2 }, |
973 | { ISD::FADD, MVT::f64, 2 }, |
974 | |
975 | { ISD::FSUB, MVT::f32, 2 }, |
976 | { ISD::FSUB, MVT::f64, 2 }, |
977 | }; |
978 | |
979 | if (ST->hasSSE2()) |
980 | if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) |
981 | return LT.first * Entry->Cost; |
982 | |
983 | static const CostTblEntry SSE1CostTable[] = { |
984 | { ISD::FDIV, MVT::f32, 17 }, |
985 | { ISD::FDIV, MVT::v4f32, 34 }, |
986 | |
987 | { ISD::FNEG, MVT::f32, 2 }, |
988 | { ISD::FNEG, MVT::v4f32, 2 }, |
989 | |
990 | { ISD::FADD, MVT::f32, 1 }, |
991 | { ISD::FADD, MVT::v4f32, 2 }, |
992 | |
993 | { ISD::FSUB, MVT::f32, 1 }, |
994 | { ISD::FSUB, MVT::v4f32, 2 }, |
995 | }; |
996 | |
997 | if (ST->hasSSE1()) |
998 | if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second)) |
999 | return LT.first * Entry->Cost; |
1000 | |
1001 | static const CostTblEntry X64CostTbl[] = { |
1002 | { ISD::ADD, MVT::i64, 1 }, |
1003 | { ISD::SUB, MVT::i64, 1 }, |
1004 | }; |
1005 | |
1006 | if (ST->is64Bit()) |
1007 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, LT.second)) |
1008 | return LT.first * Entry->Cost; |
1009 | |
1010 | static const CostTblEntry X86CostTbl[] = { |
1011 | { ISD::ADD, MVT::i8, 1 }, |
1012 | { ISD::ADD, MVT::i16, 1 }, |
1013 | { ISD::ADD, MVT::i32, 1 }, |
1014 | |
1015 | { ISD::SUB, MVT::i8, 1 }, |
1016 | { ISD::SUB, MVT::i16, 1 }, |
1017 | { ISD::SUB, MVT::i32, 1 }, |
1018 | }; |
1019 | |
1020 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second)) |
1021 | return LT.first * Entry->Cost; |
1022 | |
1023 | |
1024 | |
1025 | |
1026 | |
1027 | |
1028 | |
1029 | if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM || |
1030 | ISD == ISD::UDIV || ISD == ISD::UREM)) { |
1031 | InstructionCost ScalarCost = getArithmeticInstrCost( |
1032 | Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info, |
1033 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
1034 | return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost; |
1035 | } |
1036 | |
1037 | |
1038 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); |
1039 | } |
1040 | |
// Estimate the cost of a vector shuffle of kind `Kind` applied to `BaseTp` on
// the current X86 subtarget.
//
// The function first handles a few special cases (subvector extract/insert,
// sub-128-bit vectors on pre-SSSE3 targets, permutes of types that legalize to
// more than one register) and then walks a series of static cost tables, one
// per ISA feature, returning on the first hit. Every table hit is scaled by
// LT.first. If nothing matches, the generic BaseT implementation is used.
//
// Parameters:
//   Kind   - requested shuffle kind; may be refined from `Mask` below.
//   BaseTp - vector type being shuffled.
//   Mask   - shuffle mask; only passed on to improveShuffleKindFromMask and
//            to the base implementation.
//   Index  - subvector offset (in elements); only inspected here for the
//            extract/insert subvector kinds.
//   SubTp  - subvector type for the extract/insert subvector kinds.
1041 | InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, |
1042 | VectorType *BaseTp, |
1043 | ArrayRef<int> Mask, int Index, |
1044 | VectorType *SubTp) { |
1045 | |
1046 | |
// LT.first is used below as the multiplier applied to table costs and
// LT.second is the legalized machine value type used as the table key.
1047 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp); |
1048 | |
// NOTE(review): improveShuffleKindFromMask is defined outside this chunk;
// presumably it picks a more specific kind when the mask allows it.
1049 | Kind = improveShuffleKindFromMask(Kind, Mask); |
1050 | |
// Transpose is costed exactly like a generic two-source permute.
1051 | if (Kind == TTI::SK_Transpose) |
1052 | Kind = TTI::SK_PermuteTwoSrc; |
1053 | |
1054 | |
1055 | |
1056 | |
// For broadcasts the multiplier is forced to 1, so a table hit below is
// returned unscaled regardless of how the type legalizes.
1057 | if (Kind == TTI::SK_Broadcast) |
1058 | LT.first = 1; |
1059 | |
1060 | |
1061 | |
// Subvector extraction: cost 0 when Index is a multiple of the legalized
// element count, SubLT.first when Index is aligned to the legalized subvector
// type, and otherwise handled via a recursive aligned extract (see below).
1062 | if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) { |
1063 | int NumElts = LT.second.getVectorNumElements(); |
1064 | if ((Index % NumElts) == 0) |
1065 | return 0; |
1066 | std::pair<InstructionCost, MVT> SubLT = |
1067 | TLI->getTypeLegalizationCost(DL, SubTp); |
1068 | if (SubLT.second.isVector()) { |
1069 | int NumSubElts = SubLT.second.getVectorNumElements(); |
1070 | if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) |
1071 | return SubLT.first; |
1072 | |
1073 | |
1074 | |
1075 | |
1076 | |
1077 | |
// Case where the legalized subvector type has more elements than SubTp
// itself (NumSubElts > OrigSubElts) while element types are unchanged:
// cost the extraction of the enclosing legal-width subvector at an index
// rounded down to NumSubElts, then add a fixed amount on top.
1078 | int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements(); |
1079 | if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 && |
1080 | (NumSubElts % OrigSubElts) == 0 && |
1081 | LT.second.getVectorElementType() == |
1082 | SubLT.second.getVectorElementType() && |
1083 | LT.second.getVectorElementType().getSizeInBits() == |
1084 | BaseTp->getElementType()->getPrimitiveSizeInBits()) { |
1085 | assert(NumElts >= NumSubElts && NumElts > OrigSubElts && |
1086 | "Unexpected number of elements!"); |
1087 | auto *VecTy = FixedVectorType::get(BaseTp->getElementType(), |
1088 | LT.second.getVectorNumElements()); |
1089 | auto *SubTy = FixedVectorType::get(BaseTp->getElementType(), |
1090 | SubLT.second.getVectorNumElements()); |
1091 | int ExtractIndex = alignDown((Index % NumElts), NumSubElts); |
1092 | InstructionCost ExtractCost = getShuffleCost( |
1093 | TTI::SK_ExtractSubvector, VecTy, None, ExtractIndex, SubTy); |
1094 | |
1095 | |
1096 | |
// +1 when the subvector is at least 32 bits wide or SSSE3 is available.
1097 | if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3()) |
1098 | return ExtractCost + 1; |
1099 | |
// Remaining case is asserted to be a 16-bit subvector without SSSE3: +2.
1100 | assert(SubTp->getPrimitiveSizeInBits() == 16 && |
1101 | "Unexpected vector size"); |
1102 | |
1103 | return ExtractCost + 2; |
1104 | } |
1105 | } |
1106 | } |
1107 | |
1108 | |
1109 | |
1110 | |
// Subvector insertion: SubLT.first when aligned to the legalized subvector
// type; every other insertion is costed as a two-source permute from here on.
1111 | if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) { |
1112 | int NumElts = LT.second.getVectorNumElements(); |
1113 | std::pair<InstructionCost, MVT> SubLT = |
1114 | TLI->getTypeLegalizationCost(DL, SubTp); |
1115 | if (SubLT.second.isVector()) { |
1116 | int NumSubElts = SubLT.second.getVectorNumElements(); |
1117 | if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) |
1118 | return SubLT.first; |
1119 | } |
1120 | |
1121 | |
1122 | Kind = TTI::SK_PermuteTwoSrc; |
1123 | } |
1124 | |
1125 | |
1126 | |
// Vectors narrower than 128 bits on targets without SSSE3 use a dedicated
// table keyed on the un-legalized value type VT (not LT.second); a hit here
// is returned as-is, without the LT.first multiplier.
1127 | EVT VT = TLI->getValueType(DL, BaseTp); |
1128 | if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 && |
1129 | !ST->hasSSSE3()) { |
1130 | static const CostTblEntry SSE2SubVectorShuffleTbl[] = { |
1131 | {TTI::SK_Broadcast, MVT::v4i16, 1}, |
1132 | {TTI::SK_Broadcast, MVT::v2i16, 1}, |
1133 | {TTI::SK_Broadcast, MVT::v8i8, 2}, |
1134 | {TTI::SK_Broadcast, MVT::v4i8, 2}, |
1135 | {TTI::SK_Broadcast, MVT::v2i8, 1}, |
1136 | |
1137 | {TTI::SK_Reverse, MVT::v4i16, 1}, |
1138 | {TTI::SK_Reverse, MVT::v2i16, 1}, |
1139 | {TTI::SK_Reverse, MVT::v4i8, 3}, |
1140 | {TTI::SK_Reverse, MVT::v2i8, 1}, |
1141 | |
1142 | {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, |
1143 | {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, |
1144 | {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, |
1145 | {TTI::SK_PermuteTwoSrc, MVT::v4i8, 4}, |
1146 | {TTI::SK_PermuteTwoSrc, MVT::v2i8, 2}, |
1147 | |
1148 | {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, |
1149 | {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, |
1150 | {TTI::SK_PermuteSingleSrc, MVT::v8i8, 5}, |
1151 | {TTI::SK_PermuteSingleSrc, MVT::v4i8, 3}, |
1152 | {TTI::SK_PermuteSingleSrc, MVT::v2i8, 1}, |
1153 | }; |
1154 | |
1155 | if (ST->hasSSE2()) |
1156 | if (const auto *Entry = |
1157 | CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT())) |
1158 | return Entry->Cost; |
1159 | } |
1160 | |
1161 | |
1162 | |
1163 | |
// Single-source permute whose legalization multiplier is not 1. When the
// legal type keeps the element width but holds fewer elements than BaseTp,
// the cost is (NumOfSrcs - 1) * NumOfDests two-source permutes on the legal
// type, where NumOfSrcs is the store size of BaseTp divided by the store size
// of the legal type, rounded up. Otherwise defer to the base implementation.
1164 | if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { |
1165 | MVT LegalVT = LT.second; |
1166 | if (LegalVT.isVector() && |
1167 | LegalVT.getVectorElementType().getSizeInBits() == |
1168 | BaseTp->getElementType()->getPrimitiveSizeInBits() && |
1169 | LegalVT.getVectorNumElements() < |
1170 | cast<FixedVectorType>(BaseTp)->getNumElements()) { |
1171 | |
1172 | unsigned VecTySize = DL.getTypeStoreSize(BaseTp); |
1173 | unsigned LegalVTSize = LegalVT.getStoreSize(); |
1174 | |
1175 | unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; |
1176 | |
1177 | InstructionCost NumOfDests = LT.first; |
1178 | |
1179 | auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(), |
1180 | LegalVT.getVectorNumElements()); |
1181 | |
1182 | InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; |
1183 | return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, |
1184 | None, 0, nullptr); |
1185 | } |
1186 | |
1187 | return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp); |
1188 | } |
1189 | |
1190 | |
// Two-source permute whose legalization multiplier is not 1: the multiplier
// becomes NumOfDests * (2 * NumOfDests - 1) before the table lookups below.
1191 | if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) { |
1192 | |
1193 | InstructionCost NumOfDests = LT.first; |
1194 | InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1; |
1195 | LT.first = NumOfDests * NumOfShufflesPerDest; |
1196 | } |
1197 | |
// From here on: per-feature cost tables keyed by (shuffle kind, LT.second).
// They are consulted in the order written and the first hit wins, so the
// order of the blocks below is significant.
//
// Half-precision vector shuffles; used only with FP16 support and when the
// subtarget is not using soft-float.
1198 | static const CostTblEntry AVX512FP16ShuffleTbl[] = { |
1199 | {TTI::SK_Broadcast, MVT::v32f16, 1}, |
1200 | {TTI::SK_Broadcast, MVT::v16f16, 1}, |
1201 | {TTI::SK_Broadcast, MVT::v8f16, 1}, |
1202 | |
1203 | {TTI::SK_Reverse, MVT::v32f16, 2}, |
1204 | {TTI::SK_Reverse, MVT::v16f16, 2}, |
1205 | {TTI::SK_Reverse, MVT::v8f16, 1}, |
1206 | |
1207 | {TTI::SK_PermuteSingleSrc, MVT::v32f16, 2}, |
1208 | {TTI::SK_PermuteSingleSrc, MVT::v16f16, 2}, |
1209 | {TTI::SK_PermuteSingleSrc, MVT::v8f16, 1}, |
1210 | |
1211 | {TTI::SK_PermuteTwoSrc, MVT::v32f16, 2}, |
1212 | {TTI::SK_PermuteTwoSrc, MVT::v16f16, 2}, |
1213 | {TTI::SK_PermuteTwoSrc, MVT::v8f16, 2} |
1214 | }; |
1215 | |
1216 | if (!ST->useSoftFloat() && ST->hasFP16()) |
1217 | if (const auto *Entry = |
1218 | CostTableLookup(AVX512FP16ShuffleTbl, Kind, LT.second)) |
1219 | return LT.first * Entry->Cost; |
1220 | |
// Byte-element shuffles when the subtarget reports VBMI.
1221 | static const CostTblEntry AVX512VBMIShuffleTbl[] = { |
1222 | {TTI::SK_Reverse, MVT::v64i8, 1}, |
1223 | {TTI::SK_Reverse, MVT::v32i8, 1}, |
1224 | |
1225 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, |
1226 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, |
1227 | |
1228 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, |
1229 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, |
1230 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} |
1231 | }; |
1232 | |
1233 | if (ST->hasVBMI()) |
1234 | if (const auto *Entry = |
1235 | CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) |
1236 | return LT.first * Entry->Cost; |
1237 | |
// 8- and 16-bit element shuffles when the subtarget reports BWI.
1238 | static const CostTblEntry AVX512BWShuffleTbl[] = { |
1239 | {TTI::SK_Broadcast, MVT::v32i16, 1}, |
1240 | {TTI::SK_Broadcast, MVT::v64i8, 1}, |
1241 | |
1242 | {TTI::SK_Reverse, MVT::v32i16, 2}, |
1243 | {TTI::SK_Reverse, MVT::v16i16, 2}, |
1244 | {TTI::SK_Reverse, MVT::v64i8, 2}, |
1245 | |
1246 | {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, |
1247 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, |
1248 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, |
1249 | |
1250 | {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, |
1251 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, |
1252 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, |
1253 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, |
1254 | |
1255 | {TTI::SK_Select, MVT::v32i16, 1}, |
1256 | {TTI::SK_Select, MVT::v64i8, 1}, |
1257 | }; |
1258 | |
1259 | if (ST->hasBWI()) |
1260 | if (const auto *Entry = |
1261 | CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) |
1262 | return LT.first * Entry->Cost; |
1263 | |
// Baseline AVX-512 table. It also carries v32i16/v64i8 entries, which are
// only reached when the BWI lookup above did not return.
1264 | static const CostTblEntry AVX512ShuffleTbl[] = { |
1265 | {TTI::SK_Broadcast, MVT::v8f64, 1}, |
1266 | {TTI::SK_Broadcast, MVT::v16f32, 1}, |
1267 | {TTI::SK_Broadcast, MVT::v8i64, 1}, |
1268 | {TTI::SK_Broadcast, MVT::v16i32, 1}, |
1269 | {TTI::SK_Broadcast, MVT::v32i16, 1}, |
1270 | {TTI::SK_Broadcast, MVT::v64i8, 1}, |
1271 | |
1272 | {TTI::SK_Reverse, MVT::v8f64, 1}, |
1273 | {TTI::SK_Reverse, MVT::v16f32, 1}, |
1274 | {TTI::SK_Reverse, MVT::v8i64, 1}, |
1275 | {TTI::SK_Reverse, MVT::v16i32, 1}, |
1276 | {TTI::SK_Reverse, MVT::v32i16, 7}, |
1277 | {TTI::SK_Reverse, MVT::v64i8, 7}, |
1278 | |
1279 | {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, |
1280 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, |
1281 | {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, |
1282 | {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, |
1283 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, |
1284 | {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, |
1285 | {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, |
1286 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, |
1287 | {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, |
1288 | {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, |
1289 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, |
1290 | {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, |
1291 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, |
1292 | |
1293 | {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, |
1294 | {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, |
1295 | {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, |
1296 | {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, |
1297 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, |
1298 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, |
1299 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, |
1300 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, |
1301 | {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, |
1302 | {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, |
1303 | {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, |
1304 | {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1}, |
1305 | |
1306 | |
1307 | |
1308 | {TTI::SK_PermuteSingleSrc, MVT::v32i16, 14}, |
1309 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 14}, |
1310 | {TTI::SK_PermuteTwoSrc, MVT::v32i16, 42}, |
1311 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 42}, |
1312 | |
1313 | {TTI::SK_Select, MVT::v32i16, 1}, |
1314 | {TTI::SK_Select, MVT::v64i8, 1}, |
1315 | {TTI::SK_Select, MVT::v8f64, 1}, |
1316 | {TTI::SK_Select, MVT::v16f32, 1}, |
1317 | {TTI::SK_Select, MVT::v8i64, 1}, |
1318 | {TTI::SK_Select, MVT::v16i32, 1}, |
1319 | }; |
1320 | |
1321 | if (ST->hasAVX512()) |
1322 | if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) |
1323 | return LT.first * Entry->Cost; |
1324 | |
// 256-bit vector types when the subtarget reports AVX2.
1325 | static const CostTblEntry AVX2ShuffleTbl[] = { |
1326 | {TTI::SK_Broadcast, MVT::v4f64, 1}, |
1327 | {TTI::SK_Broadcast, MVT::v8f32, 1}, |
1328 | {TTI::SK_Broadcast, MVT::v4i64, 1}, |
1329 | {TTI::SK_Broadcast, MVT::v8i32, 1}, |
1330 | {TTI::SK_Broadcast, MVT::v16i16, 1}, |
1331 | {TTI::SK_Broadcast, MVT::v32i8, 1}, |
1332 | |
1333 | {TTI::SK_Reverse, MVT::v4f64, 1}, |
1334 | {TTI::SK_Reverse, MVT::v8f32, 1}, |
1335 | {TTI::SK_Reverse, MVT::v4i64, 1}, |
1336 | {TTI::SK_Reverse, MVT::v8i32, 1}, |
1337 | {TTI::SK_Reverse, MVT::v16i16, 2}, |
1338 | {TTI::SK_Reverse, MVT::v32i8, 2}, |
1339 | |
1340 | {TTI::SK_Select, MVT::v16i16, 1}, |
1341 | {TTI::SK_Select, MVT::v32i8, 1}, |
1342 | |
1343 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, |
1344 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, |
1345 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, |
1346 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, |
1347 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, |
1348 | |
1349 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, |
1350 | |
1351 | |
1352 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, |
1353 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, |
1354 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, |
1355 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, |
1356 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, |
1357 | |
1358 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, |
1359 | |
1360 | }; |
1361 | |
1362 | if (ST->hasAVX2()) |
1363 | if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) |
1364 | return LT.first * Entry->Cost; |
1365 | |
// Permute costs when the subtarget reports XOP; checked before the AVX1 table.
1366 | static const CostTblEntry XOPShuffleTbl[] = { |
1367 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, |
1368 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, |
1369 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, |
1370 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, |
1371 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, |
1372 | |
1373 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, |
1374 | |
1375 | |
1376 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, |
1377 | |
1378 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, |
1379 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, |
1380 | |
1381 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, |
1382 | }; |
1383 | |
1384 | if (ST->hasXOP()) |
1385 | if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second)) |
1386 | return LT.first * Entry->Cost; |
1387 | |
// 256-bit vector types when the subtarget reports AVX (and no earlier table
// produced a result).
1388 | static const CostTblEntry AVX1ShuffleTbl[] = { |
1389 | {TTI::SK_Broadcast, MVT::v4f64, 2}, |
1390 | {TTI::SK_Broadcast, MVT::v8f32, 2}, |
1391 | {TTI::SK_Broadcast, MVT::v4i64, 2}, |
1392 | {TTI::SK_Broadcast, MVT::v8i32, 2}, |
1393 | {TTI::SK_Broadcast, MVT::v16i16, 3}, |
1394 | {TTI::SK_Broadcast, MVT::v32i8, 2}, |
1395 | |
1396 | {TTI::SK_Reverse, MVT::v4f64, 2}, |
1397 | {TTI::SK_Reverse, MVT::v8f32, 2}, |
1398 | {TTI::SK_Reverse, MVT::v4i64, 2}, |
1399 | {TTI::SK_Reverse, MVT::v8i32, 2}, |
1400 | {TTI::SK_Reverse, MVT::v16i16, 4}, |
1401 | |
1402 | {TTI::SK_Reverse, MVT::v32i8, 4}, |
1403 | |
1404 | |
1405 | {TTI::SK_Select, MVT::v4i64, 1}, |
1406 | {TTI::SK_Select, MVT::v4f64, 1}, |
1407 | {TTI::SK_Select, MVT::v8i32, 1}, |
1408 | {TTI::SK_Select, MVT::v8f32, 1}, |
1409 | {TTI::SK_Select, MVT::v16i16, 3}, |
1410 | {TTI::SK_Select, MVT::v32i8, 3}, |
1411 | |
1412 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, |
1413 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, |
1414 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, |
1415 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, |
1416 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, |
1417 | |
1418 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, |
1419 | |
1420 | |
1421 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, |
1422 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, |
1423 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, |
1424 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, |
1425 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, |
1426 | |
1427 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, |
1428 | |
1429 | }; |
1430 | |
1431 | if (ST->hasAVX()) |
1432 | if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) |
1433 | return LT.first * Entry->Cost; |
1434 | |
// SK_Select on 128-bit types when the subtarget reports SSE4.1.
1435 | static const CostTblEntry SSE41ShuffleTbl[] = { |
1436 | {TTI::SK_Select, MVT::v2i64, 1}, |
1437 | {TTI::SK_Select, MVT::v2f64, 1}, |
1438 | {TTI::SK_Select, MVT::v4i32, 1}, |
1439 | {TTI::SK_Select, MVT::v4f32, 1}, |
1440 | {TTI::SK_Select, MVT::v8i16, 1}, |
1441 | {TTI::SK_Select, MVT::v16i8, 1} |
1442 | }; |
1443 | |
1444 | if (ST->hasSSE41()) |
1445 | if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) |
1446 | return LT.first * Entry->Cost; |
1447 | |
// v8i16 / v16i8 shuffles when the subtarget reports SSSE3.
1448 | static const CostTblEntry SSSE3ShuffleTbl[] = { |
1449 | {TTI::SK_Broadcast, MVT::v8i16, 1}, |
1450 | {TTI::SK_Broadcast, MVT::v16i8, 1}, |
1451 | |
1452 | {TTI::SK_Reverse, MVT::v8i16, 1}, |
1453 | {TTI::SK_Reverse, MVT::v16i8, 1}, |
1454 | |
1455 | {TTI::SK_Select, MVT::v8i16, 3}, |
1456 | {TTI::SK_Select, MVT::v16i8, 3}, |
1457 | |
1458 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, |
1459 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, |
1460 | |
1461 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, |
1462 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, |
1463 | }; |
1464 | |
1465 | if (ST->hasSSSE3()) |
1466 | if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) |
1467 | return LT.first * Entry->Cost; |
1468 | |
// 128-bit integer and f64 vector types when the subtarget reports SSE2.
1469 | static const CostTblEntry SSE2ShuffleTbl[] = { |
1470 | {TTI::SK_Broadcast, MVT::v2f64, 1}, |
1471 | {TTI::SK_Broadcast, MVT::v2i64, 1}, |
1472 | {TTI::SK_Broadcast, MVT::v4i32, 1}, |
1473 | {TTI::SK_Broadcast, MVT::v8i16, 2}, |
1474 | {TTI::SK_Broadcast, MVT::v16i8, 3}, |
1475 | |
1476 | {TTI::SK_Reverse, MVT::v2f64, 1}, |
1477 | {TTI::SK_Reverse, MVT::v2i64, 1}, |
1478 | {TTI::SK_Reverse, MVT::v4i32, 1}, |
1479 | {TTI::SK_Reverse, MVT::v8i16, 3}, |
1480 | {TTI::SK_Reverse, MVT::v16i8, 9}, |
1481 | |
1482 | |
1483 | {TTI::SK_Select, MVT::v2i64, 1}, |
1484 | {TTI::SK_Select, MVT::v2f64, 1}, |
1485 | {TTI::SK_Select, MVT::v4i32, 2}, |
1486 | {TTI::SK_Select, MVT::v8i16, 3}, |
1487 | {TTI::SK_Select, MVT::v16i8, 3}, |
1488 | |
1489 | {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, |
1490 | {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, |
1491 | {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, |
1492 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, |
1493 | |
1494 | { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, |
1495 | |
1496 | |
1497 | { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, |
1498 | { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, |
1499 | { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, |
1500 | { TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, |
1501 | { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, |
1502 | }; |
1503 | |
1504 | if (ST->hasSSE2()) |
1505 | if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) |
1506 | return LT.first * Entry->Cost; |
1507 | |
// v4f32 shuffles when the subtarget reports SSE1.
1508 | static const CostTblEntry SSE1ShuffleTbl[] = { |
1509 | { TTI::SK_Broadcast, MVT::v4f32, 1 }, |
1510 | { TTI::SK_Reverse, MVT::v4f32, 1 }, |
1511 | { TTI::SK_Select, MVT::v4f32, 2 }, |
1512 | { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, |
1513 | { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, |
1514 | }; |
1515 | |
1516 | if (ST->hasSSE1()) |
1517 | if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) |
1518 | return LT.first * Entry->Cost; |
1519 | |
// No table matched: use the target-independent estimate. Note that Kind may
// have been rewritten above (e.g. Transpose / InsertSubvector -> PermuteTwoSrc).
1520 | return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp); |
1521 | } |
1522 | |
1523 | InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, |
1524 | Type *Src, |
1525 | TTI::CastContextHint CCH, |
1526 | TTI::TargetCostKind CostKind, |
1527 | const Instruction *I) { |
1528 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
1529 | assert(ISD && "Invalid opcode"); |
1530 | |
1531 | |
1532 | auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost { |
1533 | if (CostKind != TTI::TCK_RecipThroughput) |
1534 | return Cost == 0 ? 0 : 1; |
1535 | return Cost; |
1536 | }; |
1537 | |
1538 | |
1539 | |
1540 | |
1541 | |
1542 | |
1543 | static const TypeConversionCostTblEntry AVX512BWConversionTbl[] { |
1544 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, |
1545 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, |
1546 | |
1547 | |
1548 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, |
1549 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, |
1550 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, |
1551 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, |
1552 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, |
1553 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, |
1554 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, |
1555 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1556 | { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, |
1557 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 }, |
1558 | { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 }, |
1559 | |
1560 | |
1561 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, |
1562 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, |
1563 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, |
1564 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, |
1565 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, |
1566 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, |
1567 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, |
1568 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, |
1569 | { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, |
1570 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, |
1571 | { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, |
1572 | |
1573 | { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 }, |
1574 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1575 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, |
1576 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
1577 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, |
1578 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
1579 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, |
1580 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, |
1581 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, |
1582 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, |
1583 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, |
1584 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, |
1585 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, |
1586 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, |
1587 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, |
1588 | { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, |
1589 | }; |
1590 | |
1591 | static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { |
1592 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, |
1593 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, |
1594 | |
1595 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, |
1596 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, |
1597 | |
1598 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 }, |
1599 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 }, |
1600 | |
1601 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, |
1602 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, |
1603 | }; |
1604 | |
1605 | |
1606 | |
1607 | |
1608 | static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { |
1609 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, |
1610 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, |
1611 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, |
1612 | |
1613 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
1614 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, |
1615 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, |
1616 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, |
1617 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, |
1618 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, |
1619 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, |
1620 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, |
1621 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, |
1622 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, |
1623 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1624 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, |
1625 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, |
1626 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, |
1627 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, |
1628 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, |
1629 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 2 }, |
1630 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, |
1631 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, |
1632 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 2 }, |
1633 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, |
1634 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, |
1635 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 }, |
1636 | { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, |
1637 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1638 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 }, |
1639 | |
1640 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, |
1641 | { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 }, |
1642 | |
1643 | |
1644 | |
1645 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 3 }, |
1646 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 4 }, |
1647 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 3 }, |
1648 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 4 }, |
1649 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 3 }, |
1650 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 4 }, |
1651 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 3 }, |
1652 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 4 }, |
1653 | |
1654 | |
1655 | |
1656 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 3 }, |
1657 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 4 }, |
1658 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 3 }, |
1659 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 4 }, |
1660 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 3 }, |
1661 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 4 }, |
1662 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 3 }, |
1663 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1664 | |
1665 | { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, |
1666 | { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, |
1667 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, |
1668 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, |
1669 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, |
1670 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, |
1671 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, |
1672 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, |
1673 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, |
1674 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, |
1675 | |
1676 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 }, |
1677 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, |
1678 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 }, |
1679 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 }, |
1680 | |
1681 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, |
1682 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, |
1683 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, |
1684 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, |
1685 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, |
1686 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, |
1687 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, |
1688 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, |
1689 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, |
1690 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, |
1691 | |
1692 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, |
1693 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, |
1694 | |
1695 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, |
1696 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, |
1697 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, |
1698 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, |
1699 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, |
1700 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, |
1701 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, |
1702 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, |
1703 | |
1704 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, |
1705 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, |
1706 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, |
1707 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, |
1708 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, |
1709 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, |
1710 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, |
1711 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, |
1712 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, |
1713 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, |
1714 | |
1715 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 }, |
1716 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f64, 7 }, |
1717 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f64,15 }, |
1718 | { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f32,11 }, |
1719 | { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f64,31 }, |
1720 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 }, |
1721 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f64, 7 }, |
1722 | { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f32, 5 }, |
1723 | { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f64,15 }, |
1724 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 1 }, |
1725 | { ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, 3 }, |
1726 | |
1727 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, |
1728 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 }, |
1729 | { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 }, |
1730 | { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, |
1731 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 }, |
1732 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 }, |
1733 | }; |
1734 | |
// Conversion costs looked up when ST->hasBWI() is true, after the tables that
// are guarded by ST->useAVX512Regs() have been tried. Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) argument order of the ConvertCostTableLookup calls in this
// function; a hit is returned through AdjustCost(Entry->Cost). In the second
// (legalized-type) lookup pass the cost is scaled by
// max(LTSrc.first, LTDest.first).
1735 | static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] { |
1736 | |
// Sign extension from i1 vectors to i8/i16 element vectors: 1 each.
1737 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, |
1738 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, |
1739 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, |
1740 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, |
1741 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, |
1742 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, |
1743 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, |
1744 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1745 | { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, |
1746 | |
1747 | |
// Zero extension for the same type pairs: 2 each.
1748 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, |
1749 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, |
1750 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, |
1751 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, |
1752 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, |
1753 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, |
1754 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, |
1755 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, |
1756 | { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, |
1757 | |
// Truncations: v16i16 -> v16i8, and i8/i16 element vectors -> i1 vectors.
1758 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1759 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, |
1760 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
1761 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
1762 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, |
1763 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, |
1764 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, |
1765 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, |
1766 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, |
1767 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, |
1768 | }; |
1769 | |
// Conversion costs looked up when ST->hasDQI() is true (outside the
// ST->useAVX512Regs() block). Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) order passed to ConvertCostTableLookup in this function.
// Every entry here has cost 1 and involves v2i64 or v4i64 on the integer side.
1770 | static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = { |
// Signed i64 vectors -> f32/f64 vectors.
1771 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, |
1772 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, |
1773 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, |
1774 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, |
1775 | |
// Unsigned i64 vectors -> f32/f64 vectors.
1776 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, |
1777 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, |
1778 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, |
1779 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, |
1780 | |
// f32/f64 vectors -> signed i64 vectors.
1781 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v4f32, 1 }, |
1782 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 }, |
1783 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, |
1784 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 }, |
1785 | |
// f32/f64 vectors -> unsigned i64 vectors.
1786 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v4f32, 1 }, |
1787 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, |
1788 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, |
1789 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, |
1790 | }; |
1791 | |
// Conversion costs looked up when ST->hasAVX512() is true (outside the
// ST->useAVX512Regs() block), after the BWI- and DQI-guarded tables have been
// tried. Entries read as { ISD opcode, destination MVT, source MVT, cost },
// matching the (ISD, Dst, Src) order passed to ConvertCostTableLookup in this
// function.
1792 | static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = { |
// Truncations to i1 vectors and between integer vectors.
1793 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
1794 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, |
1795 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, |
1796 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, |
1797 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, |
1798 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, |
1799 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, |
1800 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, |
1801 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, |
1802 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, |
1803 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1804 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, |
1805 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, |
1806 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1807 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, |
1808 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, |
1809 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, |
1810 | |
1811 | |
1812 | |
// Extensions from i1 vectors to i8 element vectors.
1813 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 }, |
1814 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 6 }, |
1815 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 5 }, |
1816 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 6 }, |
1817 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 5 }, |
1818 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 6 }, |
1819 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 10 }, |
1820 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 12 }, |
1821 | |
1822 | |
1823 | |
// Extensions from i1 vectors to i16 element vectors.
1824 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 4 }, |
1825 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 5 }, |
1826 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 4 }, |
1827 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 5 }, |
1828 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 4 }, |
1829 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 5 }, |
1830 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 10 }, |
1831 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 12 }, |
1832 | |
// Extensions from i1 vectors to i32/i64 element vectors: sext 1, zext 2.
1833 | { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, |
1834 | { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, |
1835 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, |
1836 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, |
1837 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, |
1838 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, |
1839 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, |
1840 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, |
1841 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, |
1842 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, |
1843 | |
// Integer vector widening: 1 each.
1844 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 1 }, |
1845 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 1 }, |
1846 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 1 }, |
1847 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 1 }, |
1848 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, |
1849 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, |
1850 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 1 }, |
1851 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 1 }, |
1852 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, |
1853 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, |
1854 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, |
1855 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, |
1856 | |
// Signed integer -> floating point.
1857 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
1858 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, |
1859 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
1860 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, |
1861 | |
// Unsigned integer -> floating point (includes scalar i64 sources).
1862 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, |
1863 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, |
1864 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
1865 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, |
1866 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
1867 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, |
1868 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, |
1869 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
1870 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, |
1871 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, |
1872 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 }, |
1873 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, |
1874 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 }, |
1875 | |
// Floating point -> signed i8 vectors.
1876 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 }, |
1877 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 }, |
1878 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f32, 5 }, |
1879 | |
// Floating point -> unsigned integer (includes scalar i64 destinations).
1880 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, |
1881 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, |
1882 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, |
1883 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 1 }, |
1884 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 }, |
1885 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, |
1886 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, |
1887 | }; |
1888 | |
// Conversion costs looked up when ST->hasAVX2() is true, after all of the
// AVX-512 guarded tables in this function have missed. Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) order passed to ConvertCostTableLookup in this function.
1889 | static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { |
// Extensions from i1 vectors.
1890 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, |
1891 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, |
1892 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, |
1893 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, |
1894 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1895 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1896 | |
// Integer vector widening.
1897 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 2 }, |
1898 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 2 }, |
1899 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 2 }, |
1900 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 2 }, |
1901 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, |
1902 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, |
1903 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 2 }, |
1904 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 2 }, |
1905 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, |
1906 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, |
1907 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, |
1908 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, |
1909 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, |
1910 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, |
1911 | |
// Truncation to an i1 vector.
1912 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1913 | |
// Integer vector truncations.
1914 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 1 }, |
1915 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 1 }, |
1916 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 1 }, |
1917 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 4 }, |
1918 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i64, 4 }, |
1919 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 1 }, |
1920 | { ISD::TRUNCATE, MVT::v8i16, MVT::v2i64, 1 }, |
1921 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i64, 5 }, |
1922 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1923 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, |
1924 | |
// Floating-point width changes between v8f32 and v8f64.
1925 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, |
1926 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, |
1927 | |
// Floating point -> signed integer.
1928 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 1 }, |
1929 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 1 }, |
1930 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 1 }, |
1931 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 3 }, |
1932 | |
// Floating point -> unsigned integer (includes scalar i64 destinations).
1933 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 3 }, |
1934 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 3 }, |
1935 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 1 }, |
1936 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 3 }, |
1937 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
1938 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4 }, |
1939 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 3 }, |
1940 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 4 }, |
1941 | |
// Signed integer -> floating point.
1942 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, |
1943 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, |
1944 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, |
1945 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, |
1946 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, |
1947 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, |
1948 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 3 }, |
1949 | |
// Unsigned integer -> floating point.
1950 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, |
1951 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, |
1952 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, |
1953 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, |
1954 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, |
1955 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, |
1956 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 2 }, |
1957 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
1958 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
1959 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, |
1960 | }; |
1961 | |
// Conversion costs looked up when ST->hasAVX() is true, after the AVX-512 and
// AVX2 guarded tables in this function have missed. Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) order passed to ConvertCostTableLookup in this function.
1962 | static const TypeConversionCostTblEntry AVXConversionTbl[] = { |
// Extensions from i1 vectors.
1963 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, |
1964 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, |
1965 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, |
1966 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, |
1967 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1968 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1969 | |
// Integer vector widening: 3 each.
1970 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 3 }, |
1971 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 3 }, |
1972 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 3 }, |
1973 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 3 }, |
1974 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, |
1975 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, |
1976 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 3 }, |
1977 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 3 }, |
1978 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, |
1979 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, |
1980 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, |
1981 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, |
1982 | |
// Truncations to i1 vectors.
1983 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 }, |
1984 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 }, |
1985 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 }, |
1986 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, |
1987 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, |
1988 | |
// Integer vector truncations.
1989 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1990 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 5 }, |
1991 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, |
1992 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i64, 5 }, |
1993 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i64, 3 }, |
1994 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, |
1995 | |
// Signed integer -> floating point.
1996 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, |
1997 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, |
1998 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, |
1999 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, |
2000 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, |
2001 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
2002 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, |
2003 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
2004 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
2005 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, |
2006 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 }, |
2007 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 }, |
2008 | |
// Unsigned integer -> floating point.
2009 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, |
2010 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, |
2011 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, |
2012 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, |
2013 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, |
2014 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
2015 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, |
2016 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 4 }, |
2017 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, |
2018 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, |
2019 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, |
2020 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, |
2021 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, |
2022 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 }, |
2023 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, |
2024 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, |
2025 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 }, |
2026 | |
// Floating point -> signed integer.
2027 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 }, |
2028 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f64, 2 }, |
2029 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v8f32, 2 }, |
2030 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v4f64, 2 }, |
2031 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 2 }, |
2032 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f64, 2 }, |
2033 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 2 }, |
2034 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v4f64, 2 }, |
2035 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 }, |
2036 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 }, |
2037 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 }, |
2038 | |
// Floating point -> unsigned integer.
2039 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v8f32, 2 }, |
2040 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f64, 2 }, |
2041 | { ISD::FP_TO_UINT, MVT::v32i8, MVT::v8f32, 2 }, |
2042 | { ISD::FP_TO_UINT, MVT::v32i8, MVT::v4f64, 2 }, |
2043 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 2 }, |
2044 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f64, 2 }, |
2045 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 2 }, |
2046 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v4f64, 2 }, |
2047 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 3 }, |
2048 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
2049 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 6 }, |
2050 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 7 }, |
2051 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 7 }, |
2052 | |
// Floating-point width changes between v4f32 and v4f64.
2053 | { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 }, |
2054 | { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 }, |
2055 | }; |
2056 | |
// Conversion costs looked up when ST->hasSSE41() is true, after the AVX-512,
// AVX2 and AVX guarded tables in this function have missed. Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) order passed to ConvertCostTableLookup in this function.
2057 | static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { |
// Integer vector widening: 1 each.
2058 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 1 }, |
2059 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 1 }, |
2060 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v16i8, 1 }, |
2061 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v16i8, 1 }, |
2062 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2063 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2064 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v8i16, 1 }, |
2065 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v8i16, 1 }, |
2066 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2067 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2068 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2069 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2070 | |
2071 | |
// Truncations to i1 vectors.
2072 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, |
2073 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, |
2074 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, |
2075 | |
// Integer vector truncations.
2076 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 2 }, |
2077 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 2 }, |
2078 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 2 }, |
2079 | |
// Signed integer -> floating point (scalar and vector).
2080 | { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 1 }, |
2081 | { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 1 }, |
2082 | { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 1 }, |
2083 | { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 1 }, |
2084 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, |
2085 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
2086 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, |
2087 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
2088 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
2089 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 }, |
2090 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
2091 | |
// Unsigned integer -> floating point (scalar and vector).
2092 | { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 1 }, |
2093 | { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 1 }, |
2094 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, |
2095 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, |
2096 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, |
2097 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
2098 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, |
2099 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
2100 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 3 }, |
2101 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, |
2102 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 2 }, |
2103 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 }, |
2104 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 }, |
2105 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 }, |
2106 | |
// Floating point -> signed integer (scalar and vector).
2107 | { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 1 }, |
2108 | { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 1 }, |
2109 | { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 1 }, |
2110 | { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 1 }, |
2111 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 2 }, |
2112 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 2 }, |
2113 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 1 }, |
2114 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 1 }, |
2115 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, |
2116 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 1 }, |
2117 | |
// Floating point -> unsigned integer (scalar and vector).
2118 | { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 1 }, |
2119 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, |
2120 | { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 1 }, |
2121 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, |
2122 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 2 }, |
2123 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 2 }, |
2124 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 1 }, |
2125 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 1 }, |
2126 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 4 }, |
2127 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
2128 | }; |
2129 | |
// Conversion costs looked up when ST->hasSSE2() is true; this is the last
// table tried in each lookup pass of this function. Entries read as
// { ISD opcode, destination MVT, source MVT, cost }, matching the
// (ISD, Dst, Src) order passed to ConvertCostTableLookup in this function.
2130 | static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { |
2131 | |
2132 | |
2133 | |
// Signed integer -> floating point (scalar and vector).
2134 | { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 3 }, |
2135 | { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 3 }, |
2136 | { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 3 }, |
2137 | { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 3 }, |
2138 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 }, |
2139 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, |
2140 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 }, |
2141 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, |
2142 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, |
2143 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4 }, |
2144 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 }, |
2145 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 }, |
2146 | |
// Unsigned integer -> floating point (scalar and vector).
2147 | { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 3 }, |
2148 | { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 3 }, |
2149 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 }, |
2150 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 }, |
2151 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, |
2152 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 }, |
2153 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 }, |
2154 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, |
2155 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 7 }, |
2156 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 }, |
2157 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, |
2158 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 }, |
2159 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 }, |
2160 | |
// Floating point -> signed integer (scalar and vector).
2161 | { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 4 }, |
2162 | { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 4 }, |
2163 | { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 4 }, |
2164 | { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 4 }, |
2165 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 6 }, |
2166 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 6 }, |
2167 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 5 }, |
2168 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 5 }, |
2169 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 4 }, |
2170 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 4 }, |
2171 | |
// Floating point -> unsigned integer (scalar and vector).
2172 | { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 4 }, |
2173 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, |
2174 | { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 4 }, |
2175 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 }, |
2176 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 6 }, |
2177 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 6 }, |
2178 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 5 }, |
2179 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 5 }, |
2180 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 8 }, |
2181 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 8 }, |
2182 | |
// Integer vector widening; sign extension is listed at a cost greater than or
// equal to the matching zero extension.
2183 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 4 }, |
2184 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 4 }, |
2185 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v16i8, 2 }, |
2186 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v16i8, 3 }, |
2187 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2188 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v16i8, 2 }, |
2189 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v8i16, 2 }, |
2190 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v8i16, 3 }, |
2191 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2192 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v8i16, 2 }, |
2193 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2194 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v4i32, 2 }, |
2195 | |
2196 | |
// Truncations to i1 vectors.
2197 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, |
2198 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
2199 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
2200 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, |
2201 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
2202 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, |
2203 | |
// Integer vector truncations.
2204 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 2 }, |
2205 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, |
2206 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 3 }, |
2207 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, |
2208 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, |
2209 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 3 }, |
2210 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, |
2211 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32,10 }, |
2212 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 4 }, |
2213 | { ISD::TRUNCATE, MVT::v8i16, MVT::v2i64, 2 }, |
2214 | { ISD::TRUNCATE, MVT::v4i32, MVT::v2i64, 1 }, |
2215 | }; |
2216 | |
2217 | |
2218 | EVT SrcTy = TLI->getValueType(DL, Src); |
2219 | EVT DstTy = TLI->getValueType(DL, Dst); |
2220 | |
2221 | |
2222 | if (SrcTy.isSimple() && DstTy.isSimple()) { |
2223 | MVT SimpleSrcTy = SrcTy.getSimpleVT(); |
2224 | MVT SimpleDstTy = DstTy.getSimpleVT(); |
2225 | |
2226 | if (ST->useAVX512Regs()) { |
2227 | if (ST->hasBWI()) |
2228 | if (const auto *Entry = ConvertCostTableLookup( |
2229 | AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2230 | return AdjustCost(Entry->Cost); |
2231 | |
2232 | if (ST->hasDQI()) |
2233 | if (const auto *Entry = ConvertCostTableLookup( |
2234 | AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2235 | return AdjustCost(Entry->Cost); |
2236 | |
2237 | if (ST->hasAVX512()) |
2238 | if (const auto *Entry = ConvertCostTableLookup( |
2239 | AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2240 | return AdjustCost(Entry->Cost); |
2241 | } |
2242 | |
2243 | if (ST->hasBWI()) |
2244 | if (const auto *Entry = ConvertCostTableLookup( |
2245 | AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2246 | return AdjustCost(Entry->Cost); |
2247 | |
2248 | if (ST->hasDQI()) |
2249 | if (const auto *Entry = ConvertCostTableLookup( |
2250 | AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2251 | return AdjustCost(Entry->Cost); |
2252 | |
2253 | if (ST->hasAVX512()) |
2254 | if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, |
2255 | SimpleDstTy, SimpleSrcTy)) |
2256 | return AdjustCost(Entry->Cost); |
2257 | |
2258 | if (ST->hasAVX2()) { |
2259 | if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, |
2260 | SimpleDstTy, SimpleSrcTy)) |
2261 | return AdjustCost(Entry->Cost); |
2262 | } |
2263 | |
2264 | if (ST->hasAVX()) { |
2265 | if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, |
2266 | SimpleDstTy, SimpleSrcTy)) |
2267 | return AdjustCost(Entry->Cost); |
2268 | } |
2269 | |
2270 | if (ST->hasSSE41()) { |
2271 | if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, |
2272 | SimpleDstTy, SimpleSrcTy)) |
2273 | return AdjustCost(Entry->Cost); |
2274 | } |
2275 | |
2276 | if (ST->hasSSE2()) { |
2277 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, |
2278 | SimpleDstTy, SimpleSrcTy)) |
2279 | return AdjustCost(Entry->Cost); |
2280 | } |
2281 | } |
2282 | |
2283 | |
2284 | std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src); |
2285 | std::pair<InstructionCost, MVT> LTDest = |
2286 | TLI->getTypeLegalizationCost(DL, Dst); |
2287 | |
2288 | if (ST->useAVX512Regs()) { |
2289 | if (ST->hasBWI()) |
2290 | if (const auto *Entry = ConvertCostTableLookup( |
2291 | AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2292 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2293 | |
2294 | if (ST->hasDQI()) |
2295 | if (const auto *Entry = ConvertCostTableLookup( |
2296 | AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2297 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2298 | |
2299 | if (ST->hasAVX512()) |
2300 | if (const auto *Entry = ConvertCostTableLookup( |
2301 | AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2302 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2303 | } |
2304 | |
2305 | if (ST->hasBWI()) |
2306 | if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, |
2307 | LTDest.second, LTSrc.second)) |
2308 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2309 | |
2310 | if (ST->hasDQI()) |
2311 | if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, |
2312 | LTDest.second, LTSrc.second)) |
2313 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2314 | |
2315 | if (ST->hasAVX512()) |
2316 | if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, |
2317 | LTDest.second, LTSrc.second)) |
2318 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2319 | |
2320 | if (ST->hasAVX2()) |
2321 | if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, |
2322 | LTDest.second, LTSrc.second)) |
2323 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2324 | |
2325 | if (ST->hasAVX()) |
2326 | if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, |
2327 | LTDest.second, LTSrc.second)) |
2328 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2329 | |
2330 | if (ST->hasSSE41()) |
2331 | if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, |
2332 | LTDest.second, LTSrc.second)) |
2333 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2334 | |
2335 | if (ST->hasSSE2()) |
2336 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, |
2337 | LTDest.second, LTSrc.second)) |
2338 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2339 | |
2340 | |
2341 | |
2342 | if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) && |
2343 | 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) { |
2344 | Type *ExtSrc = Src->getWithNewBitWidth(32); |
2345 | unsigned ExtOpc = |
2346 | (ISD == ISD::SINT_TO_FP) ? Instruction::SExt : Instruction::ZExt; |
2347 | |
2348 | |
2349 | InstructionCost ExtCost = 0; |
2350 | if (!(Src->isIntegerTy() && I && isa<LoadInst>(I->getOperand(0)))) |
2351 | ExtCost = getCastInstrCost(ExtOpc, ExtSrc, Src, CCH, CostKind); |
2352 | |
2353 | return ExtCost + getCastInstrCost(Instruction::SIToFP, Dst, ExtSrc, |
2354 | TTI::CastContextHint::None, CostKind); |
2355 | } |
2356 | |
2357 | |
2358 | |
2359 | if ((ISD == ISD::FP_TO_SINT || ISD == ISD::FP_TO_UINT) && |
2360 | 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) { |
2361 | Type *TruncDst = Dst->getWithNewBitWidth(32); |
2362 | return getCastInstrCost(Instruction::FPToSI, TruncDst, Src, CCH, CostKind) + |
2363 | getCastInstrCost(Instruction::Trunc, Dst, TruncDst, |
2364 | TTI::CastContextHint::None, CostKind); |
2365 | } |
2366 | |
2367 | return AdjustCost( |
2368 | BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); |
2369 | } |
2370 | |
2371 | InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
2372 | Type *CondTy, |
2373 | CmpInst::Predicate VecPred, |
2374 | TTI::TargetCostKind CostKind, |
2375 | const Instruction *I) { |
2376 | |
2377 | if (CostKind != TTI::TCK_RecipThroughput) |
2378 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, |
2379 | I); |
2380 | |
2381 | |
2382 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
2383 | |
2384 | MVT MTy = LT.second; |
2385 | |
2386 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
2387 | assert(ISD && "Invalid opcode"); |
2388 | |
2389 | unsigned ExtraCost = 0; |
2390 | if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) { |
2391 | |
2392 | if (MTy.isVector() && |
2393 | !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) || |
2394 | (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) || |
2395 | ST->hasBWI())) { |
2396 | switch (cast<CmpInst>(I)->getPredicate()) { |
2397 | case CmpInst::Predicate::ICMP_NE: |
2398 | |
2399 | ExtraCost = 1; |
2400 | break; |
2401 | case CmpInst::Predicate::ICMP_SGE: |
2402 | case CmpInst::Predicate::ICMP_SLE: |
2403 | |
2404 | ExtraCost = 1; |
2405 | break; |
2406 | case CmpInst::Predicate::ICMP_ULT: |
2407 | case CmpInst::Predicate::ICMP_UGT: |
2408 | |
2409 | |
2410 | ExtraCost = 2; |
2411 | break; |
2412 | case CmpInst::Predicate::ICMP_ULE: |
2413 | case CmpInst::Predicate::ICMP_UGE: |
2414 | if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) || |
2415 | (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) { |
2416 | |
2417 | |
2418 | ExtraCost = 1; |
2419 | } else { |
2420 | |
2421 | ExtraCost = 3; |
2422 | } |
2423 | break; |
2424 | default: |
2425 | break; |
2426 | } |
2427 | } |
2428 | } |
2429 | |
2430 | static const CostTblEntry SLMCostTbl[] = { |
2431 | |
2432 | { ISD::SETCC, MVT::v2i64, 2 }, |
2433 | }; |
2434 | |
2435 | static const CostTblEntry AVX512BWCostTbl[] = { |
2436 | { ISD::SETCC, MVT::v32i16, 1 }, |
2437 | { ISD::SETCC, MVT::v64i8, 1 }, |
2438 | |
2439 | { ISD::SELECT, MVT::v32i16, 1 }, |
2440 | { ISD::SELECT, MVT::v64i8, 1 }, |
2441 | }; |
2442 | |
2443 | static const CostTblEntry AVX512CostTbl[] = { |
2444 | { ISD::SETCC, MVT::v8i64, 1 }, |
2445 | { ISD::SETCC, MVT::v16i32, 1 }, |
2446 | { ISD::SETCC, MVT::v8f64, 1 }, |
2447 | { ISD::SETCC, MVT::v16f32, 1 }, |
2448 | |
2449 | { ISD::SELECT, MVT::v8i64, 1 }, |
2450 | { ISD::SELECT, MVT::v16i32, 1 }, |
2451 | { ISD::SELECT, MVT::v8f64, 1 }, |
2452 | { ISD::SELECT, MVT::v16f32, 1 }, |
2453 | |
2454 | { ISD::SETCC, MVT::v32i16, 2 }, |
2455 | { ISD::SETCC, MVT::v64i8, 2 }, |
2456 | |
2457 | { ISD::SELECT, MVT::v32i16, 2 }, |
2458 | { ISD::SELECT, MVT::v64i8, 2 }, |
2459 | }; |
2460 | |
2461 | static const CostTblEntry AVX2CostTbl[] = { |
2462 | { ISD::SETCC, MVT::v4i64, 1 }, |
2463 | { ISD::SETCC, MVT::v8i32, 1 }, |
2464 | { ISD::SETCC, MVT::v16i16, 1 }, |
2465 | { ISD::SETCC, MVT::v32i8, 1 }, |
2466 | |
2467 | { ISD::SELECT, MVT::v4i64, 1 }, |
2468 | { ISD::SELECT, MVT::v8i32, 1 }, |
2469 | { ISD::SELECT, MVT::v16i16, 1 }, |
2470 | { ISD::SELECT, MVT::v32i8, 1 }, |
2471 | }; |
2472 | |
2473 | static const CostTblEntry AVX1CostTbl[] = { |
2474 | { ISD::SETCC, MVT::v4f64, 1 }, |
2475 | { ISD::SETCC, MVT::v8f32, 1 }, |
2476 | |
2477 | { ISD::SETCC, MVT::v4i64, 4 }, |
2478 | { ISD::SETCC, MVT::v8i32, 4 }, |
2479 | { ISD::SETCC, MVT::v16i16, 4 }, |
2480 | { ISD::SETCC, MVT::v32i8, 4 }, |
2481 | |
2482 | { ISD::SELECT, MVT::v4f64, 1 }, |
2483 | { ISD::SELECT, MVT::v8f32, 1 }, |
2484 | { ISD::SELECT, MVT::v4i64, 1 }, |
2485 | { ISD::SELECT, MVT::v8i32, 1 }, |
2486 | { ISD::SELECT, MVT::v16i16, 3 }, |
2487 | { ISD::SELECT, MVT::v32i8, 3 }, |
2488 | }; |
2489 | |
2490 | static const CostTblEntry SSE42CostTbl[] = { |
2491 | { ISD::SETCC, MVT::v2f64, 1 }, |
2492 | { ISD::SETCC, MVT::v4f32, 1 }, |
2493 | { ISD::SETCC, MVT::v2i64, 1 }, |
2494 | }; |
2495 | |
2496 | static const CostTblEntry SSE41CostTbl[] = { |
2497 | { ISD::SELECT, MVT::v2f64, 1 }, |
2498 | { ISD::SELECT, MVT::v4f32, 1 }, |
2499 | { ISD::SELECT, MVT::v2i64, 1 }, |
2500 | { ISD::SELECT, MVT::v4i32, 1 }, |
2501 | { ISD::SELECT, MVT::v8i16, 1 }, |
2502 | { ISD::SELECT, MVT::v16i8, 1 }, |
2503 | }; |
2504 | |
2505 | static const CostTblEntry SSE2CostTbl[] = { |
2506 | { ISD::SETCC, MVT::v2f64, 2 }, |
2507 | { ISD::SETCC, MVT::f64, 1 }, |
2508 | { ISD::SETCC, MVT::v2i64, 8 }, |
2509 | { ISD::SETCC, MVT::v4i32, 1 }, |
2510 | { ISD::SETCC, MVT::v8i16, 1 }, |
2511 | { ISD::SETCC, MVT::v16i8, 1 }, |
2512 | |
2513 | { ISD::SELECT, MVT::v2f64, 3 }, |
2514 | { ISD::SELECT, MVT::v2i64, 3 }, |
2515 | { ISD::SELECT, MVT::v4i32, 3 }, |
2516 | { ISD::SELECT, MVT::v8i16, 3 }, |
2517 | { ISD::SELECT, MVT::v16i8, 3 }, |
2518 | }; |
2519 | |
2520 | static const CostTblEntry SSE1CostTbl[] = { |
2521 | { ISD::SETCC, MVT::v4f32, 2 }, |
2522 | { ISD::SETCC, MVT::f32, 1 }, |
2523 | |
2524 | { ISD::SELECT, MVT::v4f32, 3 }, |
2525 | }; |
2526 | |
2527 | if (ST->isSLM()) |
2528 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) |
2529 | return LT.first * (ExtraCost + Entry->Cost); |
2530 | |
2531 | if (ST->hasBWI()) |
2532 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
2533 | return LT.first * (ExtraCost + Entry->Cost); |
2534 | |
2535 | if (ST->hasAVX512()) |
2536 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
2537 | return LT.first * (ExtraCost + Entry->Cost); |
2538 | |
2539 | if (ST->hasAVX2()) |
2540 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
2541 | return LT.first * (ExtraCost + Entry->Cost); |
2542 | |
2543 | if (ST->hasAVX()) |
2544 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
2545 | return LT.first * (ExtraCost + Entry->Cost); |
2546 | |
2547 | if (ST->hasSSE42()) |
2548 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
2549 | return LT.first * (ExtraCost + Entry->Cost); |
2550 | |
2551 | if (ST->hasSSE41()) |
2552 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
2553 | return LT.first * (ExtraCost + Entry->Cost); |
2554 | |
2555 | if (ST->hasSSE2()) |
2556 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
2557 | return LT.first * (ExtraCost + Entry->Cost); |
2558 | |
2559 | if (ST->hasSSE1()) |
2560 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
2561 | return LT.first * (ExtraCost + Entry->Cost); |
2562 | |
2563 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
2564 | } |
2565 | |
2566 | unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } |
2567 | |
/// Estimates the cost of an intrinsic call from the type information in ICA.
///
/// The intrinsic ID is mapped to an ISD opcode, the operand type is legalized,
/// and a cascade of per-feature cost tables is searched in a fixed order. The
/// first table that is enabled for the subtarget and contains the
/// (opcode, legalized type) pair decides the result: its cost is scaled by the
/// legalization cost LT.first (see adjustTableCost below). If the intrinsic has
/// no opcode mapping, or no table matches, the BaseT implementation is used.
/// CostKind is only forwarded to that fallback.
2568 | InstructionCost |
2569 | X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2570 | TTI::TargetCostKind CostKind) { |
2571 | |
2572 | |
2573 | |
2574 | |
2575 | |
2576 | |
2577 | |
2578 | |
2579 | |
2580 | |
// Each CostTblEntry below is { ISD opcode, legalized MVT, cost }.
// Consulted when ST->hasBITALG().
2581 | static const CostTblEntry AVX512BITALGCostTbl[] = { |
2582 | { ISD::CTPOP, MVT::v32i16, 1 }, |
2583 | { ISD::CTPOP, MVT::v64i8, 1 }, |
2584 | { ISD::CTPOP, MVT::v16i16, 1 }, |
2585 | { ISD::CTPOP, MVT::v32i8, 1 }, |
2586 | { ISD::CTPOP, MVT::v8i16, 1 }, |
2587 | { ISD::CTPOP, MVT::v16i8, 1 }, |
2588 | }; |
// Consulted when ST->hasVPOPCNTDQ().
2589 | static const CostTblEntry AVX512VPOPCNTDQCostTbl[] = { |
2590 | { ISD::CTPOP, MVT::v8i64, 1 }, |
2591 | { ISD::CTPOP, MVT::v16i32, 1 }, |
2592 | { ISD::CTPOP, MVT::v4i64, 1 }, |
2593 | { ISD::CTPOP, MVT::v8i32, 1 }, |
2594 | { ISD::CTPOP, MVT::v2i64, 1 }, |
2595 | { ISD::CTPOP, MVT::v4i32, 1 }, |
2596 | }; |
// Consulted when ST->hasCDI().
2597 | static const CostTblEntry AVX512CDCostTbl[] = { |
2598 | { ISD::CTLZ, MVT::v8i64, 1 }, |
2599 | { ISD::CTLZ, MVT::v16i32, 1 }, |
2600 | { ISD::CTLZ, MVT::v32i16, 8 }, |
2601 | { ISD::CTLZ, MVT::v64i8, 20 }, |
2602 | { ISD::CTLZ, MVT::v4i64, 1 }, |
2603 | { ISD::CTLZ, MVT::v8i32, 1 }, |
2604 | { ISD::CTLZ, MVT::v16i16, 4 }, |
2605 | { ISD::CTLZ, MVT::v32i8, 10 }, |
2606 | { ISD::CTLZ, MVT::v2i64, 1 }, |
2607 | { ISD::CTLZ, MVT::v4i32, 1 }, |
2608 | { ISD::CTLZ, MVT::v8i16, 4 }, |
2609 | { ISD::CTLZ, MVT::v16i8, 4 }, |
2610 | }; |
// Consulted when ST->hasBWI().
2611 | static const CostTblEntry AVX512BWCostTbl[] = { |
2612 | { ISD::ABS, MVT::v32i16, 1 }, |
2613 | { ISD::ABS, MVT::v64i8, 1 }, |
2614 | { ISD::BITREVERSE, MVT::v8i64, 5 }, |
2615 | { ISD::BITREVERSE, MVT::v16i32, 5 }, |
2616 | { ISD::BITREVERSE, MVT::v32i16, 5 }, |
2617 | { ISD::BITREVERSE, MVT::v64i8, 5 }, |
2618 | { ISD::BSWAP, MVT::v8i64, 1 }, |
2619 | { ISD::BSWAP, MVT::v16i32, 1 }, |
2620 | { ISD::BSWAP, MVT::v32i16, 1 }, |
2621 | { ISD::CTLZ, MVT::v8i64, 23 }, |
2622 | { ISD::CTLZ, MVT::v16i32, 22 }, |
2623 | { ISD::CTLZ, MVT::v32i16, 18 }, |
2624 | { ISD::CTLZ, MVT::v64i8, 17 }, |
2625 | { ISD::CTPOP, MVT::v8i64, 7 }, |
2626 | { ISD::CTPOP, MVT::v16i32, 11 }, |
2627 | { ISD::CTPOP, MVT::v32i16, 9 }, |
2628 | { ISD::CTPOP, MVT::v64i8, 6 }, |
2629 | { ISD::CTTZ, MVT::v8i64, 10 }, |
2630 | { ISD::CTTZ, MVT::v16i32, 14 }, |
2631 | { ISD::CTTZ, MVT::v32i16, 12 }, |
2632 | { ISD::CTTZ, MVT::v64i8, 9 }, |
2633 | { ISD::SADDSAT, MVT::v32i16, 1 }, |
2634 | { ISD::SADDSAT, MVT::v64i8, 1 }, |
2635 | { ISD::SMAX, MVT::v32i16, 1 }, |
2636 | { ISD::SMAX, MVT::v64i8, 1 }, |
2637 | { ISD::SMIN, MVT::v32i16, 1 }, |
2638 | { ISD::SMIN, MVT::v64i8, 1 }, |
2639 | { ISD::SSUBSAT, MVT::v32i16, 1 }, |
2640 | { ISD::SSUBSAT, MVT::v64i8, 1 }, |
2641 | { ISD::UADDSAT, MVT::v32i16, 1 }, |
2642 | { ISD::UADDSAT, MVT::v64i8, 1 }, |
2643 | { ISD::UMAX, MVT::v32i16, 1 }, |
2644 | { ISD::UMAX, MVT::v64i8, 1 }, |
2645 | { ISD::UMIN, MVT::v32i16, 1 }, |
2646 | { ISD::UMIN, MVT::v64i8, 1 }, |
2647 | { ISD::USUBSAT, MVT::v32i16, 1 }, |
2648 | { ISD::USUBSAT, MVT::v64i8, 1 }, |
2649 | }; |
// Consulted when ST->hasAVX512(), after the BWI table has missed.
2650 | static const CostTblEntry AVX512CostTbl[] = { |
2651 | { ISD::ABS, MVT::v8i64, 1 }, |
2652 | { ISD::ABS, MVT::v16i32, 1 }, |
2653 | { ISD::ABS, MVT::v32i16, 2 }, |
2654 | { ISD::ABS, MVT::v64i8, 2 }, |
2655 | { ISD::ABS, MVT::v4i64, 1 }, |
2656 | { ISD::ABS, MVT::v2i64, 1 }, |
2657 | { ISD::BITREVERSE, MVT::v8i64, 36 }, |
2658 | { ISD::BITREVERSE, MVT::v16i32, 24 }, |
2659 | { ISD::BITREVERSE, MVT::v32i16, 10 }, |
2660 | { ISD::BITREVERSE, MVT::v64i8, 10 }, |
2661 | { ISD::BSWAP, MVT::v8i64, 4 }, |
2662 | { ISD::BSWAP, MVT::v16i32, 4 }, |
2663 | { ISD::BSWAP, MVT::v32i16, 4 }, |
2664 | { ISD::CTLZ, MVT::v8i64, 29 }, |
2665 | { ISD::CTLZ, MVT::v16i32, 35 }, |
2666 | { ISD::CTLZ, MVT::v32i16, 28 }, |
2667 | { ISD::CTLZ, MVT::v64i8, 18 }, |
2668 | { ISD::CTPOP, MVT::v8i64, 16 }, |
2669 | { ISD::CTPOP, MVT::v16i32, 24 }, |
2670 | { ISD::CTPOP, MVT::v32i16, 18 }, |
2671 | { ISD::CTPOP, MVT::v64i8, 12 }, |
2672 | { ISD::CTTZ, MVT::v8i64, 20 }, |
2673 | { ISD::CTTZ, MVT::v16i32, 28 }, |
2674 | { ISD::CTTZ, MVT::v32i16, 24 }, |
2675 | { ISD::CTTZ, MVT::v64i8, 18 }, |
2676 | { ISD::SMAX, MVT::v8i64, 1 }, |
2677 | { ISD::SMAX, MVT::v16i32, 1 }, |
2678 | { ISD::SMAX, MVT::v32i16, 2 }, |
2679 | { ISD::SMAX, MVT::v64i8, 2 }, |
2680 | { ISD::SMAX, MVT::v4i64, 1 }, |
2681 | { ISD::SMAX, MVT::v2i64, 1 }, |
2682 | { ISD::SMIN, MVT::v8i64, 1 }, |
2683 | { ISD::SMIN, MVT::v16i32, 1 }, |
2684 | { ISD::SMIN, MVT::v32i16, 2 }, |
2685 | { ISD::SMIN, MVT::v64i8, 2 }, |
2686 | { ISD::SMIN, MVT::v4i64, 1 }, |
2687 | { ISD::SMIN, MVT::v2i64, 1 }, |
2688 | { ISD::UMAX, MVT::v8i64, 1 }, |
2689 | { ISD::UMAX, MVT::v16i32, 1 }, |
2690 | { ISD::UMAX, MVT::v32i16, 2 }, |
2691 | { ISD::UMAX, MVT::v64i8, 2 }, |
2692 | { ISD::UMAX, MVT::v4i64, 1 }, |
2693 | { ISD::UMAX, MVT::v2i64, 1 }, |
2694 | { ISD::UMIN, MVT::v8i64, 1 }, |
2695 | { ISD::UMIN, MVT::v16i32, 1 }, |
2696 | { ISD::UMIN, MVT::v32i16, 2 }, |
2697 | { ISD::UMIN, MVT::v64i8, 2 }, |
2698 | { ISD::UMIN, MVT::v4i64, 1 }, |
2699 | { ISD::UMIN, MVT::v2i64, 1 }, |
2700 | { ISD::USUBSAT, MVT::v16i32, 2 }, |
2701 | { ISD::USUBSAT, MVT::v2i64, 2 }, |
2702 | { ISD::USUBSAT, MVT::v4i64, 2 }, |
2703 | { ISD::USUBSAT, MVT::v8i64, 2 }, |
2704 | { ISD::UADDSAT, MVT::v16i32, 3 }, |
2705 | { ISD::UADDSAT, MVT::v2i64, 3 }, |
2706 | { ISD::UADDSAT, MVT::v4i64, 3 }, |
2707 | { ISD::UADDSAT, MVT::v8i64, 3 }, |
2708 | { ISD::SADDSAT, MVT::v32i16, 2 }, |
2709 | { ISD::SADDSAT, MVT::v64i8, 2 }, |
2710 | { ISD::SSUBSAT, MVT::v32i16, 2 }, |
2711 | { ISD::SSUBSAT, MVT::v64i8, 2 }, |
2712 | { ISD::UADDSAT, MVT::v32i16, 2 }, |
2713 | { ISD::UADDSAT, MVT::v64i8, 2 }, |
2714 | { ISD::USUBSAT, MVT::v32i16, 2 }, |
2715 | { ISD::USUBSAT, MVT::v64i8, 2 }, |
2716 | { ISD::FMAXNUM, MVT::f32, 2 }, |
2717 | { ISD::FMAXNUM, MVT::v4f32, 2 }, |
2718 | { ISD::FMAXNUM, MVT::v8f32, 2 }, |
2719 | { ISD::FMAXNUM, MVT::v16f32, 2 }, |
2720 | { ISD::FMAXNUM, MVT::f64, 2 }, |
2721 | { ISD::FMAXNUM, MVT::v2f64, 2 }, |
2722 | { ISD::FMAXNUM, MVT::v4f64, 2 }, |
2723 | { ISD::FMAXNUM, MVT::v8f64, 2 }, |
2724 | }; |
// Consulted when ST->hasXOP(); contains only BITREVERSE entries.
2725 | static const CostTblEntry XOPCostTbl[] = { |
2726 | { ISD::BITREVERSE, MVT::v4i64, 4 }, |
2727 | { ISD::BITREVERSE, MVT::v8i32, 4 }, |
2728 | { ISD::BITREVERSE, MVT::v16i16, 4 }, |
2729 | { ISD::BITREVERSE, MVT::v32i8, 4 }, |
2730 | { ISD::BITREVERSE, MVT::v2i64, 1 }, |
2731 | { ISD::BITREVERSE, MVT::v4i32, 1 }, |
2732 | { ISD::BITREVERSE, MVT::v8i16, 1 }, |
2733 | { ISD::BITREVERSE, MVT::v16i8, 1 }, |
2734 | { ISD::BITREVERSE, MVT::i64, 3 }, |
2735 | { ISD::BITREVERSE, MVT::i32, 3 }, |
2736 | { ISD::BITREVERSE, MVT::i16, 3 }, |
2737 | { ISD::BITREVERSE, MVT::i8, 3 } |
2738 | }; |
// Consulted when ST->hasAVX2().
2739 | static const CostTblEntry AVX2CostTbl[] = { |
2740 | { ISD::ABS, MVT::v4i64, 2 }, |
2741 | { ISD::ABS, MVT::v8i32, 1 }, |
2742 | { ISD::ABS, MVT::v16i16, 1 }, |
2743 | { ISD::ABS, MVT::v32i8, 1 }, |
2744 | { ISD::BITREVERSE, MVT::v4i64, 5 }, |
2745 | { ISD::BITREVERSE, MVT::v8i32, 5 }, |
2746 | { ISD::BITREVERSE, MVT::v16i16, 5 }, |
2747 | { ISD::BITREVERSE, MVT::v32i8, 5 }, |
2748 | { ISD::BSWAP, MVT::v4i64, 1 }, |
2749 | { ISD::BSWAP, MVT::v8i32, 1 }, |
2750 | { ISD::BSWAP, MVT::v16i16, 1 }, |
2751 | { ISD::CTLZ, MVT::v4i64, 23 }, |
2752 | { ISD::CTLZ, MVT::v8i32, 18 }, |
2753 | { ISD::CTLZ, MVT::v16i16, 14 }, |
2754 | { ISD::CTLZ, MVT::v32i8, 9 }, |
2755 | { ISD::CTPOP, MVT::v4i64, 7 }, |
2756 | { ISD::CTPOP, MVT::v8i32, 11 }, |
2757 | { ISD::CTPOP, MVT::v16i16, 9 }, |
2758 | { ISD::CTPOP, MVT::v32i8, 6 }, |
2759 | { ISD::CTTZ, MVT::v4i64, 10 }, |
2760 | { ISD::CTTZ, MVT::v8i32, 14 }, |
2761 | { ISD::CTTZ, MVT::v16i16, 12 }, |
2762 | { ISD::CTTZ, MVT::v32i8, 9 }, |
2763 | { ISD::SADDSAT, MVT::v16i16, 1 }, |
2764 | { ISD::SADDSAT, MVT::v32i8, 1 }, |
2765 | { ISD::SMAX, MVT::v8i32, 1 }, |
2766 | { ISD::SMAX, MVT::v16i16, 1 }, |
2767 | { ISD::SMAX, MVT::v32i8, 1 }, |
2768 | { ISD::SMIN, MVT::v8i32, 1 }, |
2769 | { ISD::SMIN, MVT::v16i16, 1 }, |
2770 | { ISD::SMIN, MVT::v32i8, 1 }, |
2771 | { ISD::SSUBSAT, MVT::v16i16, 1 }, |
2772 | { ISD::SSUBSAT, MVT::v32i8, 1 }, |
2773 | { ISD::UADDSAT, MVT::v16i16, 1 }, |
2774 | { ISD::UADDSAT, MVT::v32i8, 1 }, |
2775 | { ISD::UADDSAT, MVT::v8i32, 3 }, |
2776 | { ISD::UMAX, MVT::v8i32, 1 }, |
2777 | { ISD::UMAX, MVT::v16i16, 1 }, |
2778 | { ISD::UMAX, MVT::v32i8, 1 }, |
2779 | { ISD::UMIN, MVT::v8i32, 1 }, |
2780 | { ISD::UMIN, MVT::v16i16, 1 }, |
2781 | { ISD::UMIN, MVT::v32i8, 1 }, |
2782 | { ISD::USUBSAT, MVT::v16i16, 1 }, |
2783 | { ISD::USUBSAT, MVT::v32i8, 1 }, |
2784 | { ISD::USUBSAT, MVT::v8i32, 2 }, |
2785 | { ISD::FMAXNUM, MVT::v8f32, 3 }, |
2786 | { ISD::FMAXNUM, MVT::v4f64, 3 }, |
2787 | { ISD::FSQRT, MVT::f32, 7 }, |
2788 | { ISD::FSQRT, MVT::v4f32, 7 }, |
2789 | { ISD::FSQRT, MVT::v8f32, 14 }, |
2790 | { ISD::FSQRT, MVT::f64, 14 }, |
2791 | { ISD::FSQRT, MVT::v2f64, 14 }, |
2792 | { ISD::FSQRT, MVT::v4f64, 28 }, |
2793 | }; |
// Consulted when ST->hasAVX().
2794 | static const CostTblEntry AVX1CostTbl[] = { |
2795 | { ISD::ABS, MVT::v4i64, 5 }, |
2796 | { ISD::ABS, MVT::v8i32, 3 }, |
2797 | { ISD::ABS, MVT::v16i16, 3 }, |
2798 | { ISD::ABS, MVT::v32i8, 3 }, |
2799 | { ISD::BITREVERSE, MVT::v4i64, 12 }, |
2800 | { ISD::BITREVERSE, MVT::v8i32, 12 }, |
2801 | { ISD::BITREVERSE, MVT::v16i16, 12 }, |
2802 | { ISD::BITREVERSE, MVT::v32i8, 12 }, |
2803 | { ISD::BSWAP, MVT::v4i64, 4 }, |
2804 | { ISD::BSWAP, MVT::v8i32, 4 }, |
2805 | { ISD::BSWAP, MVT::v16i16, 4 }, |
2806 | { ISD::CTLZ, MVT::v4i64, 48 }, |
2807 | { ISD::CTLZ, MVT::v8i32, 38 }, |
2808 | { ISD::CTLZ, MVT::v16i16, 30 }, |
2809 | { ISD::CTLZ, MVT::v32i8, 20 }, |
2810 | { ISD::CTPOP, MVT::v4i64, 16 }, |
2811 | { ISD::CTPOP, MVT::v8i32, 24 }, |
2812 | { ISD::CTPOP, MVT::v16i16, 20 }, |
2813 | { ISD::CTPOP, MVT::v32i8, 14 }, |
2814 | { ISD::CTTZ, MVT::v4i64, 22 }, |
2815 | { ISD::CTTZ, MVT::v8i32, 30 }, |
2816 | { ISD::CTTZ, MVT::v16i16, 26 }, |
2817 | { ISD::CTTZ, MVT::v32i8, 20 }, |
2818 | { ISD::SADDSAT, MVT::v16i16, 4 }, |
2819 | { ISD::SADDSAT, MVT::v32i8, 4 }, |
2820 | { ISD::SMAX, MVT::v8i32, 4 }, |
2821 | { ISD::SMAX, MVT::v16i16, 4 }, |
2822 | { ISD::SMAX, MVT::v32i8, 4 }, |
2823 | { ISD::SMIN, MVT::v8i32, 4 }, |
2824 | { ISD::SMIN, MVT::v16i16, 4 }, |
2825 | { ISD::SMIN, MVT::v32i8, 4 }, |
2826 | { ISD::SSUBSAT, MVT::v16i16, 4 }, |
2827 | { ISD::SSUBSAT, MVT::v32i8, 4 }, |
2828 | { ISD::UADDSAT, MVT::v16i16, 4 }, |
2829 | { ISD::UADDSAT, MVT::v32i8, 4 }, |
2830 | { ISD::UADDSAT, MVT::v8i32, 8 }, |
2831 | { ISD::UMAX, MVT::v8i32, 4 }, |
2832 | { ISD::UMAX, MVT::v16i16, 4 }, |
2833 | { ISD::UMAX, MVT::v32i8, 4 }, |
2834 | { ISD::UMIN, MVT::v8i32, 4 }, |
2835 | { ISD::UMIN, MVT::v16i16, 4 }, |
2836 | { ISD::UMIN, MVT::v32i8, 4 }, |
2837 | { ISD::USUBSAT, MVT::v16i16, 4 }, |
2838 | { ISD::USUBSAT, MVT::v32i8, 4 }, |
2839 | { ISD::USUBSAT, MVT::v8i32, 6 }, |
2840 | { ISD::FMAXNUM, MVT::f32, 3 }, |
2841 | { ISD::FMAXNUM, MVT::v4f32, 3 }, |
2842 | { ISD::FMAXNUM, MVT::v8f32, 5 }, |
2843 | { ISD::FMAXNUM, MVT::f64, 3 }, |
2844 | { ISD::FMAXNUM, MVT::v2f64, 3 }, |
2845 | { ISD::FMAXNUM, MVT::v4f64, 5 }, |
2846 | { ISD::FSQRT, MVT::f32, 14 }, |
2847 | { ISD::FSQRT, MVT::v4f32, 14 }, |
2848 | { ISD::FSQRT, MVT::v8f32, 28 }, |
2849 | { ISD::FSQRT, MVT::f64, 21 }, |
2850 | { ISD::FSQRT, MVT::v2f64, 21 }, |
2851 | { ISD::FSQRT, MVT::v4f64, 43 }, |
2852 | }; |
// Consulted first of all, when ST->useGLMDivSqrtCosts(); FSQRT only.
2853 | static const CostTblEntry GLMCostTbl[] = { |
2854 | { ISD::FSQRT, MVT::f32, 19 }, |
2855 | { ISD::FSQRT, MVT::v4f32, 37 }, |
2856 | { ISD::FSQRT, MVT::f64, 34 }, |
2857 | { ISD::FSQRT, MVT::v2f64, 67 }, |
2858 | }; |
// Consulted when ST->isSLM(); FSQRT only.
2859 | static const CostTblEntry SLMCostTbl[] = { |
2860 | { ISD::FSQRT, MVT::f32, 20 }, |
2861 | { ISD::FSQRT, MVT::v4f32, 40 }, |
2862 | { ISD::FSQRT, MVT::f64, 35 }, |
2863 | { ISD::FSQRT, MVT::v2f64, 70 }, |
2864 | }; |
// Consulted when ST->hasSSE42().
2865 | static const CostTblEntry SSE42CostTbl[] = { |
2866 | { ISD::USUBSAT, MVT::v4i32, 2 }, |
2867 | { ISD::UADDSAT, MVT::v4i32, 3 }, |
2868 | { ISD::FSQRT, MVT::f32, 18 }, |
2869 | { ISD::FSQRT, MVT::v4f32, 18 }, |
2870 | }; |
// Consulted when ST->hasSSE41().
2871 | static const CostTblEntry SSE41CostTbl[] = { |
2872 | { ISD::ABS, MVT::v2i64, 2 }, |
2873 | { ISD::SMAX, MVT::v4i32, 1 }, |
2874 | { ISD::SMAX, MVT::v16i8, 1 }, |
2875 | { ISD::SMIN, MVT::v4i32, 1 }, |
2876 | { ISD::SMIN, MVT::v16i8, 1 }, |
2877 | { ISD::UMAX, MVT::v4i32, 1 }, |
2878 | { ISD::UMAX, MVT::v8i16, 1 }, |
2879 | { ISD::UMIN, MVT::v4i32, 1 }, |
2880 | { ISD::UMIN, MVT::v8i16, 1 }, |
2881 | }; |
// Consulted when ST->hasSSSE3().
2882 | static const CostTblEntry SSSE3CostTbl[] = { |
2883 | { ISD::ABS, MVT::v4i32, 1 }, |
2884 | { ISD::ABS, MVT::v8i16, 1 }, |
2885 | { ISD::ABS, MVT::v16i8, 1 }, |
2886 | { ISD::BITREVERSE, MVT::v2i64, 5 }, |
2887 | { ISD::BITREVERSE, MVT::v4i32, 5 }, |
2888 | { ISD::BITREVERSE, MVT::v8i16, 5 }, |
2889 | { ISD::BITREVERSE, MVT::v16i8, 5 }, |
2890 | { ISD::BSWAP, MVT::v2i64, 1 }, |
2891 | { ISD::BSWAP, MVT::v4i32, 1 }, |
2892 | { ISD::BSWAP, MVT::v8i16, 1 }, |
2893 | { ISD::CTLZ, MVT::v2i64, 23 }, |
2894 | { ISD::CTLZ, MVT::v4i32, 18 }, |
2895 | { ISD::CTLZ, MVT::v8i16, 14 }, |
2896 | { ISD::CTLZ, MVT::v16i8, 9 }, |
2897 | { ISD::CTPOP, MVT::v2i64, 7 }, |
2898 | { ISD::CTPOP, MVT::v4i32, 11 }, |
2899 | { ISD::CTPOP, MVT::v8i16, 9 }, |
2900 | { ISD::CTPOP, MVT::v16i8, 6 }, |
2901 | { ISD::CTTZ, MVT::v2i64, 10 }, |
2902 | { ISD::CTTZ, MVT::v4i32, 14 }, |
2903 | { ISD::CTTZ, MVT::v8i16, 12 }, |
2904 | { ISD::CTTZ, MVT::v16i8, 9 } |
2905 | }; |
// Consulted when ST->hasSSE2().
2906 | static const CostTblEntry SSE2CostTbl[] = { |
2907 | { ISD::ABS, MVT::v2i64, 4 }, |
2908 | { ISD::ABS, MVT::v4i32, 3 }, |
2909 | { ISD::ABS, MVT::v8i16, 2 }, |
2910 | { ISD::ABS, MVT::v16i8, 2 }, |
2911 | { ISD::BITREVERSE, MVT::v2i64, 29 }, |
2912 | { ISD::BITREVERSE, MVT::v4i32, 27 }, |
2913 | { ISD::BITREVERSE, MVT::v8i16, 27 }, |
2914 | { ISD::BITREVERSE, MVT::v16i8, 20 }, |
2915 | { ISD::BSWAP, MVT::v2i64, 7 }, |
2916 | { ISD::BSWAP, MVT::v4i32, 7 }, |
2917 | { ISD::BSWAP, MVT::v8i16, 7 }, |
2918 | { ISD::CTLZ, MVT::v2i64, 25 }, |
2919 | { ISD::CTLZ, MVT::v4i32, 26 }, |
2920 | { ISD::CTLZ, MVT::v8i16, 20 }, |
2921 | { ISD::CTLZ, MVT::v16i8, 17 }, |
2922 | { ISD::CTPOP, MVT::v2i64, 12 }, |
2923 | { ISD::CTPOP, MVT::v4i32, 15 }, |
2924 | { ISD::CTPOP, MVT::v8i16, 13 }, |
2925 | { ISD::CTPOP, MVT::v16i8, 10 }, |
2926 | { ISD::CTTZ, MVT::v2i64, 14 }, |
2927 | { ISD::CTTZ, MVT::v4i32, 18 }, |
2928 | { ISD::CTTZ, MVT::v8i16, 16 }, |
2929 | { ISD::CTTZ, MVT::v16i8, 13 }, |
2930 | { ISD::SADDSAT, MVT::v8i16, 1 }, |
2931 | { ISD::SADDSAT, MVT::v16i8, 1 }, |
2932 | { ISD::SMAX, MVT::v8i16, 1 }, |
2933 | { ISD::SMIN, MVT::v8i16, 1 }, |
2934 | { ISD::SSUBSAT, MVT::v8i16, 1 }, |
2935 | { ISD::SSUBSAT, MVT::v16i8, 1 }, |
2936 | { ISD::UADDSAT, MVT::v8i16, 1 }, |
2937 | { ISD::UADDSAT, MVT::v16i8, 1 }, |
2938 | { ISD::UMAX, MVT::v8i16, 2 }, |
2939 | { ISD::UMAX, MVT::v16i8, 1 }, |
2940 | { ISD::UMIN, MVT::v8i16, 2 }, |
2941 | { ISD::UMIN, MVT::v16i8, 1 }, |
2942 | { ISD::USUBSAT, MVT::v8i16, 1 }, |
2943 | { ISD::USUBSAT, MVT::v16i8, 1 }, |
2944 | { ISD::FMAXNUM, MVT::f64, 4 }, |
2945 | { ISD::FMAXNUM, MVT::v2f64, 4 }, |
2946 | { ISD::FSQRT, MVT::f64, 32 }, |
2947 | { ISD::FSQRT, MVT::v2f64, 32 }, |
2948 | }; |
// Consulted when ST->hasSSE1().
2949 | static const CostTblEntry SSE1CostTbl[] = { |
2950 | { ISD::FMAXNUM, MVT::f32, 4 }, |
2951 | { ISD::FMAXNUM, MVT::v4f32, 4 }, |
2952 | { ISD::FSQRT, MVT::f32, 28 }, |
2953 | { ISD::FSQRT, MVT::v4f32, 56 }, |
2954 | }; |
// Consulted when ST->hasBMI() and ST->is64Bit().
2955 | static const CostTblEntry BMI64CostTbl[] = { |
2956 | { ISD::CTTZ, MVT::i64, 1 }, |
2957 | }; |
// Consulted when ST->hasBMI().
2958 | static const CostTblEntry BMI32CostTbl[] = { |
2959 | { ISD::CTTZ, MVT::i32, 1 }, |
2960 | { ISD::CTTZ, MVT::i16, 1 }, |
2961 | { ISD::CTTZ, MVT::i8, 1 }, |
2962 | }; |
// Consulted when ST->hasLZCNT() and ST->is64Bit().
2963 | static const CostTblEntry LZCNT64CostTbl[] = { |
2964 | { ISD::CTLZ, MVT::i64, 1 }, |
2965 | }; |
// Consulted when ST->hasLZCNT().
2966 | static const CostTblEntry LZCNT32CostTbl[] = { |
2967 | { ISD::CTLZ, MVT::i32, 1 }, |
2968 | { ISD::CTLZ, MVT::i16, 1 }, |
2969 | { ISD::CTLZ, MVT::i8, 1 }, |
2970 | }; |
// Consulted when ST->hasPOPCNT() and ST->is64Bit().
2971 | static const CostTblEntry POPCNT64CostTbl[] = { |
2972 | { ISD::CTPOP, MVT::i64, 1 }, |
2973 | }; |
// Consulted when ST->hasPOPCNT().
2974 | static const CostTblEntry POPCNT32CostTbl[] = { |
2975 | { ISD::CTPOP, MVT::i32, 1 }, |
2976 | { ISD::CTPOP, MVT::i16, 1 }, |
2977 | { ISD::CTPOP, MVT::i8, 1 }, |
2978 | }; |
// Consulted near the end of the cascade, when ST->is64Bit().
2979 | static const CostTblEntry X64CostTbl[] = { |
2980 | { ISD::ABS, MVT::i64, 2 }, |
2981 | { ISD::BITREVERSE, MVT::i64, 14 }, |
2982 | { ISD::BSWAP, MVT::i64, 1 }, |
2983 | { ISD::CTLZ, MVT::i64, 4 }, |
2984 | { ISD::CTTZ, MVT::i64, 3 }, |
2985 | { ISD::CTPOP, MVT::i64, 10 }, |
2986 | { ISD::SADDO, MVT::i64, 1 }, |
2987 | { ISD::UADDO, MVT::i64, 1 }, |
2988 | { ISD::UMULO, MVT::i64, 2 }, |
2989 | }; |
// Last table in the cascade; consulted without any subtarget check.
2990 | static const CostTblEntry X86CostTbl[] = { |
2991 | { ISD::ABS, MVT::i32, 2 }, |
2992 | { ISD::ABS, MVT::i16, 2 }, |
2993 | { ISD::BITREVERSE, MVT::i32, 14 }, |
2994 | { ISD::BITREVERSE, MVT::i16, 14 }, |
2995 | { ISD::BITREVERSE, MVT::i8, 11 }, |
2996 | { ISD::BSWAP, MVT::i32, 1 }, |
2997 | { ISD::BSWAP, MVT::i16, 1 }, |
2998 | { ISD::CTLZ, MVT::i32, 4 }, |
2999 | { ISD::CTLZ, MVT::i16, 4 }, |
3000 | { ISD::CTLZ, MVT::i8, 4 }, |
3001 | { ISD::CTTZ, MVT::i32, 3 }, |
3002 | { ISD::CTTZ, MVT::i16, 3 }, |
3003 | { ISD::CTTZ, MVT::i8, 3 }, |
3004 | { ISD::CTPOP, MVT::i32, 8 }, |
3005 | { ISD::CTPOP, MVT::i16, 9 }, |
3006 | { ISD::CTPOP, MVT::i8, 7 }, |
3007 | { ISD::SADDO, MVT::i32, 1 }, |
3008 | { ISD::SADDO, MVT::i16, 1 }, |
3009 | { ISD::SADDO, MVT::i8, 1 }, |
3010 | { ISD::UADDO, MVT::i32, 1 }, |
3011 | { ISD::UADDO, MVT::i16, 1 }, |
3012 | { ISD::UADDO, MVT::i8, 1 }, |
3013 | { ISD::UMULO, MVT::i32, 2 }, |
3014 | { ISD::UMULO, MVT::i16, 2 }, |
3015 | { ISD::UMULO, MVT::i8, 2 }, |
3016 | }; |
3017 | |
// Map the intrinsic to the ISD opcode used as the table key. OpTy is the type
// that gets legalized; it defaults to the return type. ISD::DELETED_NODE marks
// "no mapping", in which case the table search is skipped.
3018 | Type *RetTy = ICA.getReturnType(); |
3019 | Type *OpTy = RetTy; |
3020 | Intrinsic::ID IID = ICA.getID(); |
3021 | unsigned ISD = ISD::DELETED_NODE; |
3022 | switch (IID) { |
3023 | default: |
3024 | break; |
3025 | case Intrinsic::abs: |
3026 | ISD = ISD::ABS; |
3027 | break; |
3028 | case Intrinsic::bitreverse: |
3029 | ISD = ISD::BITREVERSE; |
3030 | break; |
3031 | case Intrinsic::bswap: |
3032 | ISD = ISD::BSWAP; |
3033 | break; |
3034 | case Intrinsic::ctlz: |
3035 | ISD = ISD::CTLZ; |
3036 | break; |
3037 | case Intrinsic::ctpop: |
3038 | ISD = ISD::CTPOP; |
3039 | break; |
3040 | case Intrinsic::cttz: |
3041 | ISD = ISD::CTTZ; |
3042 | break; |
// minnum deliberately shares the FMAXNUM table entries with maxnum.
3043 | case Intrinsic::maxnum: |
3044 | case Intrinsic::minnum: |
3045 | |
3046 | ISD = ISD::FMAXNUM; |
3047 | break; |
3048 | case Intrinsic::sadd_sat: |
3049 | ISD = ISD::SADDSAT; |
3050 | break; |
3051 | case Intrinsic::smax: |
3052 | ISD = ISD::SMAX; |
3053 | break; |
3054 | case Intrinsic::smin: |
3055 | ISD = ISD::SMIN; |
3056 | break; |
3057 | case Intrinsic::ssub_sat: |
3058 | ISD = ISD::SSUBSAT; |
3059 | break; |
3060 | case Intrinsic::uadd_sat: |
3061 | ISD = ISD::UADDSAT; |
3062 | break; |
3063 | case Intrinsic::umax: |
3064 | ISD = ISD::UMAX; |
3065 | break; |
3066 | case Intrinsic::umin: |
3067 | ISD = ISD::UMIN; |
3068 | break; |
3069 | case Intrinsic::usub_sat: |
3070 | ISD = ISD::USUBSAT; |
3071 | break; |
3072 | case Intrinsic::sqrt: |
3073 | ISD = ISD::FSQRT; |
3074 | break; |
// The *_with_overflow intrinsics are keyed on contained type 0 of the return
// type rather than on the return type itself. ssub shares the SADDO entries.
3075 | case Intrinsic::sadd_with_overflow: |
3076 | case Intrinsic::ssub_with_overflow: |
3077 | |
3078 | ISD = ISD::SADDO; |
3079 | OpTy = RetTy->getContainedType(0); |
3080 | break; |
// usub shares the UADDO entries.
3081 | case Intrinsic::uadd_with_overflow: |
3082 | case Intrinsic::usub_with_overflow: |
3083 | |
3084 | ISD = ISD::UADDO; |
3085 | OpTy = RetTy->getContainedType(0); |
3086 | break; |
// Both the unsigned and the signed multiply share the UMULO entries.
3087 | case Intrinsic::umul_with_overflow: |
3088 | case Intrinsic::smul_with_overflow: |
3089 | |
3090 | ISD = ISD::UMULO; |
3091 | OpTy = RetTy->getContainedType(0); |
3092 | break; |
3093 | } |
3094 | |
3095 | if (ISD != ISD::DELETED_NODE) { |
3096 | |
// Legalize the operand type; LT.first is the legalization cost multiplier and
// LT.second the legalized machine type used for the table lookups.
3097 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy); |
3098 | MVT MTy = LT.second; |
3099 | |
3100 | |
// Vector BITREVERSE on subtargets with both GFNI and SSSE3 bypasses the tables:
// base cost is 1 for i8 elements and 2 otherwise.
3101 | if (ISD == ISD::BITREVERSE && ST->hasGFNI() && ST->hasSSSE3() && |
3102 | MTy.isVector()) { |
3103 | |
3104 | |
3105 | |
3106 | unsigned Cost = MTy.getVectorElementType() == MVT::i8 ? 1 : 2; |
3107 | |
3108 | |
3109 | |
// Unless the type is a 128-bit vector, a 256-bit vector with AVX2, or a
// 512-bit vector with BWI, the cost becomes 2 * Cost + 2.
// NOTE(review): presumably models splitting the vector in two - confirm.
3110 | if (!(MTy.is128BitVector() || (ST->hasAVX2() && MTy.is256BitVector()) || |
3111 | (ST->hasBWI() && MTy.is512BitVector()))) |
3112 | Cost = Cost * 2 + 2; |
3113 | |
3114 | return LT.first * Cost; |
3115 | } |
3116 | |
// Converts a table hit into the returned cost: normally LegalizationCost times
// the entry's cost, but FMAXNUM/FMINNUM entries cost just LegalizationCost
// when the no-NaNs fast-math flag is set.
3117 | auto adjustTableCost = [](const CostTblEntry &Entry, |
3118 | InstructionCost LegalizationCost, |
3119 | FastMathFlags FMF) { |
3120 | |
3121 | |
3122 | |
3123 | if (Entry.ISD == ISD::FMAXNUM || Entry.ISD == ISD::FMINNUM) { |
3124 | if (FMF.noNaNs()) |
3125 | return LegalizationCost * 1; |
3126 | } |
3127 | return LegalizationCost * (int)Entry.Cost; |
3128 | }; |
3129 | |
// Table cascade. The order is significant: the first enabled table containing
// (ISD, MTy) wins, so the checks that come earlier override the later ones.
3130 | if (ST->useGLMDivSqrtCosts()) |
3131 | if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy)) |
3132 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3133 | |
3134 | if (ST->isSLM()) |
3135 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) |
3136 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3137 | |
3138 | if (ST->hasBITALG()) |
3139 | if (const auto *Entry = CostTableLookup(AVX512BITALGCostTbl, ISD, MTy)) |
3140 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3141 | |
3142 | if (ST->hasVPOPCNTDQ()) |
3143 | if (const auto *Entry = CostTableLookup(AVX512VPOPCNTDQCostTbl, ISD, MTy)) |
3144 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3145 | |
3146 | if (ST->hasCDI()) |
3147 | if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) |
3148 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3149 | |
3150 | if (ST->hasBWI()) |
3151 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
3152 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3153 | |
3154 | if (ST->hasAVX512()) |
3155 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
3156 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3157 | |
3158 | if (ST->hasXOP()) |
3159 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) |
3160 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3161 | |
3162 | if (ST->hasAVX2()) |
3163 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
3164 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3165 | |
3166 | if (ST->hasAVX()) |
3167 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
3168 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3169 | |
3170 | if (ST->hasSSE42()) |
3171 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
3172 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3173 | |
3174 | if (ST->hasSSE41()) |
3175 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
3176 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3177 | |
3178 | if (ST->hasSSSE3()) |
3179 | if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) |
3180 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3181 | |
3182 | if (ST->hasSSE2()) |
3183 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
3184 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3185 | |
3186 | if (ST->hasSSE1()) |
3187 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
3188 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3189 | |
// Feature-specific tables for BMI, LZCNT and POPCNT: the 64-bit table is tried
// first on 64-bit subtargets, then the 32/16/8-bit table.
3190 | if (ST->hasBMI()) { |
3191 | if (ST->is64Bit()) |
3192 | if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy)) |
3193 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3194 | |
3195 | if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy)) |
3196 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3197 | } |
3198 | |
3199 | if (ST->hasLZCNT()) { |
3200 | if (ST->is64Bit()) |
3201 | if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy)) |
3202 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3203 | |
3204 | if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy)) |
3205 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3206 | } |
3207 | |
3208 | if (ST->hasPOPCNT()) { |
3209 | if (ST->is64Bit()) |
3210 | if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy)) |
3211 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3212 | |
3213 | if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy)) |
3214 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3215 | } |
3216 | |
// With MOVBE and fast MOVBE, a BSWAP that reaches this point is reported as
// free when ICA carries an instruction and either its single user is a store
// or its operand 0 is a load with a single use.
// NOTE(review): presumably the swap folds into a MOVBE load/store - confirm.
3217 | if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) { |
3218 | if (const Instruction *II = ICA.getInst()) { |
3219 | if (II->hasOneUse() && isa<StoreInst>(II->user_back())) |
3220 | return TTI::TCC_Free; |
3221 | if (auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) { |
3222 | if (LI->hasOneUse()) |
3223 | return TTI::TCC_Free; |
3224 | } |
3225 | } |
3226 | } |
3227 | |
3228 | |
3229 | |
// Generic tables tried last: the 64-bit one on 64-bit subtargets, then the
// unconditional X86 table.
3230 | if (ST->is64Bit()) |
3231 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) |
3232 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3233 | |
3234 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) |
3235 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3236 | } |
3237 | |
// No opcode mapping or no table hit: defer to the generic implementation.
3238 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
3239 | } |
3240 | /* X86 cost of an intrinsic call: fshl/fshr (and their rotate forms) are priced from the tables below; anything else goes to BaseT. */ |
3241 | InstructionCost |
3242 | X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
3243 | TTI::TargetCostKind CostKind) { |
3244 | if (ICA.isTypeBasedOnly()) |
3245 | return getTypeBasedIntrinsicInstrCost(ICA, CostKind); |
3246 | /* Cost tables keyed by (ISD opcode, legalized type); a hit is multiplied by LT.first further down. */ |
3247 | static const CostTblEntry AVX512CostTbl[] = { |
3248 | { ISD::ROTL, MVT::v8i64, 1 }, |
3249 | { ISD::ROTL, MVT::v4i64, 1 }, |
3250 | { ISD::ROTL, MVT::v2i64, 1 }, |
3251 | { ISD::ROTL, MVT::v16i32, 1 }, |
3252 | { ISD::ROTL, MVT::v8i32, 1 }, |
3253 | { ISD::ROTL, MVT::v4i32, 1 }, |
3254 | { ISD::ROTR, MVT::v8i64, 1 }, |
3255 | { ISD::ROTR, MVT::v4i64, 1 }, |
3256 | { ISD::ROTR, MVT::v2i64, 1 }, |
3257 | { ISD::ROTR, MVT::v16i32, 1 }, |
3258 | { ISD::ROTR, MVT::v8i32, 1 }, |
3259 | { ISD::ROTR, MVT::v4i32, 1 } |
3260 | }; |
3261 | /* Consulted only when the subtarget reports XOP. */ |
3262 | static const CostTblEntry XOPCostTbl[] = { |
3263 | { ISD::ROTL, MVT::v4i64, 4 }, |
3264 | { ISD::ROTL, MVT::v8i32, 4 }, |
3265 | { ISD::ROTL, MVT::v16i16, 4 }, |
3266 | { ISD::ROTL, MVT::v32i8, 4 }, |
3267 | { ISD::ROTL, MVT::v2i64, 1 }, |
3268 | { ISD::ROTL, MVT::v4i32, 1 }, |
3269 | { ISD::ROTL, MVT::v8i16, 1 }, |
3270 | { ISD::ROTL, MVT::v16i8, 1 }, |
3271 | { ISD::ROTR, MVT::v4i64, 6 }, |
3272 | { ISD::ROTR, MVT::v8i32, 6 }, |
3273 | { ISD::ROTR, MVT::v16i16, 6 }, |
3274 | { ISD::ROTR, MVT::v32i8, 6 }, |
3275 | { ISD::ROTR, MVT::v2i64, 2 }, |
3276 | { ISD::ROTR, MVT::v4i32, 2 }, |
3277 | { ISD::ROTR, MVT::v8i16, 2 }, |
3278 | { ISD::ROTR, MVT::v16i8, 2 } |
3279 | }; |
3280 | static const CostTblEntry X64CostTbl[] = { |
3281 | { ISD::ROTL, MVT::i64, 1 }, |
3282 | { ISD::ROTR, MVT::i64, 1 }, |
3283 | { ISD::FSHL, MVT::i64, 4 } |
3284 | }; |
3285 | static const CostTblEntry X86CostTbl[] = { |
3286 | { ISD::ROTL, MVT::i32, 1 }, |
3287 | { ISD::ROTL, MVT::i16, 1 }, |
3288 | { ISD::ROTL, MVT::i8, 1 }, |
3289 | { ISD::ROTR, MVT::i32, 1 }, |
3290 | { ISD::ROTR, MVT::i16, 1 }, |
3291 | { ISD::ROTR, MVT::i8, 1 }, |
3292 | { ISD::FSHL, MVT::i32, 4 }, |
3293 | { ISD::FSHL, MVT::i16, 4 }, |
3294 | { ISD::FSHL, MVT::i8, 4 } |
3295 | }; |
3296 | /* Map the intrinsic to an ISD opcode; DELETED_NODE means "not handled here". */ |
3297 | Intrinsic::ID IID = ICA.getID(); |
3298 | Type *RetTy = ICA.getReturnType(); |
3299 | const SmallVectorImpl<const Value *> &Args = ICA.getArgs(); |
3300 | unsigned ISD = ISD::DELETED_NODE; |
3301 | switch (IID) { |
3302 | default: |
3303 | break; |
3304 | case Intrinsic::fshl: |
3305 | ISD = ISD::FSHL; |
3306 | if (Args[0] == Args[1]) |
3307 | ISD = ISD::ROTL; |
3308 | break; |
3309 | case Intrinsic::fshr: |
3310 | /* fshr is looked up under the FSHL entries; only its rotate form (both inputs the same value) has its own opcode. */ |
3311 | ISD = ISD::FSHL; |
3312 | if (Args[0] == Args[1]) |
3313 | ISD = ISD::ROTR; |
3314 | break; |
3315 | } |
3316 | |
3317 | if (ISD != ISD::DELETED_NODE) { |
3318 | /* Legalize the return type. */ |
3319 | std::pair<InstructionCost, MVT> LT = |
3320 | TLI->getTypeLegalizationCost(DL, RetTy); |
3321 | MVT MTy = LT.second; |
3322 | |
3323 | /* Tables are tried in this fixed order and the first hit is returned. */ |
3324 | if (ST->hasAVX512()) |
3325 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
3326 | return LT.first * Entry->Cost; |
3327 | |
3328 | if (ST->hasXOP()) |
3329 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) |
3330 | return LT.first * Entry->Cost; |
3331 | |
3332 | if (ST->is64Bit()) |
3333 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) |
3334 | return LT.first * Entry->Cost; |
3335 | |
3336 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) |
3337 | return LT.first * Entry->Cost; |
3338 | } |
3339 | /* Unhandled intrinsic or no table entry: use the generic cost. */ |
3340 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
3341 | } |
3342 | /* X86 cost of an insertelement/extractelement (Opcode) on vector type Val at lane Index; other opcodes go to BaseT. */ |
3343 | InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
3344 | unsigned Index) { |
3345 | static const CostTblEntry SLMCostTbl[] = { |
3346 | { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, |
3347 | { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, |
3348 | { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 }, |
3349 | { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 } |
3350 | }; |
3351 | |
3352 | assert(Val->isVectorTy() && "This must be a vector type"); |
3353 | Type *ScalarType = Val->getScalarType(); |
3354 | int RegisterFileMoveCost = 0; |
3355 | |
3356 | /* Index == -1U is priced as a round trip through memory. */ |
3357 | /* NOTE(review): presumably -1U is the "lane not known" sentinel - confirm against callers. */ |
3358 | if (Index == -1U && (Opcode == Instruction::ExtractElement || |
3359 | Opcode == Instruction::InsertElement)) { |
3360 | |
3361 | |
3362 | |
3363 | /* Preferred alignments of the vector and of one element feed the memory-op costs below. */ |
3364 | assert(isa<FixedVectorType>(Val) && "Fixed vector type expected"); |
3365 | Align VecAlign = DL.getPrefTypeAlign(Val); |
3366 | Align SclAlign = DL.getPrefTypeAlign(ScalarType); |
3367 | |
3368 | /* Extract: store the whole vector, then load one scalar. */ |
3369 | if (Opcode == Instruction::ExtractElement) { |
3370 | return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, |
3371 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3372 | getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0, |
3373 | TTI::TargetCostKind::TCK_RecipThroughput); |
3374 | } |
3375 | /* Insert: store the vector, store the scalar, load the vector back. */ |
3376 | if (Opcode == Instruction::InsertElement) { |
3377 | return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, |
3378 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3379 | getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0, |
3380 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3381 | getMemoryOpCost(Instruction::Load, Val, VecAlign, 0, |
3382 | TTI::TargetCostKind::TCK_RecipThroughput); |
3383 | } |
3384 | } |
3385 | |
3386 | if (Index != -1U && (Opcode == Instruction::ExtractElement || |
3387 | Opcode == Instruction::InsertElement)) { |
3388 | /* Legalize the type. */ |
3389 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); |
3390 | |
3391 | /* A type that legalizes to a non-vector costs nothing here. */ |
3392 | if (!LT.second.isVector()) |
3393 | return 0; |
3394 | |
3395 | /* Reduce Index to a lane of the legalized register type. */ |
3396 | unsigned NumElts = LT.second.getVectorNumElements(); |
3397 | unsigned SubNumElts = NumElts; |
3398 | Index = Index % NumElts; |
3399 | |
3400 | /* Legal types wider than 128 bits: a lane outside the first 128-bit chunk pays an extra */ |
3401 | /* 2 (insert) or 1 (extract), and Index becomes the lane within its 128-bit chunk. */ |
3402 | if (LT.second.getSizeInBits() > 128) { |
3403 | assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector"); |
3404 | unsigned NumSubVecs = LT.second.getSizeInBits() / 128; |
3405 | SubNumElts = NumElts / NumSubVecs; |
3406 | if (SubNumElts <= Index) { |
3407 | RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1); |
3408 | Index %= SubNumElts; |
3409 | } |
3410 | } |
3411 | |
3412 | if (Index == 0) { |
3413 | |
3414 | |
3415 | /* Lane 0 of a floating-point vector: only the chunk move (if any) is charged. */ |
3416 | if (ScalarType->isFloatingPointTy()) |
3417 | return RegisterFileMoveCost; |
3418 | |
3419 | /* Extracting lane 0 of an integer vector: one op plus the chunk move. */ |
3420 | if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement) |
3421 | return 1 + RegisterFileMoveCost; |
3422 | } |
3423 | |
3424 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
3425 | assert(ISD && "Unexpected vector opcode"); |
3426 | MVT MScalarTy = LT.second.getScalarType(); |
3427 | if (ST->isSLM()) |
3428 | if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) |
3429 | return Entry->Cost + RegisterFileMoveCost; |
3430 | |
3431 | /* i16 with SSE2, or any integer element with SSE4.1: one op. */ |
3432 | if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || |
3433 | (MScalarTy.isInteger() && ST->hasSSE41())) |
3434 | return 1 + RegisterFileMoveCost; |
3435 | |
3436 | /* f32 insert with SSE4.1: one op. */ |
3437 | if (MScalarTy == MVT::f32 && ST->hasSSE41() && |
3438 | Opcode == Instruction::InsertElement) |
3439 | return 1 + RegisterFileMoveCost; |
3440 | |
3441 | |
3442 | |
3443 | |
3444 | /* Otherwise charge a shuffle (cost 1 for extracts, a two-source permute for inserts), */ |
3445 | /* plus one more op when the element type is not floating point. */ |
3446 | |
3447 | InstructionCost ShuffleCost = 1; |
3448 | if (Opcode == Instruction::InsertElement) { |
3449 | auto *SubTy = cast<VectorType>(Val); |
3450 | EVT VT = TLI->getValueType(DL, Val); |
3451 | if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128) |
3452 | SubTy = FixedVectorType::get(ScalarType, SubNumElts); |
3453 | ShuffleCost = |
3454 | getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, None, 0, SubTy); |
3455 | } |
3456 | int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1; |
3457 | return ShuffleCost + IntOrFpCost + RegisterFileMoveCost; |
3458 | } |
3459 | |
3460 | |
3461 | /* Remaining cases use the generic cost; extracting a pointer element adds one. */ |
3462 | if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) |
3463 | RegisterFileMoveCost += 1; |
3464 | |
3465 | return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; |
3466 | } |
3467 | /* Overhead of inserting and/or extracting the DemandedElts lanes of Ty; the insert side is modelled here, the extract side is left to BaseT. */ |
3468 | InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, |
3469 | const APInt &DemandedElts, |
3470 | bool Insert, |
3471 | bool Extract) { |
3472 | InstructionCost Cost = 0; |
3473 | |
3474 | |
3475 | /* Insert side. */ |
3476 | if (Insert) { |
3477 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
3478 | MVT MScalarTy = LT.second.getScalarType(); |
3479 | /* First branch: i16 with SSE2, or integer / f32 elements with SSE4.1. */ |
3480 | if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || |
3481 | (MScalarTy.isInteger() && ST->hasSSE41()) || |
3482 | (MScalarTy == MVT::f32 && ST->hasSSE41())) { |
3483 | |
3484 | /* Legal type of at most 128 bits: use the generic insert overhead. */ |
3485 | if (LT.second.getSizeInBits() <= 128) { |
3486 | Cost += |
3487 | BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false); |
3488 | } else { |
3489 | /* Wider legal type: the (zero-extended) demanded mask is walked in groups of Scale */ |
3490 | /* elements, one group per 128-bit lane. Each group with a demanded element adds 1, */ |
3491 | /* plus 1 more when it is only partly demanded and does not start a legal register; */ |
3492 | /* after the loop, 1 per demanded element is added. */ |
3493 | |
3494 | |
3495 | |
3496 | |
3497 | /* NOTE(review): LT.first.getValue() is dereferenced without a validity check, and the */ |
3498 | /* assert only rejects negative values (and is compiled out under NDEBUG); a CostValue */ |
3499 | /* of 0 would make Num128Lanes 0 and the Scale division below divide by zero. */ |
3500 | |
3501 | const int CostValue = *LT.first.getValue(); |
3502 | assert(CostValue >= 0 && "Negative cost!"); |
3503 | unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue; |
3504 | unsigned NumElts = LT.second.getVectorNumElements() * CostValue; |
3505 | APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); |
3506 | unsigned Scale = NumElts / Num128Lanes; |
3507 | |
3508 | /* Per-group accounting; Mask selects the bits of the current group. */ |
3509 | for (unsigned I = 0; I < NumElts; I += Scale) { |
3510 | APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale); |
3511 | APInt MaskedDE = Mask & WidenedDemandedElts; |
3512 | unsigned Population = MaskedDE.countPopulation(); |
3513 | Cost += (Population > 0 && Population != Scale && |
3514 | I % LT.second.getVectorNumElements() != 0); |
3515 | Cost += Population > 0; |
3516 | } |
3517 | Cost += DemandedElts.countPopulation(); |
3518 | |
3519 | |
3520 | |
3521 | /* For f32, each demanded element whose index is a multiple of 4 has its unit taken back. */ |
3522 | if (MScalarTy == MVT::f32) |
3523 | for (unsigned i = 0, e = cast<FixedVectorType>(Ty)->getNumElements(); |
3524 | i < e; i += 4) |
3525 | if (DemandedElts[i]) |
3526 | Cost--; |
3527 | } |
3528 | } else if (LT.second.isVector()) { |
3529 | |
3530 | |
3531 | |
3532 | /* Other element types: integer vectors pay one op per demanded element ... */ |
3533 | if (Ty->isIntOrIntVectorTy()) |
3534 | Cost += DemandedElts.countPopulation(); |
3535 | |
3536 | |
3537 | |
3538 | /* ... plus (min(legal lane count, power-of-2 ceiling of Ty's element count) - 1) ops per legalized part. */ |
3539 | unsigned NumElts = LT.second.getVectorNumElements(); |
3540 | unsigned Pow2Elts = |
3541 | PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements()); |
3542 | Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first; |
3543 | } |
3544 | } |
3545 | |
3546 | |
3547 | /* Extract side: generic implementation. */ |
3548 | if (Extract) |
3549 | Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); |
3550 | |
3551 | return Cost; |
3552 | } |
3553 | /* X86 cost of a load or store (Opcode) of type Src; fixed vectors are priced by covering them with memory ops of halving size. */ |
3554 | InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, |
3555 | MaybeAlign Alignment, |
3556 | unsigned AddressSpace, |
3557 | TTI::TargetCostKind CostKind, |
3558 | const Instruction *I) { |
3559 | /* Cost kinds other than reciprocal throughput get a flat answer. */ |
3560 | if (CostKind != TTI::TCK_RecipThroughput) { |
3561 | if (auto *SI = dyn_cast_or_null<StoreInst>(I)) { |
3562 | |
3563 | /* A store whose address is a GEP with any non-constant index costs twice the basic cost. */ |
3564 | if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) { |
3565 | if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); })) |
3566 | return TTI::TCC_Basic * 2; |
3567 | } |
3568 | } |
3569 | return TTI::TCC_Basic; |
3570 | } |
3571 | |
3572 | assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && |
3573 | "Invalid Opcode"); |
3574 | /* Types with no value type (MVT::Other) are left to the generic implementation. */ |
3575 | if (TLI->getValueType(DL, Src, true) == MVT::Other) |
3576 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3577 | CostKind); |
3578 | |
3579 | /* Legalize the type. */ |
3580 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); |
3581 | |
3582 | auto *VTy = dyn_cast<FixedVectorType>(Src); |
3583 | |
3584 | |
3585 | /* Anything that is not a fixed vector, or that legalizes to a non-vector: one op per legalized part. */ |
3586 | if (!VTy || !LT.second.isVector()) |
3587 | |
3588 | return LT.first * 1; |
3589 | |
3590 | bool IsLoad = Opcode == Instruction::Load; |
3591 | |
3592 | Type *EltTy = VTy->getElementType(); |
3593 | |
3594 | const int EltTyBits = DL.getTypeSizeInBits(EltTy); |
3595 | |
3596 | InstructionCost Cost = 0; |
3597 | |
3598 | /* Element count of the source vector. */ |
3599 | const unsigned SrcNumElt = VTy->getNumElements(); |
3600 | |
3601 | /* Elements not yet covered by a memory op; the final assert allows it to end below zero. */ |
3602 | int NumEltRemaining = SrcNumElt; |
3603 | |
3604 | auto NumEltDone = [&]() { return SrcNumElt - NumEltRemaining; }; |
3605 | |
3606 | const int MaxLegalOpSizeBytes = divideCeil(LT.second.getSizeInBits(), 8); |
3607 | |
3608 | /* Element sizes that do not evenly divide 128 bits are left to the generic implementation. */ |
3609 | const unsigned XMMBits = 128; |
3610 | if (XMMBits % EltTyBits != 0) |
3611 | |
3612 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3613 | CostKind); |
3614 | const int NumEltPerXMM = XMMBits / EltTyBits; |
3615 | |
3616 | auto *XMMVecTy = FixedVectorType::get(EltTy, NumEltPerXMM); |
3617 | /* Outer loop: start at the widest legal op size and halve it each round. */ |
3618 | for (int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0; |
3619 | NumEltRemaining > 0; CurrOpSizeBytes /= 2) { |
3620 | /* Same fallback when the element size does not divide the current op size. */ |
3621 | if ((8 * CurrOpSizeBytes) % EltTyBits != 0) |
3622 | |
3623 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3624 | CostKind); |
3625 | int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits; |
3626 | |
3627 | assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 && "How'd we get here?"); |
3628 | assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) || |
3629 | (CurrOpSizeBytes == MaxLegalOpSizeBytes)) && |
3630 | "Unless we haven't halved the op size yet, " |
3631 | "we have less than two op's sized units of work left."); |
3632 | /* Vector type of the current op, never narrower than a 128-bit vector of EltTy. */ |
3633 | auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM |
3634 | ? FixedVectorType::get(EltTy, CurrNumEltPerOp) |
3635 | : XMMVecTy; |
3636 | |
3637 | assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 && |
3638 | "After halving sizes, the vector elt count is no longer a multiple " |
3639 | "of number of elements per operation?"); |
3640 | auto *CoalescedVecTy = |
3641 | CurrNumEltPerOp == 1 |
3642 | ? CurrVecTy |
3643 | : FixedVectorType::get( |
3644 | IntegerType::get(Src->getContext(), |
3645 | EltTyBits * CurrNumEltPerOp), |
3646 | CurrVecTy->getNumElements() / CurrNumEltPerOp); |
3647 | assert(DL.getTypeSizeInBits(CoalescedVecTy) == |
3648 | DL.getTypeSizeInBits(CurrVecTy) && |
3649 | "coalesciing elements doesn't change vector width."); |
3650 | /* Inner loop: issue ops of the current size. */ |
3651 | while (NumEltRemaining > 0) { |
3652 | assert(SubVecEltsLeft >= 0 && "Subreg element count overconsumtion?"); |
3653 | |
3654 | /* Go to a narrower op when less than one op's worth remains, unless this is a load */ |
3655 | /* aligned to at least the op size, or the op is already a single byte. */ |
3656 | if (NumEltRemaining < CurrNumEltPerOp && |
3657 | (!IsLoad || Alignment.valueOrOne() < CurrOpSizeBytes) && |
3658 | CurrOpSizeBytes != 1) |
3659 | break; |
3660 | |
3661 | bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0; |
3662 | |
3663 | /* Starting a new subvector: unless it begins a legal register, charge an insert (load) or extract (store) subvector shuffle. */ |
3664 | if (SubVecEltsLeft == 0) { |
3665 | SubVecEltsLeft += CurrVecTy->getNumElements(); |
3666 | |
3667 | if (!Is0thSubVec) |
3668 | Cost += getShuffleCost(IsLoad ? TTI::ShuffleKind::SK_InsertSubvector |
3669 | : TTI::ShuffleKind::SK_ExtractSubvector, |
3670 | VTy, None, NumEltDone(), CurrVecTy); |
3671 | } |
3672 | |
3673 | |
3674 | |
3675 | /* Ops of 4 bytes or less that do not begin a legal register also pay to insert (load) */ |
3676 | /* or extract (store) the one coalesced element they touch. */ |
3677 | if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) { |
3678 | int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM; |
3679 | assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 && ""); |
3680 | int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp; |
3681 | APInt DemandedElts = |
3682 | APInt::getBitsSet(CoalescedVecTy->getNumElements(), |
3683 | CoalescedVecEltIdx, CoalescedVecEltIdx + 1); |
3684 | assert(DemandedElts.countPopulation() == 1 && "Inserting single value"); |
3685 | Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad, |
3686 | !IsLoad); |
3687 | } |
3688 | |
3689 | |
3690 | |
3691 | /* The memory op itself: 2 for a 32-byte op when the subtarget reports slow unaligned 32-byte accesses, otherwise 1. */ |
3692 | if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow()) |
3693 | Cost += 2; |
3694 | else |
3695 | Cost += 1; |
3696 | |
3697 | SubVecEltsLeft -= CurrNumEltPerOp; |
3698 | NumEltRemaining -= CurrNumEltPerOp; |
3699 | Alignment = commonAlignment(Alignment.valueOrOne(), CurrOpSizeBytes); |
3700 | } |
3701 | } |
3702 | |
3703 | assert(NumEltRemaining <= 0 && "Should have processed all the elements."); |
3704 | |
3705 | return Cost; |
3706 | } |
3707 | /* X86 cost of a masked load/store (Opcode) of SrcTy: scalarized when the masked op is not legal, otherwise a per-part fixed cost. */ |
3708 | InstructionCost |
3709 | X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment, |
3710 | unsigned AddressSpace, |
3711 | TTI::TargetCostKind CostKind) { |
3712 | bool IsLoad = (Instruction::Load == Opcode); |
3713 | bool IsStore = (Instruction::Store == Opcode); |
3714 | |
3715 | auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy); |
3716 | if (!SrcVTy) |
3717 | /* Not a fixed vector: price it as an ordinary memory op. */ |
3718 | return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace, CostKind); |
3719 | |
3720 | unsigned NumElem = SrcVTy->getNumElements(); |
3721 | auto *MaskTy = |
3722 | FixedVectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); |
3723 | if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) || |
3724 | (IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) { |
3725 | /* Illegal masked op: sum of extracting every mask element, a compare plus branch per element, splitting/building the value, and one scalar memory op per element. */ |
3726 | APInt DemandedElts = APInt::getAllOnesValue(NumElem); |
3727 | InstructionCost MaskSplitCost = |
3728 | getScalarizationOverhead(MaskTy, DemandedElts, false, true); |
3729 | InstructionCost ScalarCompareCost = getCmpSelInstrCost( |
3730 | Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr, |
3731 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
3732 | InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind); |
3733 | InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); |
3734 | InstructionCost ValueSplitCost = |
3735 | getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore); |
3736 | InstructionCost MemopCost = |
3737 | NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
3738 | Alignment, AddressSpace, CostKind); |
3739 | return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; |
3740 | } |
3741 | |
3742 | /* Legal masked op: legalize the type and add any shuffles needed to reach the legal type. */ |
3743 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy); |
3744 | auto VT = TLI->getValueType(DL, SrcVTy); |
3745 | InstructionCost Cost = 0; |
3746 | if (VT.isSimple() && LT.second != VT.getSimpleVT() && |
3747 | LT.second.getVectorNumElements() == NumElem) |
3748 | /* Same lane count but a different legal type: one two-source permute each for the value and the mask. */ |
3749 | Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, None, 0, nullptr) + |
3750 | getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, None, 0, nullptr); |
3751 | |
3752 | else if (LT.first * LT.second.getVectorNumElements() > NumElem) { |
3753 | auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(), |
3754 | LT.second.getVectorNumElements()); |
3755 | /* Legal registers hold more lanes than NumElem: charge inserting the mask into the wider mask vector. */ |
3756 | Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, None, 0, MaskTy); |
3757 | } |
3758 | |
3759 | /* Without AVX-512: 2 per legalized part for loads, 8 otherwise. */ |
3760 | if (!ST->hasAVX512()) |
3761 | return Cost + LT.first * (IsLoad ? 2 : 8); |
3762 | |
3763 | /* With AVX-512: 1 per legalized part. */ |
3764 | return Cost + LT.first; |
3765 | } |
3766 | /* X86 cost of computing the address Ptr for an access of type Ty; only vector types with ScalarEvolution available are special-cased. */ |
3767 | InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty, |
3768 | ScalarEvolution *SE, |
3769 | const SCEV *Ptr) { |
3770 | |
3771 | |
3772 | /* Flat cost returned below for vector accesses that are not strided. */ |
3773 | /* NOTE(review): the rationale for the value 10 is not visible in this function. */ |
3774 | const unsigned NumVectorInstToHideOverhead = 10; |
3775 | |
3776 | |
3777 | |
3778 | |
3779 | |
3780 | /* Vector type with SE present: a non-strided access costs NumVectorInstToHideOverhead, */ |
3781 | /* and a strided access with no constant stride step costs 1. */ |
3782 | |
3783 | if (Ty->isVectorTy() && SE) { |
3784 | if (!BaseT::isStridedAccess(Ptr)) |
3785 | return NumVectorInstToHideOverhead; |
3786 | if (!BaseT::getConstantStrideStep(SE, Ptr)) |
3787 | return 1; |
3788 | } |
3789 | /* Everything else: generic cost. */ |
3790 | return BaseT::getAddressComputationCost(Ty, SE, Ptr); |
3791 | } |
3792 | /* X86 cost of reducing vector ValTy with Opcode: table lookups first, then a halving shuffle-and-op model; ordered reductions go to BaseT. */ |
3793 | InstructionCost |
3794 | X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, |
3795 | Optional<FastMathFlags> FMF, |
3796 | TTI::TargetCostKind CostKind) { |
3797 | if (TTI::requiresOrderedReduction(FMF)) |
3798 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); |
3799 | |
3800 | |
3801 | |
3802 | /* Fixed whole-reduction costs keyed by (ISD opcode, vector type). */ |
3803 | static const CostTblEntry SLMCostTblNoPairWise[] = { |
3804 | { ISD::FADD, MVT::v2f64, 3 }, |
3805 | { ISD::ADD, MVT::v2i64, 5 }, |
3806 | }; |
3807 | |
3808 | static const CostTblEntry SSE2CostTblNoPairWise[] = { |
3809 | { ISD::FADD, MVT::v2f64, 2 }, |
3810 | { ISD::FADD, MVT::v2f32, 2 }, |
3811 | { ISD::FADD, MVT::v4f32, 4 }, |
3812 | { ISD::ADD, MVT::v2i64, 2 }, |
3813 | { ISD::ADD, MVT::v2i32, 2 }, |
3814 | { ISD::ADD, MVT::v4i32, 3 }, |
3815 | { ISD::ADD, MVT::v2i16, 2 }, |
3816 | { ISD::ADD, MVT::v4i16, 3 }, |
3817 | { ISD::ADD, MVT::v8i16, 4 }, |
3818 | { ISD::ADD, MVT::v2i8, 2 }, |
3819 | { ISD::ADD, MVT::v4i8, 2 }, |
3820 | { ISD::ADD, MVT::v8i8, 2 }, |
3821 | { ISD::ADD, MVT::v16i8, 3 }, |
3822 | }; |
3823 | |
3824 | static const CostTblEntry AVX1CostTblNoPairWise[] = { |
3825 | { ISD::FADD, MVT::v4f64, 3 }, |
3826 | { ISD::FADD, MVT::v4f32, 3 }, |
3827 | { ISD::FADD, MVT::v8f32, 4 }, |
3828 | { ISD::ADD, MVT::v2i64, 1 }, |
3829 | { ISD::ADD, MVT::v4i64, 3 }, |
3830 | { ISD::ADD, MVT::v8i32, 5 }, |
3831 | { ISD::ADD, MVT::v16i16, 5 }, |
3832 | { ISD::ADD, MVT::v32i8, 4 }, |
3833 | }; |
3834 | |
3835 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
3836 | assert(ISD && "Invalid opcode"); |
3837 | |
3838 | |
3839 | |
3840 | /* First pass: look the tables up with the un-legalized value type, when it is a simple type. */ |
3841 | EVT VT = TLI->getValueType(DL, ValTy); |
3842 | if (VT.isSimple()) { |
3843 | MVT MTy = VT.getSimpleVT(); |
3844 | if (ST->isSLM()) |
3845 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) |
3846 | return Entry->Cost; |
3847 | |
3848 | if (ST->hasAVX()) |
3849 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
3850 | return Entry->Cost; |
3851 | |
3852 | if (ST->hasSSE2()) |
3853 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
3854 | return Entry->Cost; |
3855 | } |
3856 | |
3857 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
3858 | |
3859 | MVT MTy = LT.second; |
3860 | |
3861 | auto *ValVTy = cast<FixedVectorType>(ValTy); |
3862 | |
3863 | /* i8 multiply reductions: priced as a zero-extend to i16 elements plus the i16 reduction. */ |
3864 | if (ISD == ISD::MUL && MTy.getScalarType() == MVT::i8) { |
3865 | auto *WideSclTy = IntegerType::get(ValVTy->getContext(), 16); |
3866 | auto *WideVecTy = FixedVectorType::get(WideSclTy, ValVTy->getNumElements()); |
3867 | return getCastInstrCost(Instruction::ZExt, WideVecTy, ValTy, |
3868 | TargetTransformInfo::CastContextHint::None, |
3869 | CostKind) + |
3870 | getArithmeticReductionCost(Opcode, WideVecTy, FMF, CostKind); |
3871 | } |
3872 | |
3873 | InstructionCost ArithmeticCost = 0; |
3874 | if (LT.first != 1 && MTy.isVector() && |
3875 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3876 | /* The type splits into LT.first legal registers: LT.first - 1 full-width ops fold them into one. */ |
3877 | auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(), |
3878 | MTy.getVectorNumElements()); |
3879 | ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); |
3880 | ArithmeticCost *= LT.first - 1; |
3881 | } |
3882 | /* Second pass: the same tables, now keyed by the legalized type, plus the folding cost. */ |
3883 | if (ST->isSLM()) |
3884 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) |
3885 | return ArithmeticCost + Entry->Cost; |
3886 | |
3887 | if (ST->hasAVX()) |
3888 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
3889 | return ArithmeticCost + Entry->Cost; |
3890 | |
3891 | if (ST->hasSSE2()) |
3892 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
3893 | return ArithmeticCost + Entry->Cost; |
3894 | |
3895 | |
3896 | /* Tables consulted only for AND/OR reductions over i1 elements (see the isIntegerTy(1) check below), keyed by the legalized type. */ |
3897 | static const CostTblEntry AVX512BoolReduction[] = { |
3898 | { ISD::AND, MVT::v2i1, 3 }, |
3899 | { ISD::AND, MVT::v4i1, 5 }, |
3900 | { ISD::AND, MVT::v8i1, 7 }, |
3901 | { ISD::AND, MVT::v16i1, 9 }, |
3902 | { ISD::AND, MVT::v32i1, 11 }, |
3903 | { ISD::AND, MVT::v64i1, 13 }, |
3904 | { ISD::OR, MVT::v2i1, 3 }, |
3905 | { ISD::OR, MVT::v4i1, 5 }, |
3906 | { ISD::OR, MVT::v8i1, 7 }, |
3907 | { ISD::OR, MVT::v16i1, 9 }, |
3908 | { ISD::OR, MVT::v32i1, 11 }, |
3909 | { ISD::OR, MVT::v64i1, 13 }, |
3910 | }; |
3911 | |
3912 | static const CostTblEntry AVX2BoolReduction[] = { |
3913 | { ISD::AND, MVT::v16i16, 2 }, |
3914 | { ISD::AND, MVT::v32i8, 2 }, |
3915 | { ISD::OR, MVT::v16i16, 2 }, |
3916 | { ISD::OR, MVT::v32i8, 2 }, |
3917 | }; |
3918 | |
3919 | static const CostTblEntry AVX1BoolReduction[] = { |
3920 | { ISD::AND, MVT::v4i64, 2 }, |
3921 | { ISD::AND, MVT::v8i32, 2 }, |
3922 | { ISD::AND, MVT::v16i16, 4 }, |
3923 | { ISD::AND, MVT::v32i8, 4 }, |
3924 | { ISD::OR, MVT::v4i64, 2 }, |
3925 | { ISD::OR, MVT::v8i32, 2 }, |
3926 | { ISD::OR, MVT::v16i16, 4 }, |
3927 | { ISD::OR, MVT::v32i8, 4 }, |
3928 | }; |
3929 | |
3930 | static const CostTblEntry SSE2BoolReduction[] = { |
3931 | { ISD::AND, MVT::v2i64, 2 }, |
3932 | { ISD::AND, MVT::v4i32, 2 }, |
3933 | { ISD::AND, MVT::v8i16, 2 }, |
3934 | { ISD::AND, MVT::v16i8, 2 }, |
3935 | { ISD::OR, MVT::v2i64, 2 }, |
3936 | { ISD::OR, MVT::v4i32, 2 }, |
3937 | { ISD::OR, MVT::v8i16, 2 }, |
3938 | { ISD::OR, MVT::v16i8, 2 }, |
3939 | }; |
3940 | |
3941 | /* i1 element vectors: bool tables, else the generic cost. The inner ArithmeticCost shadows the outer one and is computed the same way. */ |
3942 | if (ValVTy->getElementType()->isIntegerTy(1)) { |
3943 | InstructionCost ArithmeticCost = 0; |
3944 | if (LT.first != 1 && MTy.isVector() && |
3945 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3946 | |
3947 | auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(), |
3948 | MTy.getVectorNumElements()); |
3949 | ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); |
3950 | ArithmeticCost *= LT.first - 1; |
3951 | } |
3952 | |
3953 | if (ST->hasAVX512()) |
3954 | if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy)) |
3955 | return ArithmeticCost + Entry->Cost; |
3956 | if (ST->hasAVX2()) |
3957 | if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy)) |
3958 | return ArithmeticCost + Entry->Cost; |
3959 | if (ST->hasAVX()) |
3960 | if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy)) |
3961 | return ArithmeticCost + Entry->Cost; |
3962 | if (ST->hasSSE2()) |
3963 | if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) |
3964 | return ArithmeticCost + Entry->Cost; |
3965 | |
3966 | return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind); |
3967 | } |
3968 | |
3969 | unsigned NumVecElts = ValVTy->getNumElements(); |
3970 | unsigned ScalarSize = ValVTy->getScalarSizeInBits(); |
3971 | |
3972 | |
3973 | /* The model below is used only for a power-of-2 element count whose scalar width survives legalization; otherwise use the generic cost. */ |
3974 | if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits()) |
3975 | return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind); |
3976 | |
3977 | InstructionCost ReductionCost = 0; |
3978 | |
3979 | auto *Ty = ValVTy; |
3980 | if (LT.first != 1 && MTy.isVector() && |
3981 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3982 | /* Fold the LT.first legal registers into one, then continue with a single legal-width vector. */ |
3983 | Ty = FixedVectorType::get(ValVTy->getElementType(), |
3984 | MTy.getVectorNumElements()); |
3985 | ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind); |
3986 | ReductionCost *= LT.first - 1; |
3987 | NumVecElts = MTy.getVectorNumElements(); |
3988 | } |
3989 | |
3990 | |
3991 | /* Halve the element count until one is left; each step costs one shuffle or shift plus one arithmetic op. */ |
3992 | while (NumVecElts > 1) { |
3993 | /* Size is the bit width before this halving step. */ |
3994 | unsigned Size = NumVecElts * ScalarSize; |
3995 | NumVecElts /= 2; |
3996 | /* Wider than 128 bits: extract the upper half as a subvector and continue on the half-width type. */ |
3997 | if (Size > 128) { |
3998 | auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); |
3999 | ReductionCost += |
4000 | getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy); |
4001 | Ty = SubTy; |
4002 | } else if (Size == 128) { |
4003 | /* 128 bits: single-source permute of a 2 x 64-bit vector. ValVTy is a vector type, so the FP test must be isFPOrFPVectorTy(); isFloatingPointTy() is false for every vector. */ |
4004 | FixedVectorType *ShufTy; |
4005 | if (ValVTy->isFPOrFPVectorTy()) |
4006 | ShufTy = |
4007 | FixedVectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2); |
4008 | else |
4009 | ShufTy = |
4010 | FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2); |
4011 | ReductionCost += |
4012 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4013 | } else if (Size == 64) { |
4014 | /* 64 bits: single-source permute of a 4 x 32-bit vector, FP or integer to match the reduction (same predicate fix as above). */ |
4015 | FixedVectorType *ShufTy; |
4016 | if (ValVTy->isFPOrFPVectorTy()) |
4017 | ShufTy = |
4018 | FixedVectorType::get(Type::getFloatTy(ValVTy->getContext()), 4); |
4019 | else |
4020 | ShufTy = |
4021 | FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4); |
4022 | ReductionCost += |
4023 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4024 | } else { |
4025 | /* Narrower than 64 bits: a logical shift right by a uniform constant on a 128-bit vector of Size-bit integers. */ |
4026 | auto *ShiftTy = FixedVectorType::get( |
4027 | Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size); |
4028 | ReductionCost += getArithmeticInstrCost( |
4029 | Instruction::LShr, ShiftTy, CostKind, |
4030 | TargetTransformInfo::OK_AnyValue, |
4031 | TargetTransformInfo::OK_UniformConstantValue, |
4032 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
4033 | } |
4034 | |
4035 | /* The arithmetic op combining the two halves. */ |
4036 | ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind); |
4037 | } |
4038 | |
4039 | /* Finally, extract lane 0 to get the scalar result. */ |
4040 | return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
4041 | } |
4042 | /* X86 cost of one min/max operation on Ty: per-ISA table lookup on the legalized type, else compare plus select. */ |
4043 | InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, |
4044 | bool IsUnsigned) { |
4045 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
4046 | |
4047 | MVT MTy = LT.second; |
4048 | /* The tables below are keyed by the min opcodes only (UMIN/SMIN for integers, FMINNUM for floating point). */ |
4049 | int ISD; |
4050 | if (Ty->isIntOrIntVectorTy()) { |
4051 | ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; |
4052 | } else { |
4053 | assert(Ty->isFPOrFPVectorTy() && |
4054 | "Expected float point or integer vector type."); |
4055 | ISD = ISD::FMINNUM; |
4056 | } |
4057 | |
4058 | static const CostTblEntry SSE1CostTbl[] = { |
4059 | {ISD::FMINNUM, MVT::v4f32, 1}, |
4060 | }; |
4061 | |
4062 | static const CostTblEntry SSE2CostTbl[] = { |
4063 | {ISD::FMINNUM, MVT::v2f64, 1}, |
4064 | {ISD::SMIN, MVT::v8i16, 1}, |
4065 | {ISD::UMIN, MVT::v16i8, 1}, |
4066 | }; |
4067 | |
4068 | static const CostTblEntry SSE41CostTbl[] = { |
4069 | {ISD::SMIN, MVT::v4i32, 1}, |
4070 | {ISD::UMIN, MVT::v4i32, 1}, |
4071 | {ISD::UMIN, MVT::v8i16, 1}, |
4072 | {ISD::SMIN, MVT::v16i8, 1}, |
4073 | }; |
4074 | |
4075 | static const CostTblEntry SSE42CostTbl[] = { |
4076 | {ISD::UMIN, MVT::v2i64, 3}, |
4077 | }; |
4078 | |
4079 | static const CostTblEntry AVX1CostTbl[] = { |
4080 | {ISD::FMINNUM, MVT::v8f32, 1}, |
4081 | {ISD::FMINNUM, MVT::v4f64, 1}, |
4082 | {ISD::SMIN, MVT::v8i32, 3}, |
4083 | {ISD::UMIN, MVT::v8i32, 3}, |
4084 | {ISD::SMIN, MVT::v16i16, 3}, |
4085 | {ISD::UMIN, MVT::v16i16, 3}, |
4086 | {ISD::SMIN, MVT::v32i8, 3}, |
4087 | {ISD::UMIN, MVT::v32i8, 3}, |
4088 | }; |
4089 | |
4090 | static const CostTblEntry AVX2CostTbl[] = { |
4091 | {ISD::SMIN, MVT::v8i32, 1}, |
4092 | {ISD::UMIN, MVT::v8i32, 1}, |
4093 | {ISD::SMIN, MVT::v16i16, 1}, |
4094 | {ISD::UMIN, MVT::v16i16, 1}, |
4095 | {ISD::SMIN, MVT::v32i8, 1}, |
4096 | {ISD::UMIN, MVT::v32i8, 1}, |
4097 | }; |
4098 | |
4099 | static const CostTblEntry AVX512CostTbl[] = { |
4100 | {ISD::FMINNUM, MVT::v16f32, 1}, |
4101 | {ISD::FMINNUM, MVT::v8f64, 1}, |
4102 | {ISD::SMIN, MVT::v2i64, 1}, |
4103 | {ISD::UMIN, MVT::v2i64, 1}, |
4104 | {ISD::SMIN, MVT::v4i64, 1}, |
4105 | {ISD::UMIN, MVT::v4i64, 1}, |
4106 | {ISD::SMIN, MVT::v8i64, 1}, |
4107 | {ISD::UMIN, MVT::v8i64, 1}, |
4108 | {ISD::SMIN, MVT::v16i32, 1}, |
4109 | {ISD::UMIN, MVT::v16i32, 1}, |
4110 | }; |
4111 | |
4112 | static const CostTblEntry AVX512BWCostTbl[] = { |
4113 | {ISD::SMIN, MVT::v32i16, 1}, |
4114 | {ISD::UMIN, MVT::v32i16, 1}, |
4115 | {ISD::SMIN, MVT::v64i8, 1}, |
4116 | {ISD::UMIN, MVT::v64i8, 1}, |
4117 | }; |
4118 | |
4119 | /* Tables are tried in the order below; the first hit is returned, scaled by the number of legalized parts. */ |
4120 | if (ST->hasBWI()) |
4121 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
4122 | return LT.first * Entry->Cost; |
4123 | |
4124 | if (ST->hasAVX512()) |
4125 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
4126 | return LT.first * Entry->Cost; |
4127 | |
4128 | if (ST->hasAVX2()) |
4129 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
4130 | return LT.first * Entry->Cost; |
4131 | |
4132 | if (ST->hasAVX()) |
4133 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
4134 | return LT.first * Entry->Cost; |
4135 | |
4136 | if (ST->hasSSE42()) |
4137 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
4138 | return LT.first * Entry->Cost; |
4139 | |
4140 | if (ST->hasSSE41()) |
4141 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
4142 | return LT.first * Entry->Cost; |
4143 | |
4144 | if (ST->hasSSE2()) |
4145 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
4146 | return LT.first * Entry->Cost; |
4147 | |
4148 | if (ST->hasSSE1()) |
4149 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
4150 | return LT.first * Entry->Cost; |
4151 | /* No table entry: price the operation as a compare followed by a select, at reciprocal-throughput cost. */ |
4152 | unsigned CmpOpcode; |
4153 | if (Ty->isFPOrFPVectorTy()) { |
4154 | CmpOpcode = Instruction::FCmp; |
4155 | } else { |
4156 | assert(Ty->isIntOrIntVectorTy() && |
4157 | "expecting floating point or integer type for min/max reduction"); |
4158 | CmpOpcode = Instruction::ICmp; |
4159 | } |
4160 | |
4161 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; |
4162 | |
4163 | InstructionCost Result = |
4164 | getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE, |
4165 | CostKind) + |
4166 | getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
4167 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
4168 | return Result; |
4169 | } |
4170 | |
4171 | InstructionCost |
4172 | X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, |
4173 | bool IsUnsigned, |
4174 | TTI::TargetCostKind CostKind) { |
4175 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
4176 | |
4177 | MVT MTy = LT.second; |
4178 | |
4179 | int ISD; |
4180 | if (ValTy->isIntOrIntVectorTy()) { |
4181 | ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; |
4182 | } else { |
4183 | assert(ValTy->isFPOrFPVectorTy() && |
4184 | "Expected float point or integer vector type."); |
4185 | ISD = ISD::FMINNUM; |
4186 | } |
4187 | |
4188 | |
4189 | |
4190 | |
4191 | static const CostTblEntry SSE2CostTblNoPairWise[] = { |
4192 | {ISD::UMIN, MVT::v2i16, 5}, |
4193 | {ISD::UMIN, MVT::v4i16, 7}, |
4194 | {ISD::UMIN, MVT::v8i16, 9}, |
4195 | }; |
4196 | |
4197 | static const CostTblEntry SSE41CostTblNoPairWise[] = { |
4198 | {ISD::SMIN, MVT::v2i16, 3}, |
4199 | {ISD::SMIN, MVT::v4i16, 5}, |
4200 | {ISD::UMIN, MVT::v2i16, 5}, |
4201 | {ISD::UMIN, MVT::v4i16, 7}, |
4202 | {ISD::SMIN, MVT::v8i16, 4}, |
4203 | {ISD::UMIN, MVT::v8i16, 4}, |
4204 | {ISD::SMIN, MVT::v2i8, 3}, |
4205 | {ISD::SMIN, MVT::v4i8, 5}, |
4206 | {ISD::SMIN, MVT::v8i8, 7}, |
4207 | {ISD::SMIN, MVT::v16i8, 6}, |
4208 | {ISD::UMIN, MVT::v2i8, 3}, |
4209 | {ISD::UMIN, MVT::v4i8, 5}, |
4210 | {ISD::UMIN, MVT::v8i8, 7}, |
4211 | {ISD::UMIN, MVT::v16i8, 6}, |
4212 | }; |
4213 | |
4214 | static const CostTblEntry AVX1CostTblNoPairWise[] = { |
4215 | {ISD::SMIN, MVT::v16i16, 6}, |
4216 | {ISD::UMIN, MVT::v16i16, 6}, |
4217 | {ISD::SMIN, MVT::v32i8, 8}, |
4218 | {ISD::UMIN, MVT::v32i8, 8}, |
4219 | }; |
4220 | |
4221 | static const CostTblEntry AVX512BWCostTblNoPairWise[] = { |
4222 | {ISD::SMIN, MVT::v32i16, 8}, |
4223 | {ISD::UMIN, MVT::v32i16, 8}, |
4224 | {ISD::SMIN, MVT::v64i8, 10}, |
4225 | {ISD::UMIN, MVT::v64i8, 10}, |
4226 | }; |
4227 | |
4228 | |
4229 | |
4230 | |
4231 | EVT VT = TLI->getValueType(DL, ValTy); |
4232 | if (VT.isSimple()) { |
4233 | MVT MTy = VT.getSimpleVT(); |
4234 | if (ST->hasBWI()) |
4235 | if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy)) |
4236 | return Entry->Cost; |
4237 | |
4238 | if (ST->hasAVX()) |
4239 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
4240 | return Entry->Cost; |
4241 | |
4242 | if (ST->hasSSE41()) |
4243 | if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) |
4244 | return Entry->Cost; |
4245 | |
4246 | if (ST->hasSSE2()) |
4247 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
4248 | return Entry->Cost; |
4249 | } |
4250 | |
4251 | auto *ValVTy = cast<FixedVectorType>(ValTy); |
4252 | unsigned NumVecElts = ValVTy->getNumElements(); |
4253 | |
4254 | auto *Ty = ValVTy; |
4255 | InstructionCost MinMaxCost = 0; |
4256 | if (LT.first != 1 && MTy.isVector() && |
4257 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
4258 | |
4259 | Ty = FixedVectorType::get(ValVTy->getElementType(), |
4260 | MTy.getVectorNumElements()); |
4261 | auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(), |
4262 | MTy.getVectorNumElements()); |
4263 | MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned); |
4264 | MinMaxCost *= LT.first - 1; |
4265 | NumVecElts = MTy.getVectorNumElements(); |
4266 | } |
4267 | |
4268 | if (ST->hasBWI()) |
4269 | if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy)) |
4270 | return MinMaxCost + Entry->Cost; |
4271 | |
4272 | if (ST->hasAVX()) |
4273 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
4274 | return MinMaxCost + Entry->Cost; |
4275 | |
4276 | if (ST->hasSSE41()) |
4277 | if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) |
4278 | return MinMaxCost + Entry->Cost; |
4279 | |
4280 | if (ST->hasSSE2()) |
4281 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
4282 | return MinMaxCost + Entry->Cost; |
4283 | |
4284 | unsigned ScalarSize = ValTy->getScalarSizeInBits(); |
4285 | |
4286 | |
4287 | |
4288 | if (!isPowerOf2_32(ValVTy->getNumElements()) || |
4289 | ScalarSize != MTy.getScalarSizeInBits()) |
4290 | return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, CostKind); |
4291 | |
4292 | |
4293 | |
4294 | while (NumVecElts > 1) { |
4295 | |
4296 | unsigned Size = NumVecElts * ScalarSize; |
4297 | NumVecElts /= 2; |
4298 | |
4299 | if (Size > 128) { |
4300 | auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); |
4301 | MinMaxCost += |
4302 | getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy); |
4303 | Ty = SubTy; |
4304 | } else if (Size == 128) { |
4305 | |
4306 | VectorType *ShufTy; |
4307 | if (ValTy->isFloatingPointTy()) |
4308 | ShufTy = |
4309 | FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2); |
4310 | else |
4311 | ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2); |
4312 | MinMaxCost += |
4313 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4314 | } else if (Size == 64) { |
4315 | |
4316 | FixedVectorType *ShufTy; |
4317 | if (ValTy->isFloatingPointTy()) |
4318 | ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4); |
4319 | else |
4320 | ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4); |
4321 | MinMaxCost += |
4322 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4323 | } else { |
4324 | |
4325 | auto *ShiftTy = FixedVectorType::get( |
4326 | Type::getIntNTy(ValTy->getContext(), Size), 128 / Size); |
4327 | MinMaxCost += getArithmeticInstrCost( |
4328 | Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput, |
4329 | TargetTransformInfo::OK_AnyValue, |
4330 | TargetTransformInfo::OK_UniformConstantValue, |
4331 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
4332 | } |
4333 | |
4334 | |
4335 | auto *SubCondTy = |
4336 | FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements()); |
4337 | MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned); |
4338 | } |
4339 | |
4340 | |
4341 | return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
4342 | } |
4343 | |
4344 | |
4345 | |
4346 | |
4347 | InstructionCost X86TTIImpl::getIntImmCost(int64_t Val) { |
4348 | if (Val == 0) |
4349 | return TTI::TCC_Free; |
4350 | |
4351 | if (isInt<32>(Val)) |
4352 | return TTI::TCC_Basic; |
4353 | |
4354 | return 2 * TTI::TCC_Basic; |
4355 | } |
4356 | |
4357 | InstructionCost X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, |
4358 | TTI::TargetCostKind CostKind) { |
4359 | assert(Ty->isIntegerTy()); |
4360 | |
4361 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4362 | if (BitSize == 0) |
4363 | return ~0U; |
4364 | |
4365 | |
4366 | |
4367 | |
4368 | |
4369 | if (BitSize > 128) |
4370 | return TTI::TCC_Free; |
4371 | |
4372 | if (Imm == 0) |
4373 | return TTI::TCC_Free; |
4374 | |
4375 | |
4376 | APInt ImmVal = Imm; |
4377 | if (BitSize % 64 != 0) |
4378 | ImmVal = Imm.sext(alignTo(BitSize, 64)); |
4379 | |
4380 | |
4381 | |
4382 | InstructionCost Cost = 0; |
4383 | for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { |
4384 | APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); |
4385 | int64_t Val = Tmp.getSExtValue(); |
4386 | Cost += getIntImmCost(Val); |
4387 | } |
4388 | |
4389 | return std::max<InstructionCost>(1, Cost); |
4390 | } |
4391 | |
4392 | InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, |
4393 | const APInt &Imm, Type *Ty, |
4394 | TTI::TargetCostKind CostKind, |
4395 | Instruction *Inst) { |
4396 | assert(Ty->isIntegerTy()); |
4397 | |
4398 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4399 | |
4400 | |
4401 | if (BitSize == 0) |
4402 | return TTI::TCC_Free; |
4403 | |
4404 | unsigned ImmIdx = ~0U; |
4405 | switch (Opcode) { |
4406 | default: |
4407 | return TTI::TCC_Free; |
4408 | case Instruction::GetElementPtr: |
4409 | |
4410 | |
4411 | |
4412 | if (Idx == 0) |
4413 | return 2 * TTI::TCC_Basic; |
4414 | return TTI::TCC_Free; |
4415 | case Instruction::Store: |
4416 | ImmIdx = 0; |
4417 | break; |
4418 | case Instruction::ICmp: |
4419 | |
4420 | |
4421 | |
4422 | |
4423 | |
4424 | if (Idx == 1 && Imm.getBitWidth() == 64) { |
4425 | uint64_t ImmVal = Imm.getZExtValue(); |
4426 | if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff) |
4427 | return TTI::TCC_Free; |
4428 | } |
4429 | ImmIdx = 1; |
4430 | break; |
4431 | case Instruction::And: |
4432 | |
4433 | |
4434 | |
4435 | if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) |
4436 | return TTI::TCC_Free; |
4437 | ImmIdx = 1; |
4438 | break; |
4439 | case Instruction::Add: |
4440 | case Instruction::Sub: |
4441 | |
4442 | if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000) |
4443 | return TTI::TCC_Free; |
4444 | ImmIdx = 1; |
4445 | break; |
4446 | case Instruction::UDiv: |
4447 | case Instruction::SDiv: |
4448 | case Instruction::URem: |
4449 | case Instruction::SRem: |
4450 | |
4451 | |
4452 | |
4453 | return TTI::TCC_Free; |
4454 | case Instruction::Mul: |
4455 | case Instruction::Or: |
4456 | case Instruction::Xor: |
4457 | ImmIdx = 1; |
4458 | break; |
4459 | |
4460 | case Instruction::Shl: |
4461 | case Instruction::LShr: |
4462 | case Instruction::AShr: |
4463 | if (Idx == 1) |
4464 | return TTI::TCC_Free; |
4465 | break; |
4466 | case Instruction::Trunc: |
4467 | case Instruction::ZExt: |
4468 | case Instruction::SExt: |
4469 | case Instruction::IntToPtr: |
4470 | case Instruction::PtrToInt: |
4471 | case Instruction::BitCast: |
4472 | case Instruction::PHI: |
4473 | case Instruction::Call: |
4474 | case Instruction::Select: |
4475 | case Instruction::Ret: |
4476 | case Instruction::Load: |
4477 | break; |
4478 | } |
4479 | |
4480 | if (Idx == ImmIdx) { |
4481 | int NumConstants = divideCeil(BitSize, 64); |
4482 | InstructionCost Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4483 | return (Cost <= NumConstants * TTI::TCC_Basic) |
4484 | ? static_cast<int>(TTI::TCC_Free) |
4485 | : Cost; |
4486 | } |
4487 | |
4488 | return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4489 | } |
4490 | |
4491 | InstructionCost X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
4492 | const APInt &Imm, Type *Ty, |
4493 | TTI::TargetCostKind CostKind) { |
4494 | assert(Ty->isIntegerTy()); |
4495 | |
4496 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4497 | |
4498 | |
4499 | if (BitSize == 0) |
4500 | return TTI::TCC_Free; |
4501 | |
4502 | switch (IID) { |
4503 | default: |
4504 | return TTI::TCC_Free; |
4505 | case Intrinsic::sadd_with_overflow: |
4506 | case Intrinsic::uadd_with_overflow: |
4507 | case Intrinsic::ssub_with_overflow: |
4508 | case Intrinsic::usub_with_overflow: |
4509 | case Intrinsic::smul_with_overflow: |
4510 | case Intrinsic::umul_with_overflow: |
4511 | if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) |
4512 | return TTI::TCC_Free; |
4513 | break; |
4514 | case Intrinsic::experimental_stackmap: |
4515 | if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) |
4516 | return TTI::TCC_Free; |
4517 | break; |
4518 | case Intrinsic::experimental_patchpoint_void: |
4519 | case Intrinsic::experimental_patchpoint_i64: |
4520 | if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) |
4521 | return TTI::TCC_Free; |
4522 | break; |
4523 | } |
4524 | return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4525 | } |
4526 | |
4527 | InstructionCost X86TTIImpl::getCFInstrCost(unsigned Opcode, |
4528 | TTI::TargetCostKind CostKind, |
4529 | const Instruction *I) { |
4530 | if (CostKind != TTI::TCK_RecipThroughput) |
4531 | return Opcode == Instruction::PHI ? 0 : 1; |
4532 | |
4533 | return 0; |
4534 | } |
4535 | |
4536 | int X86TTIImpl::getGatherOverhead() const { |
4537 | |
4538 | |
4539 | |
4540 | |
4541 | |
4542 | |
4543 | if (ST->hasAVX512() || (ST->hasAVX2() && ST->hasFastGather())) |
4544 | return 2; |
4545 | |
4546 | return 1024; |
4547 | } |
4548 | |
4549 | int X86TTIImpl::getScatterOverhead() const { |
4550 | if (ST->hasAVX512()) |
4551 | return 2; |
4552 | |
4553 | return 1024; |
4554 | } |
4555 | |
4556 | |
4557 | |
4558 | InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, |
4559 | const Value *Ptr, Align Alignment, |
4560 | unsigned AddressSpace) { |
4561 | |
4562 | assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost"); |
4563 | unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements(); |
4564 | |
4565 | |
4566 | |
4567 | |
4568 | |
4569 | |
4570 | auto getIndexSizeInBits = [](const Value *Ptr, const DataLayout &DL) { |
4571 | unsigned IndexSize = DL.getPointerSizeInBits(); |
4572 | const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); |
4573 | if (IndexSize < 64 || !GEP) |
4574 | return IndexSize; |
4575 | |
4576 | unsigned NumOfVarIndices = 0; |
4577 | const Value *Ptrs = GEP->getPointerOperand(); |
4578 | if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs)) |
4579 | return IndexSize; |
4580 | for (unsigned i = 1; i < GEP->getNumOperands(); ++i) { |
4581 | if (isa<Constant>(GEP->getOperand(i))) |
4582 | continue; |
4583 | Type *IndxTy = GEP->getOperand(i)->getType(); |
4584 | if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy)) |
4585 | IndxTy = IndexVTy->getElementType(); |
4586 | if ((IndxTy->getPrimitiveSizeInBits() == 64 && |
4587 | !isa<SExtInst>(GEP->getOperand(i))) || |
4588 | ++NumOfVarIndices > 1) |
4589 | return IndexSize; |
4590 | } |
4591 | return (unsigned)32; |
4592 | }; |
4593 | |
4594 | |
4595 | |
4596 | unsigned IndexSize = (ST->hasAVX512() && VF >= 16) |
4597 | ? getIndexSizeInBits(Ptr, DL) |
4598 | : DL.getPointerSizeInBits(); |
4599 | |
4600 | auto *IndexVTy = FixedVectorType::get( |
4601 | IntegerType::get(SrcVTy->getContext(), IndexSize), VF); |
4602 | std::pair<InstructionCost, MVT> IdxsLT = |
4603 | TLI->getTypeLegalizationCost(DL, IndexVTy); |
4604 | std::pair<InstructionCost, MVT> SrcLT = |
4605 | TLI->getTypeLegalizationCost(DL, SrcVTy); |
4606 | InstructionCost::CostType SplitFactor = |
4607 | *std::max(IdxsLT.first, SrcLT.first).getValue(); |
4608 | if (SplitFactor > 1) { |
4609 | |
4610 | auto *SplitSrcTy = |
4611 | FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); |
4612 | return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, |
4613 | AddressSpace); |
4614 | } |
4615 | |
4616 | |
4617 | |
4618 | const int GSOverhead = (Opcode == Instruction::Load) |
4619 | ? getGatherOverhead() |
4620 | : getScatterOverhead(); |
4621 | return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
4622 | MaybeAlign(Alignment), AddressSpace, |
4623 | TTI::TCK_RecipThroughput); |
4624 | } |
4625 | |
4626 | |
4627 | |
4628 | |
4629 | |
4630 | |
4631 | |
4632 | |
4633 | |
4634 | |
4635 | InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, |
4636 | bool VariableMask, Align Alignment, |
4637 | unsigned AddressSpace) { |
4638 | unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements(); |
4639 | APInt DemandedElts = APInt::getAllOnesValue(VF); |
4640 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; |
4641 | |
4642 | InstructionCost MaskUnpackCost = 0; |
4643 | if (VariableMask) { |
4644 | auto *MaskTy = |
4645 | FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF); |
4646 | MaskUnpackCost = |
4647 | getScalarizationOverhead(MaskTy, DemandedElts, false, true); |
4648 | InstructionCost ScalarCompareCost = getCmpSelInstrCost( |
4649 | Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr, |
4650 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
4651 | InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind); |
4652 | MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); |
4653 | } |
4654 | |
4655 | |
4656 | InstructionCost MemoryOpCost = |
4657 | VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
4658 | MaybeAlign(Alignment), AddressSpace, CostKind); |
4659 | |
4660 | InstructionCost InsertExtractCost = 0; |
4661 | if (Opcode == Instruction::Load) |
4662 | for (unsigned i = 0; i < VF; ++i) |
4663 | |
4664 | InsertExtractCost += |
4665 | getVectorInstrCost(Instruction::InsertElement, SrcVTy, i); |
4666 | else |
4667 | for (unsigned i = 0; i < VF; ++i) |
4668 | |
4669 | InsertExtractCost += |
4670 | getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i); |
4671 | |
4672 | return MemoryOpCost + MaskUnpackCost + InsertExtractCost; |
4673 | } |
4674 | |
4675 | |
4676 | InstructionCost X86TTIImpl::getGatherScatterOpCost( |
4677 | unsigned Opcode, Type *SrcVTy, const Value *Ptr, bool VariableMask, |
4678 | Align Alignment, TTI::TargetCostKind CostKind, |
4679 | const Instruction *I = nullptr) { |
4680 | if (CostKind != TTI::TCK_RecipThroughput) { |
| 1 | Assuming 'CostKind' is equal to TCK_RecipThroughput | |
|
| |
4681 | if ((Opcode == Instruction::Load && |
4682 | isLegalMaskedGather(SrcVTy, Align(Alignment))) || |
4683 | (Opcode == Instruction::Store && |
4684 | isLegalMaskedScatter(SrcVTy, Align(Alignment)))) |
4685 | return 1; |
4686 | return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask, |
4687 | Alignment, CostKind, I); |
4688 | } |
4689 | |
4690 | assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); |
4691 | PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); |
| 3 | | Assuming the object is not a 'PointerType' | |
|
| 4 | | 'PtrTy' initialized to a null pointer value | |
|
4692 | if (!PtrTy && Ptr->getType()->isVectorTy()) |
| |
4693 | PtrTy = dyn_cast<PointerType>( |
4694 | cast<VectorType>(Ptr->getType())->getElementType()); |
4695 | assert(PtrTy && "Unexpected type for Ptr argument"); |
4696 | unsigned AddressSpace = PtrTy->getAddressSpace(); |
| 6 | | Called C++ object pointer is null |
|
4697 | |
4698 | if ((Opcode == Instruction::Load && |
4699 | !isLegalMaskedGather(SrcVTy, Align(Alignment))) || |
4700 | (Opcode == Instruction::Store && |
4701 | !isLegalMaskedScatter(SrcVTy, Align(Alignment)))) |
4702 | return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, |
4703 | AddressSpace); |
4704 | |
4705 | return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); |
4706 | } |
4707 | |
4708 | bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
4709 | TargetTransformInfo::LSRCost &C2) { |
4710 | |
4711 | return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, |
4712 | C1.NumIVMuls, C1.NumBaseAdds, |
4713 | C1.ScaleCost, C1.ImmCost, C1.SetupCost) < |
4714 | std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, |
4715 | C2.NumIVMuls, C2.NumBaseAdds, |
4716 | C2.ScaleCost, C2.ImmCost, C2.SetupCost); |
4717 | } |
4718 | |
4719 | bool X86TTIImpl::canMacroFuseCmp() { |
4720 | return ST->hasMacroFusion() || ST->hasBranchFusion(); |
4721 | } |
4722 | |
4723 | bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) { |
4724 | if (!ST->hasAVX()) |
4725 | return false; |
4726 | |
4727 | |
4728 | if (isa<VectorType>(DataTy) && |
4729 | cast<FixedVectorType>(DataTy)->getNumElements() == 1) |
4730 | return false; |
4731 | Type *ScalarTy = DataTy->getScalarType(); |
4732 | |
4733 | if (ScalarTy->isPointerTy()) |
4734 | return true; |
4735 | |
4736 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4737 | return true; |
4738 | |
4739 | if (ScalarTy->isHalfTy() && ST->hasBWI() && ST->hasFP16()) |
4740 | return true; |
4741 | |
4742 | if (!ScalarTy->isIntegerTy()) |
4743 | return false; |
4744 | |
4745 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4746 | return IntWidth == 32 || IntWidth == 64 || |
4747 | ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI()); |
4748 | } |
4749 | |
4750 | bool X86TTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment) { |
4751 | return isLegalMaskedLoad(DataType, Alignment); |
4752 | } |
4753 | |
4754 | bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) { |
4755 | unsigned DataSize = DL.getTypeStoreSize(DataType); |
4756 | |
4757 | |
4758 | |
4759 | if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32)) |
4760 | return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2(); |
4761 | |
4762 | return false; |
4763 | } |
4764 | |
4765 | bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { |
4766 | unsigned DataSize = DL.getTypeStoreSize(DataType); |
4767 | |
4768 | |
4769 | |
4770 | if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy())) |
4771 | return true; |
4772 | |
4773 | |
4774 | |
4775 | |
4776 | if (Alignment < DataSize || DataSize < 4 || DataSize > 32 || |
4777 | !isPowerOf2_32(DataSize)) |
4778 | return false; |
4779 | |
4780 | |
4781 | |
4782 | if (DataSize == 32) |
4783 | return ST->hasAVX(); |
4784 | else if (DataSize == 16) |
4785 | return ST->hasSSE1(); |
4786 | return true; |
4787 | } |
4788 | |
4789 | bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { |
4790 | if (!isa<VectorType>(DataTy)) |
4791 | return false; |
4792 | |
4793 | if (!ST->hasAVX512()) |
4794 | return false; |
4795 | |
4796 | |
4797 | if (cast<FixedVectorType>(DataTy)->getNumElements() == 1) |
4798 | return false; |
4799 | |
4800 | Type *ScalarTy = cast<VectorType>(DataTy)->getElementType(); |
4801 | |
4802 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4803 | return true; |
4804 | |
4805 | if (!ScalarTy->isIntegerTy()) |
4806 | return false; |
4807 | |
4808 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4809 | return IntWidth == 32 || IntWidth == 64 || |
4810 | ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2()); |
4811 | } |
4812 | |
4813 | bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) { |
4814 | return isLegalMaskedExpandLoad(DataTy); |
4815 | } |
4816 | |
4817 | bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { |
4818 | |
4819 | |
4820 | |
4821 | if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()))) |
4822 | return false; |
4823 | |
4824 | |
4825 | |
4826 | |
4827 | |
4828 | |
4829 | |
4830 | |
4831 | |
4832 | |
4833 | |
4834 | |
4835 | |
4836 | if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) { |
4837 | unsigned NumElts = DataVTy->getNumElements(); |
4838 | if (NumElts == 1) |
4839 | return false; |
4840 | |
4841 | |
4842 | |
4843 | |
4844 | |
4845 | |
4846 | if (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX()))) |
4847 | return false; |
4848 | } |
4849 | Type *ScalarTy = DataTy->getScalarType(); |
4850 | if (ScalarTy->isPointerTy()) |
4851 | return true; |
4852 | |
4853 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4854 | return true; |
4855 | |
4856 | if (!ScalarTy->isIntegerTy()) |
4857 | return false; |
4858 | |
4859 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4860 | return IntWidth == 32 || IntWidth == 64; |
4861 | } |
4862 | |
4863 | bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) { |
4864 | |
4865 | if (!ST->hasAVX512()) |
4866 | return false; |
4867 | return isLegalMaskedGather(DataType, Alignment); |
4868 | } |
4869 | |
4870 | bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { |
4871 | EVT VT = TLI->getValueType(DL, DataType); |
4872 | return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT); |
4873 | } |
4874 | |
4875 | bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
4876 | return false; |
4877 | } |
4878 | |
4879 | bool X86TTIImpl::areInlineCompatible(const Function *Caller, |
4880 | const Function *Callee) const { |
4881 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
4882 | |
4883 | |
4884 | const FeatureBitset &CallerBits = |
4885 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); |
4886 | const FeatureBitset &CalleeBits = |
4887 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); |
4888 | |
4889 | FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; |
4890 | FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; |
4891 | return (RealCallerBits & RealCalleeBits) == RealCalleeBits; |
4892 | } |
4893 | |
4894 | bool X86TTIImpl::areFunctionArgsABICompatible( |
4895 | const Function *Caller, const Function *Callee, |
4896 | SmallPtrSetImpl<Argument *> &Args) const { |
4897 | if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) |
4898 | return false; |
4899 | |
4900 | |
4901 | |
4902 | |
4903 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
4904 | |
4905 | if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() == |
4906 | TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs()) |
4907 | return true; |
4908 | |
4909 | |
4910 | |
4911 | |
4912 | |
4913 | |
4914 | |
4915 | return llvm::none_of(Args, [](Argument *A) { |
4916 | auto *EltTy = cast<PointerType>(A->getType())->getElementType(); |
4917 | return EltTy->isVectorTy() || EltTy->isAggregateType(); |
4918 | }); |
4919 | } |
4920 | |
4921 | X86TTIImpl::TTI::MemCmpExpansionOptions |
4922 | X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { |
4923 | TTI::MemCmpExpansionOptions Options; |
4924 | Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); |
4925 | Options.NumLoadsPerBlock = 2; |
4926 | |
4927 | Options.AllowOverlappingLoads = true; |
4928 | if (IsZeroCmp) { |
4929 | |
4930 | |
4931 | const unsigned PreferredWidth = ST->getPreferVectorWidth(); |
4932 | if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); |
4933 | if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32); |
4934 | if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); |
4935 | } |
4936 | if (ST->is64Bit()) { |
4937 | Options.LoadSizes.push_back(8); |
4938 | } |
4939 | Options.LoadSizes.push_back(4); |
4940 | Options.LoadSizes.push_back(2); |
4941 | Options.LoadSizes.push_back(1); |
4942 | return Options; |
4943 | } |
4944 | |
4945 | bool X86TTIImpl::enableInterleavedAccessVectorization() { |
4946 | |
4947 | |
4948 | |
4949 | return !(ST->isAtom()); |
4950 | } |
4951 | |
4952 | |
4953 | |
4954 | |
4955 | |
4956 | |
4957 | |
4958 | |
4959 | |
4960 | |
4961 | |
4962 | |
4963 | |
4964 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2( |
4965 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
4966 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
4967 | TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { |
4968 | |
4969 | if (UseMaskForCond || UseMaskForGaps) |
4970 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4971 | Alignment, AddressSpace, CostKind, |
4972 | UseMaskForCond, UseMaskForGaps); |
4973 | |
4974 | |
4975 | |
4976 | if (Indices.size() && Indices.size() != Factor) |
4977 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4978 | Alignment, AddressSpace, CostKind); |
4979 | |
4980 | |
4981 | |
4982 | |
4983 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
4984 | |
4985 | |
4986 | |
4987 | |
4988 | if (!LegalVT.isVector()) |
4989 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4990 | Alignment, AddressSpace, CostKind); |
4991 | |
4992 | unsigned VF = VecTy->getNumElements() / Factor; |
4993 | Type *ScalarTy = VecTy->getElementType(); |
4994 | |
4995 | if (!ScalarTy->isIntegerTy()) |
4996 | ScalarTy = |
4997 | Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy)); |
4998 | |
4999 | |
5000 | InstructionCost MemOpCosts = getMemoryOpCost( |
5001 | Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); |
5002 | |
5003 | auto *VT = FixedVectorType::get(ScalarTy, VF); |
5004 | EVT ETy = TLI->getValueType(DL, VT); |
5005 | if (!ETy.isSimple()) |
5006 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5007 | Alignment, AddressSpace, CostKind); |
5008 | |
5009 | |
5010 | |
5011 | |
5012 | |
5013 | |
5014 | |
5015 | |
5016 | static const CostTblEntry AVX2InterleavedLoadTbl[] = { |
5017 | {2, MVT::v4i64, 6}, |
5018 | |
5019 | {3, MVT::v2i8, 10}, |
5020 | {3, MVT::v4i8, 4}, |
5021 | {3, MVT::v8i8, 9}, |
5022 | {3, MVT::v16i8, 11}, |
5023 | {3, MVT::v32i8, 13}, |
5024 | |
5025 | {3, MVT::v8i32, 17}, |
5026 | |
5027 | {4, MVT::v2i8, 12}, |
5028 | {4, MVT::v4i8, 4}, |
5029 | {4, MVT::v8i8, 20}, |
5030 | {4, MVT::v16i8, 39}, |
5031 | {4, MVT::v32i8, 80}, |
5032 | |
5033 | {8, MVT::v8i32, 40} |
5034 | }; |
5035 | |
5036 | static const CostTblEntry AVX2InterleavedStoreTbl[] = { |
5037 | {2, MVT::v4i64, 6}, |
5038 | |
5039 | {3, MVT::v2i8, 7}, |
5040 | {3, MVT::v4i8, 8}, |
5041 | {3, MVT::v8i8, 11}, |
5042 | {3, MVT::v16i8, 11}, |
5043 | {3, MVT::v32i8, 13}, |
5044 | |
5045 | {4, MVT::v2i8, 12}, |
5046 | {4, MVT::v4i8, 9}, |
5047 | {4, MVT::v8i8, 10}, |
5048 | {4, MVT::v16i8, 10}, |
5049 | {4, MVT::v32i8, 12} |
5050 | }; |
5051 | |
5052 | if (Opcode == Instruction::Load) { |
5053 | if (const auto *Entry = |
5054 | CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) |
5055 | return MemOpCosts + Entry->Cost; |
5056 | } else { |
5057 | assert(Opcode == Instruction::Store && |
5058 | "Expected Store Instruction at this point"); |
5059 | if (const auto *Entry = |
5060 | CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) |
5061 | return MemOpCosts + Entry->Cost; |
5062 | } |
5063 | |
5064 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5065 | Alignment, AddressSpace, CostKind); |
5066 | } |
5067 | |
5068 | |
5069 | |
5070 | |
5071 | |
5072 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( |
5073 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
5074 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
5075 | TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { |
5076 | |
5077 | if (UseMaskForCond || UseMaskForGaps) |
5078 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5079 | Alignment, AddressSpace, CostKind, |
5080 | UseMaskForCond, UseMaskForGaps); |
5081 | |
5082 | |
5083 | |
5084 | |
5085 | |
5086 | |
5087 | |
5088 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
5089 | unsigned VecTySize = DL.getTypeStoreSize(VecTy); |
5090 | unsigned LegalVTSize = LegalVT.getStoreSize(); |
5091 | unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; |
5092 | |
5093 | |
5094 | auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), |
5095 | LegalVT.getVectorNumElements()); |
5096 | InstructionCost MemOpCost = getMemoryOpCost( |
5097 | Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); |
5098 | |
5099 | unsigned VF = VecTy->getNumElements() / Factor; |
5100 | MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); |
5101 | |
5102 | if (Opcode == Instruction::Load) { |
5103 | |
5104 | |
5105 | |
5106 | |
5107 | |
5108 | |
5109 | static const CostTblEntry AVX512InterleavedLoadTbl[] = { |
5110 | {3, MVT::v16i8, 12}, |
5111 | {3, MVT::v32i8, 14}, |
5112 | {3, MVT::v64i8, 22}, |
5113 | }; |
5114 | |
5115 | if (const auto *Entry = |
5116 | CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT)) |
5117 | return NumOfMemOps * MemOpCost + Entry->Cost; |
5118 | |
5119 | |
5120 | |
5121 | |
5122 | |
5123 | TTI::ShuffleKind ShuffleKind = |
5124 | (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc; |
5125 | |
5126 | InstructionCost ShuffleCost = |
5127 | getShuffleCost(ShuffleKind, SingleMemOpTy, None, 0, nullptr); |
5128 | |
5129 | unsigned NumOfLoadsInInterleaveGrp = |
5130 | Indices.size() ? Indices.size() : Factor; |
5131 | auto *ResultTy = FixedVectorType::get(VecTy->getElementType(), |
5132 | VecTy->getNumElements() / Factor); |
5133 | InstructionCost NumOfResults = |
5134 | getTLI()->getTypeLegalizationCost(DL, ResultTy).first * |
5135 | NumOfLoadsInInterleaveGrp; |
5136 | |
5137 | |
5138 | |
5139 | unsigned NumOfUnfoldedLoads = |
5140 | NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2; |
5141 | |
5142 | |
5143 | unsigned NumOfShufflesPerResult = |
5144 | std::max((unsigned)1, (unsigned)(NumOfMemOps - 1)); |
5145 | |
5146 | |
5147 | |
5148 | |
5149 | InstructionCost NumOfMoves = 0; |
5150 | if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) |
5151 | NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2; |
5152 | |
5153 | InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + |
5154 | NumOfUnfoldedLoads * MemOpCost + NumOfMoves; |
5155 | |
5156 | return Cost; |
5157 | } |
5158 | |
5159 | |
5160 | assert(Opcode == Instruction::Store && |
5161 | "Expected Store Instruction at this point"); |
5162 | |
5163 | static const CostTblEntry AVX512InterleavedStoreTbl[] = { |
5164 | {3, MVT::v16i8, 12}, |
5165 | {3, MVT::v32i8, 14}, |
5166 | {3, MVT::v64i8, 26}, |
5167 | |
5168 | {4, MVT::v8i8, 10}, |
5169 | {4, MVT::v16i8, 11}, |
5170 | {4, MVT::v32i8, 14}, |
5171 | {4, MVT::v64i8, 24} |
5172 | }; |
5173 | |
5174 | if (const auto *Entry = |
5175 | CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT)) |
5176 | return NumOfMemOps * MemOpCost + Entry->Cost; |
5177 | |
5178 | |
5179 | |
5180 | |
5181 | unsigned NumOfSources = Factor; |
5182 | InstructionCost ShuffleCost = |
5183 | getShuffleCost(TTI::SK_PermuteTwoSrc, SingleMemOpTy, None, 0, nullptr); |
5184 | unsigned NumOfShufflesPerStore = NumOfSources - 1; |
5185 | |
5186 | |
5187 | |
5188 | unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2; |
5189 | InstructionCost Cost = |
5190 | NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + |
5191 | NumOfMoves; |
5192 | return Cost; |
5193 | } |
5194 | |
5195 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( |
5196 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
5197 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
5198 | bool UseMaskForCond, bool UseMaskForGaps) { |
5199 | auto isSupportedOnAVX512 = [&](Type *VecTy, bool HasBW) { |
5200 | Type *EltTy = cast<VectorType>(VecTy)->getElementType(); |
5201 | if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || |
5202 | EltTy->isIntegerTy(32) || EltTy->isPointerTy()) |
5203 | return true; |
5204 | if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8) || |
5205 | (!ST->useSoftFloat() && ST->hasFP16() && EltTy->isHalfTy())) |
5206 | return HasBW; |
5207 | return false; |
5208 | }; |
5209 | if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) |
5210 | return getInterleavedMemoryOpCostAVX512( |
5211 | Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment, |
5212 | AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); |
5213 | if (ST->hasAVX2()) |
5214 | return getInterleavedMemoryOpCostAVX2( |
5215 | Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment, |
5216 | AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); |
5217 | |
5218 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5219 | Alignment, AddressSpace, CostKind, |
5220 | UseMaskForCond, UseMaskForGaps); |
5221 | } |