LLVM 3.7.0
X86TargetTransformInfo.cpp
1 //===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This file implements a TargetTransformInfo analysis pass specific to the
11 /// X86 target machine. It uses the target's detailed information to provide
12 /// more precise answers to certain TTI queries, while letting the target
13 /// independent and default TTI implementations handle the rest.
14 ///
15 //===----------------------------------------------------------------------===//
16 
17 #include "X86TargetTransformInfo.h"
18 #include "llvm/Analysis/TargetTransformInfo.h"
19 #include "llvm/CodeGen/BasicTTIImpl.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/Support/Debug.h"
22 #include "llvm/Target/CostTable.h"
23 #include "llvm/Target/TargetLowering.h"
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "x86tti"
27 
28 //===----------------------------------------------------------------------===//
29 //
30 // X86 cost model.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 TargetTransformInfo::PopcntSupportKind
35 X86TTIImpl::getPopcntSupport(unsigned TyWidth) {
36  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
37  // TODO: Currently the __builtin_popcount() implementation using SSE3
38  // instructions is inefficient. Once the problem is fixed, we should
39  // call ST->hasSSE3() instead of ST->hasPOPCNT().
40  return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software;
41 }
42 
43 unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
44  if (Vector && !ST->hasSSE1())
45  return 0;
46 
47  if (ST->is64Bit()) {
48  if (Vector && ST->hasAVX512())
49  return 32;
50  return 16;
51  }
52  return 8;
53 }
54 
55 unsigned X86TTIImpl::getRegisterBitWidth(bool Vector) {
56  if (Vector) {
57  if (ST->hasAVX512()) return 512;
58  if (ST->hasAVX()) return 256;
59  if (ST->hasSSE1()) return 128;
60  return 0;
61  }
62 
63  if (ST->is64Bit())
64  return 64;
65  return 32;
66 
67 }
68 
69 unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) {
70  // If the loop will not be vectorized, don't interleave the loop.
71  // Let the regular unroller handle the loop, which saves the overflow
72  // check and memory check cost.
73  if (VF == 1)
74  return 1;
75 
76  if (ST->isAtom())
77  return 1;
78 
79  // Sandybridge and Haswell have multiple execution ports and pipelined
80  // vector units.
81  if (ST->hasAVX())
82  return 4;
83 
84  return 2;
85 }
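// Illustrative example (assuming an AVX-capable subtarget such as Sandybridge
// or Haswell): with a vectorization factor of 8 the loop vectorizer may
// interleave up to 4 copies of the vector loop body; on Atom, or when VF == 1,
// the answer is 1 and no interleaving happens.
//
//   unsigned IC = TTI.getMaxInterleaveFactor(/*VF=*/8); // 4 with AVX, 1 on Atom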
86 
87 unsigned X86TTIImpl::getArithmeticInstrCost(
88  unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
89  TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
90  TTI::OperandValueProperties Opd2PropInfo) {
91  // Legalize the type.
92  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
93 
94  int ISD = TLI->InstructionOpcodeToISD(Opcode);
95  assert(ISD && "Invalid opcode");
96 
97  if (ISD == ISD::SDIV &&
98  Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
99  Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) {
100  // On X86, a vector signed division by a power-of-two constant is
101  // normally expanded to the sequence SRA + SRL + ADD + SRA.
102  // The OperandValue properties may not be the same as those of the
103  // previous operation; conservatively assume OP_None.
104  unsigned Cost =
105  2 * getArithmeticInstrCost(Instruction::AShr, Ty, Op1Info, Op2Info,
106  TargetTransformInfo::OP_None,
107  TargetTransformInfo::OP_None);
108  Cost += getArithmeticInstrCost(Instruction::LShr, Ty, Op1Info, Op2Info,
109  TargetTransformInfo::OP_None,
110  TargetTransformInfo::OP_None);
111  Cost += getArithmeticInstrCost(Instruction::Add, Ty, Op1Info, Op2Info,
112  TargetTransformInfo::OP_None,
113  TargetTransformInfo::OP_None);
114 
115  return Cost;
116  }
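// For illustration: given IR such as
//   %d = sdiv <8 x i16> %x, <splat of i16 4>
// where the caller reports the divisor as a uniform power-of-two constant,
// this path prices the expansion as
//   Cost = 2 * cost(ashr v8i16) + cost(lshr v8i16) + cost(add v8i16)
// with the operand properties of the sub-operations conservatively reset to OP_None.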
117 
118  static const CostTblEntry<MVT::SimpleValueType>
119  AVX2UniformConstCostTable[] = {
120  { ISD::SRA, MVT::v4i64, 4 }, // 2 x psrad + shuffle.
121 
122  { ISD::SDIV, MVT::v16i16, 6 }, // vpmulhw sequence
123  { ISD::UDIV, MVT::v16i16, 6 }, // vpmulhuw sequence
124  { ISD::SDIV, MVT::v8i32, 15 }, // vpmuldq sequence
125  { ISD::UDIV, MVT::v8i32, 15 }, // vpmuludq sequence
126  };
127 
128  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
129  ST->hasAVX2()) {
130  int Idx = CostTableLookup(AVX2UniformConstCostTable, ISD, LT.second);
131  if (Idx != -1)
132  return LT.first * AVX2UniformConstCostTable[Idx].Cost;
133  }
134 
135  static const CostTblEntry<MVT::SimpleValueType> AVX512CostTable[] = {
136  { ISD::SHL, MVT::v16i32, 1 },
137  { ISD::SRL, MVT::v16i32, 1 },
138  { ISD::SRA, MVT::v16i32, 1 },
139  { ISD::SHL, MVT::v8i64, 1 },
140  { ISD::SRL, MVT::v8i64, 1 },
141  { ISD::SRA, MVT::v8i64, 1 },
142  };
143 
144  static const CostTblEntry<MVT::SimpleValueType> AVX2CostTable[] = {
145  // Shifts on v4i64/v8i32 are legal on AVX2 even though we declare them as
146  // custom, so that we can detect the cases where the shift amount is a scalar.
147  { ISD::SHL, MVT::v4i32, 1 },
148  { ISD::SRL, MVT::v4i32, 1 },
149  { ISD::SRA, MVT::v4i32, 1 },
150  { ISD::SHL, MVT::v8i32, 1 },
151  { ISD::SRL, MVT::v8i32, 1 },
152  { ISD::SRA, MVT::v8i32, 1 },
153  { ISD::SHL, MVT::v2i64, 1 },
154  { ISD::SRL, MVT::v2i64, 1 },
155  { ISD::SHL, MVT::v4i64, 1 },
156  { ISD::SRL, MVT::v4i64, 1 },
157 
158  { ISD::SHL, MVT::v32i8, 11 }, // vpblendvb sequence.
159  { ISD::SHL, MVT::v16i16, 10 }, // extend/vpsllvd/pack sequence.
160 
161  { ISD::SRL, MVT::v32i8, 11 }, // vpblendvb sequence.
162  { ISD::SRL, MVT::v16i16, 10 }, // extend/vpsrlvd/pack sequence.
163 
164  { ISD::SRA, MVT::v32i8, 24 }, // vpblendvb sequence.
165  { ISD::SRA, MVT::v16i16, 10 }, // extend/vpsravd/pack sequence.
166  { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized.
167 
168  // Vectorizing division is a bad idea. See the SSE2 table for more comments.
169  { ISD::SDIV, MVT::v32i8, 32*20 },
170  { ISD::SDIV, MVT::v16i16, 16*20 },
171  { ISD::SDIV, MVT::v8i32, 8*20 },
172  { ISD::SDIV, MVT::v4i64, 4*20 },
173  { ISD::UDIV, MVT::v32i8, 32*20 },
174  { ISD::UDIV, MVT::v16i16, 16*20 },
175  { ISD::UDIV, MVT::v8i32, 8*20 },
176  { ISD::UDIV, MVT::v4i64, 4*20 },
177  };
178 
179  if (ST->hasAVX512()) {
180  int Idx = CostTableLookup(AVX512CostTable, ISD, LT.second);
181  if (Idx != -1)
182  return LT.first * AVX512CostTable[Idx].Cost;
183  }
184  // Look for AVX2 lowering tricks.
185  if (ST->hasAVX2()) {
186  if (ISD == ISD::SHL && LT.second == MVT::v16i16 &&
187  (Op2Info == TargetTransformInfo::OK_UniformConstantValue ||
188  Op2Info == TargetTransformInfo::OK_NonUniformConstantValue))
189  // On AVX2, a packed v16i16 shift left by a constant build_vector
190  // is lowered into a vector multiply (vpmullw).
191  return LT.first;
192 
193  int Idx = CostTableLookup(AVX2CostTable, ISD, LT.second);
194  if (Idx != -1)
195  return LT.first * AVX2CostTable[Idx].Cost;
196  }
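// For illustration, on an AVX2 subtarget:
//   shl <16 x i16> %x, <constant build_vector>  -> priced as LT.first (one vpmullw
//                                                  per legalized part)
//   shl <32 x i8>  %x, %y                       -> 11 per legalized part
//                                                  (the vpblendvb sequence above)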
197 
198  static const CostTblEntry<MVT::SimpleValueType>
199  SSE2UniformConstCostTable[] = {
200  // We don't correctly identify costs of casts because they are marked as
201  // custom.
202  // Constant splats are cheaper for the following instructions.
203  { ISD::SHL, MVT::v16i8, 1 }, // psllw.
204  { ISD::SHL, MVT::v8i16, 1 }, // psllw.
205  { ISD::SHL, MVT::v4i32, 1 }, // pslld
206  { ISD::SHL, MVT::v2i64, 1 }, // psllq.
207 
208  { ISD::SRL, MVT::v16i8, 1 }, // psrlw.
209  { ISD::SRL, MVT::v8i16, 1 }, // psrlw.
210  { ISD::SRL, MVT::v4i32, 1 }, // psrld.
211  { ISD::SRL, MVT::v2i64, 1 }, // psrlq.
212 
213  { ISD::SRA, MVT::v16i8, 4 }, // psrlw, pand, pxor, psubb.
214  { ISD::SRA, MVT::v8i16, 1 }, // psraw.
215  { ISD::SRA, MVT::v4i32, 1 }, // psrad.
216  { ISD::SRA, MVT::v2i64, 4 }, // 2 x psrad + shuffle.
217 
218  { ISD::SDIV, MVT::v8i16, 6 }, // pmulhw sequence
219  { ISD::UDIV, MVT::v8i16, 6 }, // pmulhuw sequence
220  { ISD::SDIV, MVT::v4i32, 19 }, // pmuludq sequence
221  { ISD::UDIV, MVT::v4i32, 15 }, // pmuludq sequence
222  };
223 
224  if (Op2Info == TargetTransformInfo::OK_UniformConstantValue &&
225  ST->hasSSE2()) {
226  // pmuldq sequence.
227  if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41())
228  return LT.first * 15;
229 
230  int Idx = CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second);
231  if (Idx != -1)
232  return LT.first * SSE2UniformConstCostTable[Idx].Cost;
233  }
234 
235  if (ISD == ISD::SHL &&
236  Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) {
237  EVT VT = LT.second;
238  if ((VT == MVT::v8i16 && ST->hasSSE2()) ||
239  (VT == MVT::v4i32 && ST->hasSSE41()))
240  // A vector shift left by a non-uniform constant can be lowered
241  // into vector multiply (pmullw/pmulld).
242  return LT.first;
243  if (VT == MVT::v4i32 && ST->hasSSE2())
244  // A vector shift left by a non-uniform constant is converted
245  // into a vector multiply; the new multiply is eventually
246  // lowered into a sequence of shuffles and 2 x pmuludq.
247  ISD = ISD::MUL;
248  }
249 
250  static const CostTblEntry<MVT::SimpleValueType> SSE2CostTable[] = {
251  // We don't correctly identify costs of casts because they are marked as
252  // custom.
253  // For some cases, where the shift amount is a scalar we would be able
254  // to generate better code. Unfortunately, when this is the case the value
255  // (the splat) will get hoisted out of the loop, thereby making it invisible
256  // to ISel. The cost model must return worst case assumptions because it is
257  // used for vectorization and we don't want to make vectorized code worse
258  // than scalar code.
259  { ISD::SHL, MVT::v16i8, 26 }, // cmpgtb sequence.
260  { ISD::SHL, MVT::v8i16, 32 }, // cmpgtb sequence.
261  { ISD::SHL, MVT::v4i32, 2*5 }, // We optimized this using mul.
262  { ISD::SHL, MVT::v2i64, 2*10 }, // Scalarized.
263  { ISD::SHL, MVT::v4i64, 4*10 }, // Scalarized.
264 
265  { ISD::SRL, MVT::v16i8, 26 }, // cmpgtb sequence.
266  { ISD::SRL, MVT::v8i16, 32 }, // cmpgtb sequence.
267  { ISD::SRL, MVT::v4i32, 16 }, // Shift each lane + blend.
268  { ISD::SRL, MVT::v2i64, 2*10 }, // Scalarized.
269 
270  { ISD::SRA, MVT::v16i8, 54 }, // unpacked cmpgtb sequence.
271  { ISD::SRA, MVT::v8i16, 32 }, // cmpgtb sequence.
272  { ISD::SRA, MVT::v4i32, 16 }, // Shift each lane + blend.
273  { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized.
274 
275  // It is not a good idea to vectorize division. We have to scalarize it and
276  // in the process we will often end up having to spill regular
277  // registers. The overhead of division is going to dominate most kernels
278  // anyways so try hard to prevent vectorization of division - it is
279  // generally a bad idea. Assume somewhat arbitrarily that we have to be able
280  // to hide "20 cycles" for each lane.
281  { ISD::SDIV, MVT::v16i8, 16*20 },
282  { ISD::SDIV, MVT::v8i16, 8*20 },
283  { ISD::SDIV, MVT::v4i32, 4*20 },
284  { ISD::SDIV, MVT::v2i64, 2*20 },
285  { ISD::UDIV, MVT::v16i8, 16*20 },
286  { ISD::UDIV, MVT::v8i16, 8*20 },
287  { ISD::UDIV, MVT::v4i32, 4*20 },
288  { ISD::UDIV, MVT::v2i64, 2*20 },
289  };
290 
291  if (ST->hasSSE2()) {
292  int Idx = CostTableLookup(SSE2CostTable, ISD, LT.second);
293  if (Idx != -1)
294  return LT.first * SSE2CostTable[Idx].Cost;
295  }
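// Worked example (illustrative, assuming an SSE2-only subtarget and a variable
// shift amount): <8 x i32> is not a legal type, so it is split into two v4i32
// halves (LT.first == 2). The table entry for SHL v4i32 is 2*5, so the reported
// cost is 2 * 10 == 20.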
296 
297  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTable[] = {
298  // We don't have to scalarize unsupported ops. We can issue two half-sized
299  // operations and we only need to extract the upper YMM half.
300  // Two ops + 1 extract + 1 insert = 4.
301  { ISD::MUL, MVT::v16i16, 4 },
302  { ISD::MUL, MVT::v8i32, 4 },
303  { ISD::SUB, MVT::v8i32, 4 },
304  { ISD::ADD, MVT::v8i32, 4 },
305  { ISD::SUB, MVT::v4i64, 4 },
306  { ISD::ADD, MVT::v4i64, 4 },
307  // A v4i64 multiply is custom lowered as two split v2i64 vectors that then
308  // are lowered as a series of long multiplies(3), shifts(4) and adds(2)
309  // Because we believe v4i64 to be a legal type, we must also include the
310  // split factor of two in the cost table. Therefore, the cost here is 18
311  // instead of 9.
312  { ISD::MUL, MVT::v4i64, 18 },
313  };
314 
315  // Look for AVX1 lowering tricks.
316  if (ST->hasAVX() && !ST->hasAVX2()) {
317  EVT VT = LT.second;
318 
319  // v16i16 and v8i32 shifts by non-uniform constants are lowered into a
320  // sequence of extract + two vector multiply + insert.
321  if (ISD == ISD::SHL && (VT == MVT::v8i32 || VT == MVT::v16i16) &&
322  Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)
323  ISD = ISD::MUL;
324 
325  int Idx = CostTableLookup(AVX1CostTable, ISD, VT);
326  if (Idx != -1)
327  return LT.first * AVX1CostTable[Idx].Cost;
328  }
329 
330  // Custom lowering of vectors.
331  static const CostTblEntry<MVT::SimpleValueType> CustomLowered[] = {
332  // A v2i64/v4i64 and multiply is custom lowered as a series of long
333  // multiplies(3), shifts(4) and adds(2).
334  { ISD::MUL, MVT::v2i64, 9 },
335  { ISD::MUL, MVT::v4i64, 9 },
336  };
337  int Idx = CostTableLookup(CustomLowered, ISD, LT.second);
338  if (Idx != -1)
339  return LT.first * CustomLowered[Idx].Cost;
340 
341  // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle,
342  // 2x pmuludq, 2x shuffle.
343  if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() &&
344  !ST->hasSSE41())
345  return LT.first * 6;
346 
347  // Fallback to the default implementation.
348  return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info);
349 }
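// A minimal usage sketch (hypothetical client code, not definitive; `F` and
// `VecTy` are assumed to be in scope inside a FunctionPass):
//
//   TargetTransformInfo &TTI =
//       getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
//   unsigned MulCost = TTI.getArithmeticInstrCost(Instruction::Mul, VecTy);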
350 
351 unsigned X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
352  Type *SubTp) {
353  // We only estimate the cost of reverse and alternate shuffles.
354  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
355  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
356 
357  if (Kind == TTI::SK_Reverse) {
358  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
359  unsigned Cost = 1;
360  if (LT.second.getSizeInBits() > 128)
361  Cost = 3; // Extract + insert + copy.
362 
363  // Multiply by the number of parts.
364  return Cost * LT.first;
365  }
366 
367  if (Kind == TTI::SK_Alternate) {
368  // 64-bit packed float vectors (v2f32) are widened to type v4f32.
369  // 64-bit packed integer vectors (v2i32) are promoted to type v2i64.
370  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
371 
372  // The backend knows how to generate a single VEX.256 version of
373  // instruction VPBLENDW if the target supports AVX2.
374  if (ST->hasAVX2() && LT.second == MVT::v16i16)
375  return LT.first;
376 
377  static const CostTblEntry<MVT::SimpleValueType> AVXAltShuffleTbl[] = {
378  {ISD::VECTOR_SHUFFLE, MVT::v4i64, 1}, // vblendpd
379  {ISD::VECTOR_SHUFFLE, MVT::v4f64, 1}, // vblendpd
380 
381  {ISD::VECTOR_SHUFFLE, MVT::v8i32, 1}, // vblendps
382  {ISD::VECTOR_SHUFFLE, MVT::v8f32, 1}, // vblendps
383 
384  // This shuffle is custom lowered into a sequence of:
385  // 2x vextractf128 , 2x vpblendw , 1x vinsertf128
386  {ISD::VECTOR_SHUFFLE, MVT::v16i16, 5},
387 
388  // This shuffle is custom lowered into a long sequence of:
389  // 2x vextractf128 , 4x vpshufb , 2x vpor , 1x vinsertf128
390  {ISD::VECTOR_SHUFFLE, MVT::v32i8, 9}
391  };
392 
393  if (ST->hasAVX()) {
394  int Idx = CostTableLookup(AVXAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
395  if (Idx != -1)
396  return LT.first * AVXAltShuffleTbl[Idx].Cost;
397  }
398 
399  static const CostTblEntry<MVT::SimpleValueType> SSE41AltShuffleTbl[] = {
400  // These are lowered into movsd.
401  {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
402  {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
403 
404  // packed float vectors with four elements are lowered into BLENDI dag
405  // nodes. A v4i32/v4f32 BLENDI generates a single 'blendps'/'blendpd'.
406  {ISD::VECTOR_SHUFFLE, MVT::v4i32, 1},
407  {ISD::VECTOR_SHUFFLE, MVT::v4f32, 1},
408 
409  // This shuffle generates a single pshufw.
410  {ISD::VECTOR_SHUFFLE, MVT::v8i16, 1},
411 
412  // There is no instruction that matches a v16i8 alternate shuffle.
413  // The backend will expand it into the sequence 'pshufb + pshufb + or'.
414  {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3}
415  };
416 
417  if (ST->hasSSE41()) {
418  int Idx = CostTableLookup(SSE41AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
419  if (Idx != -1)
420  return LT.first * SSE41AltShuffleTbl[Idx].Cost;
421  }
422 
423  static const CostTblEntry<MVT::SimpleValueType> SSSE3AltShuffleTbl[] = {
424  {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
425  {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
426 
427  // SSE3 doesn't have 'blendps'. The following shuffles are expanded into
428  // the sequence 'shufps + pshufd'
429  {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
430  {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
431 
432  {ISD::VECTOR_SHUFFLE, MVT::v8i16, 3}, // pshufb + pshufb + or
433  {ISD::VECTOR_SHUFFLE, MVT::v16i8, 3} // pshufb + pshufb + or
434  };
435 
436  if (ST->hasSSSE3()) {
437  int Idx = CostTableLookup(SSSE3AltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
438  if (Idx != -1)
439  return LT.first * SSSE3AltShuffleTbl[Idx].Cost;
440  }
441 
442  static const CostTblEntry<MVT::SimpleValueType> SSEAltShuffleTbl[] = {
443  {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1}, // movsd
444  {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1}, // movsd
445 
446  {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2}, // shufps + pshufd
447  {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2}, // shufps + pshufd
448 
449  // This is expanded into a long sequence of four extract + four insert.
450  {ISD::VECTOR_SHUFFLE, MVT::v8i16, 8}, // 4 x pextrw + 4 pinsrw.
451 
452  // 8 x (pinsrw + pextrw + and + movb + movzb + or)
453  {ISD::VECTOR_SHUFFLE, MVT::v16i8, 48}
454  };
455 
456  // Fall-back (SSE3 and SSE2).
457  int Idx = CostTableLookup(SSEAltShuffleTbl, ISD::VECTOR_SHUFFLE, LT.second);
458  if (Idx != -1)
459  return LT.first * SSEAltShuffleTbl[Idx].Cost;
460  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
461  }
462 
463  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
464 }
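// For illustration: an "alternate" shuffle picks elements alternately from the
// two sources, e.g.
//   %r = shufflevector <4 x float> %A, <4 x float> %B,
//                      <4 x i32> <i32 0, i32 5, i32 2, i32 7>
// On SSE4.1 this maps to a single blendps, so the table above reports 1;
// pre-SSSE3 targets fall back to the shufps + pshufd sequence costed at 2.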
465 
466 unsigned X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
467  int ISD = TLI->InstructionOpcodeToISD(Opcode);
468  assert(ISD && "Invalid opcode");
469 
470  std::pair<unsigned, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src);
471  std::pair<unsigned, MVT> LTDest = TLI->getTypeLegalizationCost(DL, Dst);
472 
473  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
474  SSE2ConvTbl[] = {
475  // These are somewhat magic numbers justified by looking at the output of
476  // Intel's IACA, running some kernels and making sure when we take
477  // legalization into account the throughput will be overestimated.
486  // There are faster sequences for float conversions.
495  };
496 
497  if (ST->hasSSE2() && !ST->hasAVX()) {
498  int Idx =
499  ConvertCostTableLookup(SSE2ConvTbl, ISD, LTDest.second, LTSrc.second);
500  if (Idx != -1)
501  return LTSrc.first * SSE2ConvTbl[Idx].Cost;
502  }
503 
504  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
505  AVX512ConversionTbl[] = {
510 
516 
517  // v16i1 -> v16i32 - load + broadcast
520 
527 
535  };
536 
537  if (ST->hasAVX512()) {
538  int Idx = ConvertCostTableLookup(AVX512ConversionTbl, ISD, LTDest.second,
539  LTSrc.second);
540  if (Idx != -1)
541  return AVX512ConversionTbl[Idx].Cost;
542  }
543  EVT SrcTy = TLI->getValueType(DL, Src);
544  EVT DstTy = TLI->getValueType(DL, Dst);
545 
546  // The function getSimpleVT only handles simple value types.
547  if (!SrcTy.isSimple() || !DstTy.isSimple())
548  return BaseT::getCastInstrCost(Opcode, Dst, Src);
549 
550  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
551  AVX2ConversionTbl[] = {
568 
575 
578 
580  };
581 
582  static const TypeConversionCostTblEntry<MVT::SimpleValueType>
583  AVXConversionTbl[] = {
600 
608 
621 
634  // The generic code to compute the scalar overhead is currently broken.
635  // Workaround this limitation by estimating the scalarization overhead
636  // here. We have roughly 10 instructions per scalar element.
637  // Multiply that by the vector width.
638  // FIXME: remove that when PR19268 is fixed.
641 
644  // This node is expanded into scalarized operations but BasicTTI is overly
645  // optimistic estimating its cost. It computes 3 per element (one
646  // vector-extract, one scalar conversion and one vector-insert). The
647  // problem is that the inserts form a read-modify-write chain so latency
648  // should be factored in too. Inflating the cost per element by 1.
651  };
652 
653  if (ST->hasAVX2()) {
654  int Idx = ConvertCostTableLookup(AVX2ConversionTbl, ISD,
655  DstTy.getSimpleVT(), SrcTy.getSimpleVT());
656  if (Idx != -1)
657  return AVX2ConversionTbl[Idx].Cost;
658  }
659 
660  if (ST->hasAVX()) {
661  int Idx = ConvertCostTableLookup(AVXConversionTbl, ISD, DstTy.getSimpleVT(),
662  SrcTy.getSimpleVT());
663  if (Idx != -1)
664  return AVXConversionTbl[Idx].Cost;
665  }
666 
667  return BaseT::getCastInstrCost(Opcode, Dst, Src);
668 }
669 
670 unsigned X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
671  Type *CondTy) {
672  // Legalize the type.
673  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
674 
675  MVT MTy = LT.second;
676 
677  int ISD = TLI->InstructionOpcodeToISD(Opcode);
678  assert(ISD && "Invalid opcode");
679 
680  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTbl[] = {
681  { ISD::SETCC, MVT::v2f64, 1 },
682  { ISD::SETCC, MVT::v4f32, 1 },
683  { ISD::SETCC, MVT::v2i64, 1 },
684  { ISD::SETCC, MVT::v4i32, 1 },
685  { ISD::SETCC, MVT::v8i16, 1 },
686  { ISD::SETCC, MVT::v16i8, 1 },
687  };
688 
689  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTbl[] = {
690  { ISD::SETCC, MVT::v4f64, 1 },
691  { ISD::SETCC, MVT::v8f32, 1 },
692  // AVX1 does not support 8-wide integer compare.
693  { ISD::SETCC, MVT::v4i64, 4 },
694  { ISD::SETCC, MVT::v8i32, 4 },
695  { ISD::SETCC, MVT::v16i16, 4 },
696  { ISD::SETCC, MVT::v32i8, 4 },
697  };
698 
699  static const CostTblEntry<MVT::SimpleValueType> AVX2CostTbl[] = {
700  { ISD::SETCC, MVT::v4i64, 1 },
701  { ISD::SETCC, MVT::v8i32, 1 },
702  { ISD::SETCC, MVT::v16i16, 1 },
703  { ISD::SETCC, MVT::v32i8, 1 },
704  };
705 
706  static const CostTblEntry<MVT::SimpleValueType> AVX512CostTbl[] = {
707  { ISD::SETCC, MVT::v8i64, 1 },
708  { ISD::SETCC, MVT::v16i32, 1 },
709  { ISD::SETCC, MVT::v8f64, 1 },
710  { ISD::SETCC, MVT::v16f32, 1 },
711  };
712 
713  if (ST->hasAVX512()) {
714  int Idx = CostTableLookup(AVX512CostTbl, ISD, MTy);
715  if (Idx != -1)
716  return LT.first * AVX512CostTbl[Idx].Cost;
717  }
718 
719  if (ST->hasAVX2()) {
720  int Idx = CostTableLookup(AVX2CostTbl, ISD, MTy);
721  if (Idx != -1)
722  return LT.first * AVX2CostTbl[Idx].Cost;
723  }
724 
725  if (ST->hasAVX()) {
726  int Idx = CostTableLookup(AVX1CostTbl, ISD, MTy);
727  if (Idx != -1)
728  return LT.first * AVX1CostTbl[Idx].Cost;
729  }
730 
731  if (ST->hasSSE42()) {
732  int Idx = CostTableLookup(SSE42CostTbl, ISD, MTy);
733  if (Idx != -1)
734  return LT.first * SSE42CostTbl[Idx].Cost;
735  }
736 
737  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy);
738 }
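// Worked example (illustrative): an icmp on <8 x i32> is a legal 256-bit
// operation once AVX is available, but AVX1 has no 8-wide integer compare, so
// the AVX1 table reports 4; the same compare costs 1 with AVX2.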
739 
740 unsigned X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
741  unsigned Index) {
742  assert(Val->isVectorTy() && "This must be a vector type");
743 
744  if (Index != -1U) {
745  // Legalize the type.
746  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Val);
747 
748  // This type is legalized to a scalar type.
749  if (!LT.second.isVector())
750  return 0;
751 
752  // The type may be split. Normalize the index to the new type.
753  unsigned Width = LT.second.getVectorNumElements();
754  Index = Index % Width;
755 
756  // Floating point scalars are already located in index #0.
757  if (Val->getScalarType()->isFloatingPointTy() && Index == 0)
758  return 0;
759  }
760 
761  return BaseT::getVectorInstrCost(Opcode, Val, Index);
762 }
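// For illustration: extracting lane 6 of an <8 x float> on an SSE-only target.
// v8f32 legalizes to two v4f32 halves (Width == 4), so the index is normalized
// to 6 % 4 == 2 before the base implementation prices the extract. Extracting
// lane 0 of a floating-point vector is free.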
763 
764 unsigned X86TTIImpl::getScalarizationOverhead(Type *Ty, bool Insert,
765  bool Extract) {
766  assert (Ty->isVectorTy() && "Can only scalarize vectors");
767  unsigned Cost = 0;
768 
769  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
770  if (Insert)
771  Cost += getVectorInstrCost(Instruction::InsertElement, Ty, i);
772  if (Extract)
773  Cost += getVectorInstrCost(Instruction::ExtractElement, Ty, i);
774  }
775 
776  return Cost;
777 }
778 
779 unsigned X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
780  unsigned Alignment,
781  unsigned AddressSpace) {
782  // Handle non-power-of-two vectors such as <3 x float>
783  if (VectorType *VTy = dyn_cast<VectorType>(Src)) {
784  unsigned NumElem = VTy->getVectorNumElements();
785 
786  // Handle a few common cases:
787  // <3 x float>
788  if (NumElem == 3 && VTy->getScalarSizeInBits() == 32)
789  // Cost = 64 bit store + extract + 32 bit store.
790  return 3;
791 
792  // <3 x double>
793  if (NumElem == 3 && VTy->getScalarSizeInBits() == 64)
794  // Cost = 128 bit store + unpack + 64 bit store.
795  return 3;
796 
797  // Assume that all other non-power-of-two numbers are scalarized.
798  if (!isPowerOf2_32(NumElem)) {
799  unsigned Cost = BaseT::getMemoryOpCost(Opcode, VTy->getScalarType(),
800  Alignment, AddressSpace);
801  unsigned SplitCost = getScalarizationOverhead(Src,
802  Opcode == Instruction::Load,
803  Opcode==Instruction::Store);
804  return NumElem * Cost + SplitCost;
805  }
806  }
807 
808  // Legalize the type.
809  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
810  assert((Opcode == Instruction::Load || Opcode == Instruction::Store) &&
811  "Invalid Opcode");
812 
813  // Each load/store unit costs 1.
814  unsigned Cost = LT.first * 1;
815 
816  // On Sandybridge 256bit load/stores are double pumped
817  // (but not on Haswell).
818  if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2())
819  Cost*=2;
820 
821  return Cost;
822 }
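// Worked examples (illustrative): a store of <3 x float> costs 3 (64-bit store
// + extract + 32-bit store); a load of <5 x float> has a non-power-of-two
// element count, so it is priced as 5 scalar loads plus the insert-element
// overhead from getScalarizationOverhead; a 256-bit load on an AVX1 (pre-AVX2)
// core is double pumped, so its cost is doubled.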
823 
824 unsigned X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy,
825  unsigned Alignment,
826  unsigned AddressSpace) {
827  VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy);
828  if (!SrcVTy)
829  // For a scalar type, take the regular memory-op cost without a mask.
830  return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace);
831 
832  unsigned NumElem = SrcVTy->getVectorNumElements();
833  VectorType *MaskTy =
834  VectorType::get(Type::getInt8Ty(getGlobalContext()), NumElem);
835  if ((Opcode == Instruction::Load && !isLegalMaskedLoad(SrcVTy, 1)) ||
836  (Opcode == Instruction::Store && !isLegalMaskedStore(SrcVTy, 1)) ||
837  !isPowerOf2_32(NumElem)) {
838  // Scalarization
839  unsigned MaskSplitCost = getScalarizationOverhead(MaskTy, false, true);
840  unsigned ScalarCompareCost =
841  getCmpSelInstrCost(Instruction::ICmp,
842  Type::getInt8Ty(getGlobalContext()), nullptr);
843  unsigned BranchCost = getCFInstrCost(Instruction::Br);
844  unsigned MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost);
845 
846  unsigned ValueSplitCost =
847  getScalarizationOverhead(SrcVTy, Opcode == Instruction::Load,
848  Opcode == Instruction::Store);
849  unsigned MemopCost =
850  NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(),
851  Alignment, AddressSpace);
852  return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost;
853  }
854 
855  // Legalize the type.
856  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy);
857  unsigned Cost = 0;
858  if (LT.second != TLI->getValueType(DL, SrcVTy).getSimpleVT() &&
859  LT.second.getVectorNumElements() == NumElem)
860  // Promotion requires expand/truncate for data and a shuffle for mask.
861  Cost += getShuffleCost(TTI::SK_Alternate, SrcVTy, 0, 0) +
862  getShuffleCost(TTI::SK_Alternate, MaskTy, 0, 0);
863 
864  else if (LT.second.getVectorNumElements() > NumElem) {
865  VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(),
866  LT.second.getVectorNumElements());
867  // Expanding requires filling the mask with zeroes.
868  Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy);
869  }
870  if (!ST->hasAVX512())
871  return Cost + LT.first*4; // Each maskmov costs 4
872 
873  // AVX-512 masked load/store is cheaper.
874  return Cost+LT.first;
875 }
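// For illustration: if the target has no legal masked operation for the type
// (or the element count is not a power of two), the cost is fully scalarized:
//   NumElem * scalar-mem-op + value split + mask split
//     + NumElem * (branch + scalar icmp)
// Otherwise, after any promotion/expansion shuffle cost, each legalized maskmov
// costs 4 before AVX-512 and 1 with it.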
876 
877 unsigned X86TTIImpl::getAddressComputationCost(Type *Ty, bool IsComplex) {
878  // Address computations in vectorized code with non-consecutive addresses will
879  // likely result in more instructions compared to scalar code where the
880  // computation can more often be merged into the index mode. The resulting
881  // extra micro-ops can significantly decrease throughput.
882  unsigned NumVectorInstToHideOverhead = 10;
883 
884  if (Ty->isVectorTy() && IsComplex)
885  return NumVectorInstToHideOverhead;
886 
887  return BaseT::getAddressComputationCost(Ty, IsComplex);
888 }
889 
890 unsigned X86TTIImpl::getReductionCost(unsigned Opcode, Type *ValTy,
891  bool IsPairwise) {
892 
893  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
894 
895  MVT MTy = LT.second;
896 
897  int ISD = TLI->InstructionOpcodeToISD(Opcode);
898  assert(ISD && "Invalid opcode");
899 
900  // We use the Intel Architecture Code Analyzer (IACA) to measure the
901  // throughput and use that as the cost.
902 
903  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblPairWise[] = {
904  { ISD::FADD, MVT::v2f64, 2 },
905  { ISD::FADD, MVT::v4f32, 4 },
906  { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
907  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
908  { ISD::ADD, MVT::v8i16, 5 },
909  };
910 
911  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblPairWise[] = {
912  { ISD::FADD, MVT::v4f32, 4 },
913  { ISD::FADD, MVT::v4f64, 5 },
914  { ISD::FADD, MVT::v8f32, 7 },
915  { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
916  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.5".
917  { ISD::ADD, MVT::v4i64, 5 }, // The data reported by the IACA tool is "4.8".
918  { ISD::ADD, MVT::v8i16, 5 },
919  { ISD::ADD, MVT::v8i32, 5 },
920  };
921 
922  static const CostTblEntry<MVT::SimpleValueType> SSE42CostTblNoPairWise[] = {
923  { ISD::FADD, MVT::v2f64, 2 },
924  { ISD::FADD, MVT::v4f32, 4 },
925  { ISD::ADD, MVT::v2i64, 2 }, // The data reported by the IACA tool is "1.6".
926  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "3.3".
927  { ISD::ADD, MVT::v8i16, 4 }, // The data reported by the IACA tool is "4.3".
928  };
929 
930  static const CostTblEntry<MVT::SimpleValueType> AVX1CostTblNoPairWise[] = {
931  { ISD::FADD, MVT::v4f32, 3 },
932  { ISD::FADD, MVT::v4f64, 3 },
933  { ISD::FADD, MVT::v8f32, 4 },
934  { ISD::ADD, MVT::v2i64, 1 }, // The data reported by the IACA tool is "1.5".
935  { ISD::ADD, MVT::v4i32, 3 }, // The data reported by the IACA tool is "2.8".
936  { ISD::ADD, MVT::v4i64, 3 },
937  { ISD::ADD, MVT::v8i16, 4 },
938  { ISD::ADD, MVT::v8i32, 5 },
939  };
940 
941  if (IsPairwise) {
942  if (ST->hasAVX()) {
943  int Idx = CostTableLookup(AVX1CostTblPairWise, ISD, MTy);
944  if (Idx != -1)
945  return LT.first * AVX1CostTblPairWise[Idx].Cost;
946  }
947 
948  if (ST->hasSSE42()) {
949  int Idx = CostTableLookup(SSE42CostTblPairWise, ISD, MTy);
950  if (Idx != -1)
951  return LT.first * SSE42CostTblPairWise[Idx].Cost;
952  }
953  } else {
954  if (ST->hasAVX()) {
955  int Idx = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy);
956  if (Idx != -1)
957  return LT.first * AVX1CostTblNoPairWise[Idx].Cost;
958  }
959 
960  if (ST->hasSSE42()) {
961  int Idx = CostTableLookup(SSE42CostTblNoPairWise, ISD, MTy);
962  if (Idx != -1)
963  return LT.first * SSE42CostTblNoPairWise[Idx].Cost;
964  }
965  }
966 
967  return BaseT::getReductionCost(Opcode, ValTy, IsPairwise);
968 }
969 
970 /// \brief Calculate the cost of materializing a 64-bit value. This helper
971 /// method might only calculate a fraction of a larger immediate. Therefore it
972 /// is valid to return a cost of ZERO.
973 unsigned X86TTIImpl::getIntImmCost(int64_t Val) {
974  if (Val == 0)
975  return TTI::TCC_Free;
976 
977  if (isInt<32>(Val))
978  return TTI::TCC_Basic;
979 
980  return 2 * TTI::TCC_Basic;
981 }
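// For illustration: 0 is free; 0x7fffffff fits in a signed 32-bit immediate and
// costs one TCC_Basic; 0x100000000 does not, so it needs a full 64-bit
// materialization (e.g. movabsq) and costs 2 * TCC_Basic.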
982 
983 unsigned X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
984  assert(Ty->isIntegerTy());
985 
986  unsigned BitSize = Ty->getPrimitiveSizeInBits();
987  if (BitSize == 0)
988  return ~0U;
989 
990  // Never hoist constants larger than 128bit, because this might lead to
991  // incorrect code generation or assertions in codegen.
992  // Fixme: Create a cost model for types larger than i128 once the codegen
993  // issues have been fixed.
994  if (BitSize > 128)
995  return TTI::TCC_Free;
996 
997  if (Imm == 0)
998  return TTI::TCC_Free;
999 
1000  // Sign-extend all constants to a multiple of 64-bit.
1001  APInt ImmVal = Imm;
1002  if (BitSize & 0x3f)
1003  ImmVal = Imm.sext((BitSize + 63) & ~0x3fU);
1004 
1005  // Split the constant into 64-bit chunks and calculate the cost for each
1006  // chunk.
1007  unsigned Cost = 0;
1008  for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) {
1009  APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64);
1010  int64_t Val = Tmp.getSExtValue();
1011  Cost += getIntImmCost(Val);
1012  }
1013  // We need at least one instruction to materialize the constant.
1014  return std::max(1U, Cost);
1015 }
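// Worked example (illustrative): an i128 constant is sign-extended to a
// multiple of 64 bits and split into two 64-bit chunks; each chunk is priced
// with the helper above and at least one instruction is always charged.
// Anything wider than 128 bits is reported as TCC_Free so that constant
// hoisting leaves it alone.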
1016 
1017 unsigned X86TTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx,
1018  const APInt &Imm, Type *Ty) {
1019  assert(Ty->isIntegerTy());
1020 
1021  unsigned BitSize = Ty->getPrimitiveSizeInBits();
1022  // There is no cost model for constants with a bit size of 0. Return TCC_Free
1023  // here, so that constant hoisting will ignore this constant.
1024  if (BitSize == 0)
1025  return TTI::TCC_Free;
1026 
1027  unsigned ImmIdx = ~0U;
1028  switch (Opcode) {
1029  default:
1030  return TTI::TCC_Free;
1031  case Instruction::GetElementPtr:
1032  // Always hoist the base address of a GetElementPtr. This prevents the
1033  // creation of new constants for every base constant that gets constant
1034  // folded with the offset.
1035  if (Idx == 0)
1036  return 2 * TTI::TCC_Basic;
1037  return TTI::TCC_Free;
1038  case Instruction::Store:
1039  ImmIdx = 0;
1040  break;
1041  case Instruction::Add:
1042  case Instruction::Sub:
1043  case Instruction::Mul:
1044  case Instruction::UDiv:
1045  case Instruction::SDiv:
1046  case Instruction::URem:
1047  case Instruction::SRem:
1048  case Instruction::And:
1049  case Instruction::Or:
1050  case Instruction::Xor:
1051  case Instruction::ICmp:
1052  ImmIdx = 1;
1053  break;
1054  // Always return TCC_Free for the shift value of a shift instruction.
1055  case Instruction::Shl:
1056  case Instruction::LShr:
1057  case Instruction::AShr:
1058  if (Idx == 1)
1059  return TTI::TCC_Free;
1060  break;
1061  case Instruction::Trunc:
1062  case Instruction::ZExt:
1063  case Instruction::SExt:
1064  case Instruction::IntToPtr:
1065  case Instruction::PtrToInt:
1066  case Instruction::BitCast:
1067  case Instruction::PHI:
1068  case Instruction::Call:
1069  case Instruction::Select:
1070  case Instruction::Ret:
1071  case Instruction::Load:
1072  break;
1073  }
1074 
1075  if (Idx == ImmIdx) {
1076  unsigned NumConstants = (BitSize + 63) / 64;
1077  unsigned Cost = X86TTIImpl::getIntImmCost(Imm, Ty);
1078  return (Cost <= NumConstants * TTI::TCC_Basic)
1079  ? static_cast<unsigned>(TTI::TCC_Free)
1080  : Cost;
1081  }
1082 
1083  return X86TTIImpl::getIntImmCost(Imm, Ty);
1084 }
1085 
1086 unsigned X86TTIImpl::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
1087  const APInt &Imm, Type *Ty) {
1088  assert(Ty->isIntegerTy());
1089 
1090  unsigned BitSize = Ty->getPrimitiveSizeInBits();
1091  // There is no cost model for constants with a bit size of 0. Return TCC_Free
1092  // here, so that constant hoisting will ignore this constant.
1093  if (BitSize == 0)
1094  return TTI::TCC_Free;
1095 
1096  switch (IID) {
1097  default:
1098  return TTI::TCC_Free;
1099  case Intrinsic::sadd_with_overflow:
1100  case Intrinsic::uadd_with_overflow:
1101  case Intrinsic::ssub_with_overflow:
1102  case Intrinsic::usub_with_overflow:
1103  case Intrinsic::smul_with_overflow:
1104  case Intrinsic::umul_with_overflow:
1105  if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue()))
1106  return TTI::TCC_Free;
1107  break;
1108  case Intrinsic::experimental_stackmap:
1109  if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
1110  return TTI::TCC_Free;
1111  break;
1112  case Intrinsic::experimental_patchpoint_void:
1113  case Intrinsic::experimental_patchpoint_i64:
1114  if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
1115  return TTI::TCC_Free;
1116  break;
1117  }
1118  return X86TTIImpl::getIntImmCost(Imm, Ty);
1119 }
1120 
1121 bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, int Consecutive) {
1122  int DataWidth = DataTy->getPrimitiveSizeInBits();
1123 
1124  // Todo: AVX512 allows gather/scatter, works with strided and random as well
1125  if ((DataWidth < 32) || (Consecutive == 0))
1126  return false;
1127  if (ST->hasAVX512() || ST->hasAVX2())
1128  return true;
1129  return false;
1130 }
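// For illustration: on an AVX2 or AVX-512 subtarget, a consecutive masked load
// of a data type at least 32 bits wide is reported legal; strided or random
// (Consecutive == 0) accesses, narrower data, and older subtargets return false.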
1131 
1132 bool X86TTIImpl::isLegalMaskedStore(Type *DataType, int Consecutive) {
1133  return isLegalMaskedLoad(DataType, Consecutive);
1134 }
1135 
1136 bool X86TTIImpl::hasCompatibleFunctionAttributes(const Function *Caller,
1137  const Function *Callee) const {
1138  const TargetMachine &TM = getTLI()->getTargetMachine();
1139 
1140  // Work this as a subsetting of subtarget features.
1141  const FeatureBitset &CallerBits =
1142  TM.getSubtargetImpl(*Caller)->getFeatureBits();
1143  const FeatureBitset &CalleeBits =
1144  TM.getSubtargetImpl(*Callee)->getFeatureBits();
1145 
1146  // FIXME: This is likely too limiting as it will include subtarget features
1147  // that we might not care about for inlining, but it is conservatively
1148  // correct.
1149  return (CallerBits & CalleeBits) == CalleeBits;
1150 }
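// For illustration: a callee built with just "+sse4.2" may be inlined into a
// caller built with "+avx2" (the callee's resolved feature bits are a subset of
// the caller's), but a callee requiring "+avx512f" cannot be inlined into an
// SSE-only caller.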