LLVM 3.7.0 — BasicTTIImpl.h
//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"

namespace llvm {

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// \brief Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target-independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  typedef TargetTransformInfoImplCRTPBase<T> BaseT;
  typedef TargetTransformInfo TTI;

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted into and/or extracted from
  /// vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    assert(Ty->isVectorTy() && "Can only scalarize vectors");
    unsigned Cost = 0;

    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      if (Insert)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }
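
  // A worked example, assuming the default per-element getVectorInstrCost of
  // 1 (targets typically override it): scalarizing an instruction whose
  // <4 x float> result must be rebuilt and whose operands must be decomposed,
  //
  //   unsigned Overhead = getScalarizationOverhead(VecTy, /*Insert=*/true,
  //                                                /*Extract=*/true);
  //
  // yields 4 InsertElement plus 4 ExtractElement queries, i.e. Overhead == 8.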

  /// Estimate the cost overhead of SK_Alternate shuffle.
  unsigned getAltShuffleOverhead(Type *Ty) {
    assert(Ty->isVectorTy() && "Can only shuffle vectors");
    unsigned Cost = 0;
    // The shuffle cost is equal to the cost of extracting each element from
    // its source argument plus the cost of inserting it into the result
    // vector.

    // E.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract
    // index 0 of the first vector, index 1 of the second vector, index 2 of
    // the first vector, and finally index 3 of the second vector, and insert
    // them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      Cost += static_cast<T *>(this)
                  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }
    return Cost;
  }

  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// \brief Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}

  using TargetTransformInfoImplBase::DL;

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImplBase(const BasicTTIImplBase &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)) {}
  BasicTTIImplBase(BasicTTIImplBase &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))) {}

  /// \name Scalar TTI Implementations
  /// @{

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    if (IID == Intrinsic::cttz) {
      if (getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    if (IID == Intrinsic::ctlz) {
      if (getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      return TargetTransformInfo::TCC_Expensive;
    }

    return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
  }

  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }

  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  unsigned getFPOpCost(Type *Ty) {
    // By default, FP instructions are no more expensive than integer ones,
    // since they are implemented in HW. Target-specific TTI can override this.
    return TargetTransformInfo::TCC_Basic;
  }

  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    const TargetLoweringBase *TLI = getTLI();
    switch (Opcode) {
    default: break;
    case Instruction::Trunc: {
      if (TLI->isTruncateFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    case Instruction::ZExt: {
      if (TLI->isZExtFree(OpTy, Ty))
        return TargetTransformInfo::TCC_Free;
      return TargetTransformInfo::TCC_Basic;
    }
    }

    return BaseT::getOperationCost(Opcode, Ty, OpTy);
  }

  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target-independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches.
    //  - The loop must have less than 40 uops in all executed loop branches.

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
         ++I) {
      BasicBlock *BB = *I;

      for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
        if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
          ImmutableCallSite CS(J);
          if (const Function *F = CS.getCalledFunction()) {
            if (!static_cast<T *>(this)->isLoweredToCall(F))
              continue;
          }

          return;
        }
    }

    // Enable runtime and partial unrolling up to the specified size.
    UP.Partial = UP.Runtime = true;
    UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
  }
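
  // A worked example: on a subtarget whose scheduling model reports
  // LoopMicroOpBufferSize == 28 (a Nehalem-like value), a call-free loop gets
  // UP.Partial = UP.Runtime = true and UP.PartialThreshold =
  // UP.PartialOptSizeThreshold = 28, so the unroller may expand the body up
  // to roughly 28 ops. An explicit PartialUnrollingThreshold given on the
  // command line takes precedence over the scheduling-model value.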

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }

  unsigned getRegisterBitWidth(bool Vector) { return 32; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
    // Assume that floating-point arithmetic operations cost twice as much as
    // integer operations.
    unsigned OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2 * OpCost;
      return LT.first * 1 * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered then assume
      // that the code is twice as expensive.
      return LT.first * 2 * OpCost;
    }

    // Else, assume that we need to scalarize this op.
    if (Ty->isVectorTy()) {
      unsigned Num = Ty->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)
                          ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Ty, true, true) + Num * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }
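
  // A worked example, assuming a target with legal 128-bit vectors: an fadd
  // on <8 x float> splits into two <4 x float> registers (LT.first == 2) and
  // FP ops have OpCost == 2, so the legal-but-split path returns
  // 2 * 2 * 2 == 8; an add on a legal <4 x i32> returns 1 * 1 * 1 == 1.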

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                          Type *SubTp) {
    if (Kind == TTI::SK_Alternate) {
      return getAltShuffleOverhead(Tp);
    }
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);

    // Check for NOOP conversions.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Bitcasts between types that are legalized to the same type are free.
      if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
        return 0;
    }

    if (Opcode == Instruction::Trunc &&
        TLI->isTruncateFree(SrcLT.second, DstLT.second))
      return 0;

    if (Opcode == Instruction::ZExt &&
        TLI->isZExtFree(SrcLT.second, DstLT.second))
      return 0;

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return 1;

    // Handle scalar conversions.
    if (!Src->isVectorTy() && !Dst->isVectorTy()) {

      // Scalar bitcasts are usually free.
      if (Opcode == Instruction::BitCast)
        return 0;

      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (Dst->isVectorTy() && Src->isVectorTy()) {

      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first &&
          SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return 1;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are converting vectors and the operation is illegal, or
      // if the vectors are legalized to different types, estimate the
      // scalarization costs.
      unsigned Num = Dst->getVectorNumElements();
      unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType());

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(Dst, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcasts between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast)
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
                                : 0) +
             (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
                                : 0);

    llvm_unreachable("Unhandled cast");
  }
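
  // A worked example, assuming both types legalize to a single 128-bit
  // register: a bitcast from <4 x i32> to <2 x i64> hits the NOOP check and
  // costs 0; for <4 x i16> -> <4 x i32> on the same-sized-register path, a
  // zext is modeled as an AND (cost 1) and a sext as SHL + SRA (cost 2).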

  unsigned getCFInstrCost(unsigned Opcode) {
    // Branches are assumed to be predicted.
    return 0;
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the instruction is scalarized.
    if (ValTy->isVectorTy()) {
      unsigned Num = ValTy->getVectorNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
          Opcode, ValTy->getScalarType(), CondTy);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    std::pair<unsigned, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) {
    assert(!Src->isVoidTy() && "Invalid type");
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);

    // Assume that all loads of legal types cost 1.
    unsigned Cost = LT.first;

    if (Src->isVectorTy() &&
        Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src, true);
      if (MemVT.isSimple() && MemVT != MVT::Other) {
        if (Opcode == Instruction::Store)
          LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
        else
          LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
      }

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }
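
  // A worked example, assuming legal 128-bit vectors and the default
  // per-element getVectorInstrCost of 1: a load of <4 x i32> is legal and
  // costs LT.first == 1, while a load of <4 x i8> (32 bits) legalizes to a
  // wider type; if the matching extending load is neither legal nor custom,
  // the scalarization path adds four InsertElement queries for rebuilding the
  // result, for a total of 1 + 4 == 5.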

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment,
                                      unsigned AddressSpace) {
    VectorType *VT = dyn_cast<VectorType>(VecTy);
    assert(VT && "Expect a vector type for interleaved memory op");

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);

    // First, the cost of the load/store operation.
    unsigned Cost = getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace);

    // Then add the cost of the interleave operation.
    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting the sub-vectors'
      // elements from the wide vector and inserting them into the
      // sub-vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0
      // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and inserting them into a <4 x i32> vector.

      assert(Indices.size() <= Factor &&
             "Interleaved memory op has too many members");
      for (unsigned Index : Indices) {
        assert(Index < Factor && "Invalid index for interleaved memory op");

        // Extract elements from the loaded vector for each sub-vector.
        for (unsigned i = 0; i < NumSubElts; i++)
          Cost += getVectorInstrCost(Instruction::ExtractElement, VT,
                                     Index + i * Factor);
      }

      unsigned InsSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        InsSubCost += getVectorInstrCost(Instruction::InsertElement, SubVT, i);

      Cost += Indices.size() * InsSubCost;
    } else {
      // The interleave cost is extracting all elements from the sub-vectors
      // and inserting them into the wide vector.
      //
      // E.g. An interleaved store of factor 2:
      //      %interleaved.vec = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
      // The cost is estimated as extracting all elements from both <4 x i32>
      // vectors and inserting them into the <8 x i32> vector.

      unsigned ExtSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        ExtSubCost += getVectorInstrCost(Instruction::ExtractElement, SubVT, i);

      Cost += Factor * ExtSubCost;

      for (unsigned i = 0; i < NumElts; i++)
        Cost += getVectorInstrCost(Instruction::InsertElement, VT, i);
    }

    return Cost;
  }
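
  // A worked example for the load path, with the default per-element cost of
  // 1: a factor-2 interleaved load of <8 x i32> requesting both members
  // (Indices == {0, 1}) costs the <8 x i32> memory op, plus 2 * 4
  // ExtractElement queries on the wide vector, plus 2 * 4 InsertElement
  // queries on the <4 x i32> sub-vectors.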

  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                 ArrayRef<Type *> Tys) {
    unsigned ISD = 0;
    switch (IID) {
    default: {
      // Assume that we need to scalarize this intrinsic.
      unsigned ScalarizationCost = 0;
      unsigned ScalarCalls = 1;
      Type *ScalarRetTy = RetTy;
      if (RetTy->isVectorTy()) {
        ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
        ScalarRetTy = RetTy->getScalarType();
      }
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (Ty->isVectorTy()) {
          ScalarizationCost += getScalarizationOverhead(Ty, false, true);
          ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
          Ty = Ty->getScalarType();
        }
        ScalarTys.push_back(Ty);
      }
      if (ScalarCalls == 1)
        return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.

      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
          IID, ScalarRetTy, ScalarTys);

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }
    // Look for intrinsics that can be lowered directly or turned into a scalar
    // intrinsic call.
    case Intrinsic::sqrt:
      ISD = ISD::FSQRT;
      break;
    case Intrinsic::sin:
      ISD = ISD::FSIN;
      break;
    case Intrinsic::cos:
      ISD = ISD::FCOS;
      break;
    case Intrinsic::exp:
      ISD = ISD::FEXP;
      break;
    case Intrinsic::exp2:
      ISD = ISD::FEXP2;
      break;
    case Intrinsic::log:
      ISD = ISD::FLOG;
      break;
    case Intrinsic::log10:
      ISD = ISD::FLOG10;
      break;
    case Intrinsic::log2:
      ISD = ISD::FLOG2;
      break;
    case Intrinsic::fabs:
      ISD = ISD::FABS;
      break;
    case Intrinsic::minnum:
      ISD = ISD::FMINNUM;
      break;
    case Intrinsic::maxnum:
      ISD = ISD::FMAXNUM;
      break;
    case Intrinsic::copysign:
      ISD = ISD::FCOPYSIGN;
      break;
    case Intrinsic::floor:
      ISD = ISD::FFLOOR;
      break;
    case Intrinsic::ceil:
      ISD = ISD::FCEIL;
      break;
    case Intrinsic::trunc:
      ISD = ISD::FTRUNC;
      break;
    case Intrinsic::nearbyint:
      ISD = ISD::FNEARBYINT;
      break;
    case Intrinsic::rint:
      ISD = ISD::FRINT;
      break;
    case Intrinsic::round:
      ISD = ISD::FROUND;
      break;
    case Intrinsic::pow:
      ISD = ISD::FPOW;
      break;
    case Intrinsic::fma:
      ISD = ISD::FMA;
      break;
    case Intrinsic::fmuladd:
      ISD = ISD::FMA;
      break;
    // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
      return 0;
    case Intrinsic::masked_store:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
    case Intrinsic::masked_load:
      return static_cast<T *>(this)
          ->getMaskedMemoryOpCost(Instruction::Load, Tys[0], 0, 0);
    }

    const TargetLoweringBase *TLI = getTLI();
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // If the type is split to multiple registers, assume that there is some
      // overhead to this.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      if (LT.first > 1)
        return LT.first * 2;
      return LT.first * 1;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered then assume
      // that the code is twice as expensive.
      return LT.first * 2;
    }

    // If we can't lower fmuladd into an FMA, estimate the cost as a
    // floating-point mul followed by an add.
    if (IID == Intrinsic::fmuladd)
      return static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
             static_cast<T *>(this)
                 ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);

    // Else, assume that we need to scalarize this intrinsic. For math builtins
    // this will emit a costly libcall, adding call overhead and spills. Make it
    // very expensive.
    if (RetTy->isVectorTy()) {
      unsigned ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
      unsigned ScalarCalls = RetTy->getVectorNumElements();
      SmallVector<Type *, 4> ScalarTys;
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        Type *Ty = Tys[i];
        if (Ty->isVectorTy())
          Ty = Ty->getScalarType();
        ScalarTys.push_back(Ty);
      }
      unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
          IID, RetTy->getScalarType(), ScalarTys);
      for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
        if (Tys[i]->isVectorTy()) {
          ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
          ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
        }
      }

      return ScalarCalls * ScalarCost + ScalarizationCost;
    }

    // This is going to be turned into a library call; make it expensive.
    return 10;
  }

  /// \brief Compute the cost of the given call instruction.
  ///
  /// Compute the cost of calling function F with return type RetTy and
  /// argument types Tys. F might be nullptr, in which case the cost of an
  /// arbitrary call with the specified signature will be returned.
  /// This is used, for instance, when we estimate a call of a vector
  /// counterpart of the given function.
  /// \param F Called function, might be nullptr.
  /// \param RetTy Return value type.
  /// \param Tys Argument types.
  /// \returns The cost of the Call instruction.
  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 10;
  }

  unsigned getNumberOfParts(Type *Tp) {
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
    return LT.first;
  }

  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) { return 0; }

  unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
    assert(Ty->isVectorTy() && "Expect a vector type");
    unsigned NumVecElts = Ty->getVectorNumElements();
    unsigned NumReduxLevels = Log2_32(NumVecElts);
    unsigned ArithCost =
        NumReduxLevels *
        static_cast<T *>(this)->getArithmeticInstrCost(Opcode, Ty);
    // Assume the pairwise shuffles add a cost.
    unsigned ShuffleCost =
        NumReduxLevels * (IsPairwise + 1) *
        static_cast<T *>(this)
            ->getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts / 2, Ty);
    return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
  }
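
  // A worked example: for a non-pairwise (IsPairwise == false) reduction of
  // <8 x float>, NumReduxLevels == Log2_32(8) == 3, giving three arithmetic
  // ops plus three shuffles; getScalarizationOverhead(Ty, false, true) then
  // adds an ExtractElement query per lane for reading the result out of the
  // vector.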

  /// @}
};

/// \brief Concrete BasicTTIImpl that can be used if no further customization
/// is needed.
class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
  typedef BasicTTIImplBase<BasicTTIImpl> BaseT;
  typedef TargetTransformInfo TTI;

  const TargetSubtargetInfo *ST;
  const TargetLoweringBase *TLI;

  const TargetSubtargetInfo *getST() const { return ST; }
  const TargetLoweringBase *getTLI() const { return TLI; }

public:
  explicit BasicTTIImpl(const TargetMachine *ST, Function &F);

  // Provide value semantics. MSVC requires that we spell all of these out.
  BasicTTIImpl(const BasicTTIImpl &Arg)
      : BaseT(static_cast<const BaseT &>(Arg)), ST(Arg.ST), TLI(Arg.TLI) {}
  BasicTTIImpl(BasicTTIImpl &&Arg)
      : BaseT(std::move(static_cast<BaseT &>(Arg))), ST(std::move(Arg.ST)),
        TLI(std::move(Arg.TLI)) {}
};
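
// A minimal sketch of how a backend hooks into this CRTP base (hypothetical
// target "Foo"; real targets such as X86TTIImpl follow the same pattern):
//
//   class FooTTIImpl : public BasicTTIImplBase<FooTTIImpl> {
//     typedef BasicTTIImplBase<FooTTIImpl> BaseT;
//     const FooSubtarget *ST;
//     const FooTargetLowering *TLI;
//
//     // Required by BasicTTIImplBase, which stores neither pointer itself.
//     const FooSubtarget *getST() const { return ST; }
//     const FooTargetLowering *getTLI() const { return TLI; }
//
//   public:
//     explicit FooTTIImpl(const FooTargetMachine *TM, Function &F)
//         : BaseT(TM, F.getParent()->getDataLayout()),
//           ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}
//
//     // Override only the hooks where Foo diverges from the generic model,
//     // e.g.: unsigned getRegisterBitWidth(bool Vector) { return 128; }
//   };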

}

#endif