LLVM  4.0.0
BasicTTIImpl.h
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 ///
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/TargetTransformInfoImpl.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Target/TargetLowering.h"
24 #include "llvm/Target/TargetSubtargetInfo.h"
25 
26 namespace llvm {
27 
28 extern cl::opt<unsigned> PartialUnrollingThreshold;
29 
30 /// \brief Base class which can be used to help build a TTI implementation.
31 ///
32 /// This class provides as much implementation of the TTI interface as is
33 /// possible using the target independent parts of the code generator.
34 ///
35 /// In order to subclass it, your class must implement a getST() method to
36 /// return the subtarget, and a getTLI() method to return the target lowering.
37 /// We need these methods implemented in the derived class so that this class
38 /// doesn't have to duplicate storage for them.
39 template <typename T>
40 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
41 private:
42  typedef TargetTransformInfoImplCRTPBase<T> BaseT;
43  typedef TargetTransformInfo TTI;
44 
45  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
46  /// are set if the result needs to be inserted and/or extracted from vectors.
47  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
48  assert(Ty->isVectorTy() && "Can only scalarize vectors");
49  unsigned Cost = 0;
50 
51  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
52  if (Insert)
53  Cost += static_cast<T *>(this)
54  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
55  if (Extract)
56  Cost += static_cast<T *>(this)
57  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
58  }
59 
60  return Cost;
61  }
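  // For illustration, assuming a hypothetical target where every vector
  // insert/extract costs 1: scalarizing a <4 x i32> value with both Insert
  // and Extract set is costed as 4 inserts + 4 extracts = 8.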
62 
63  /// Estimate the cost of a shuffle as a sequence of extract and insert
64  /// operations.
65  unsigned getPermuteShuffleOverhead(Type *Ty) {
66  assert(Ty->isVectorTy() && "Can only shuffle vectors");
67  unsigned Cost = 0;
68  // Shuffle cost is equal to the cost of extracting elements from its
69  // arguments plus the cost of inserting them into the result vector.
70 
71  // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract from
72  // index 0 of the first vector, index 1 of the second vector, index 2 of the
73  // first vector and finally index 3 of the second vector, and insert them at
74  // indices <0,1,2,3> of the result vector.
75  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
76  Cost += static_cast<T *>(this)
77  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
78  Cost += static_cast<T *>(this)
79  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
80  }
81  return Cost;
82  }
83 
84  /// \brief Local query method delegates up to T which *must* implement this!
85  const TargetSubtargetInfo *getST() const {
86  return static_cast<const T *>(this)->getST();
87  }
88 
89  /// \brief Local query method delegates up to T which *must* implement this!
90  const TargetLoweringBase *getTLI() const {
91  return static_cast<const T *>(this)->getTLI();
92  }
93 
94 protected:
95  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
96  : BaseT(DL) {}
97 
98  using TargetTransformInfoImplBase::DL;
99 
100 public:
101  /// \name Scalar TTI Implementations
102  /// @{
103  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
104  unsigned BitWidth, unsigned AddressSpace,
105  unsigned Alignment, bool *Fast) const {
106  EVT E = EVT::getIntegerVT(Context, BitWidth);
107  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
108  }
109 
110  bool hasBranchDivergence() { return false; }
111 
112  bool isSourceOfDivergence(const Value *V) { return false; }
113 
114  bool isLegalAddImmediate(int64_t imm) {
115  return getTLI()->isLegalAddImmediate(imm);
116  }
117 
118  bool isLegalICmpImmediate(int64_t imm) {
119  return getTLI()->isLegalICmpImmediate(imm);
120  }
121 
122  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
123  bool HasBaseReg, int64_t Scale,
124  unsigned AddrSpace) {
125  TargetLoweringBase::AddrMode AM;
126  AM.BaseGV = BaseGV;
127  AM.BaseOffs = BaseOffset;
128  AM.HasBaseReg = HasBaseReg;
129  AM.Scale = Scale;
130  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace);
131  }
132 
133  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
134  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
135  TargetLoweringBase::AddrMode AM;
136  AM.BaseGV = BaseGV;
137  AM.BaseOffs = BaseOffset;
138  AM.HasBaseReg = HasBaseReg;
139  AM.Scale = Scale;
140  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
141  }
142 
143  bool isFoldableMemAccessOffset(Instruction *I, int64_t Offset) {
144  return getTLI()->isFoldableMemAccessOffset(I, Offset);
145  }
146 
147  bool isTruncateFree(Type *Ty1, Type *Ty2) {
148  return getTLI()->isTruncateFree(Ty1, Ty2);
149  }
150 
151  bool isProfitableToHoist(Instruction *I) {
152  return getTLI()->isProfitableToHoist(I);
153  }
154 
155  bool isTypeLegal(Type *Ty) {
156  EVT VT = getTLI()->getValueType(DL, Ty);
157  return getTLI()->isTypeLegal(VT);
158  }
159 
160  int getGEPCost(Type *PointeeType, const Value *Ptr,
161  ArrayRef<const Value *> Operands) {
162  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
163  }
164 
165  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
166  ArrayRef<const Value *> Arguments) {
167  return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
168  }
169 
170  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
171  ArrayRef<Type *> ParamTys) {
172  if (IID == Intrinsic::cttz) {
173  if (getTLI()->isCheapToSpeculateCttz())
174  return TargetTransformInfo::TCC_Basic;
175  return TargetTransformInfo::TCC_Expensive;
176  }
177 
178  if (IID == Intrinsic::ctlz) {
179  if (getTLI()->isCheapToSpeculateCtlz())
180  return TargetTransformInfo::TCC_Basic;
181  return TargetTransformInfo::TCC_Expensive;
182  }
183 
184  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
185  }
186 
187  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
188 
189  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
190 
191  bool shouldBuildLookupTables() {
192  const TargetLoweringBase *TLI = getTLI();
193  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
194  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
195  }
196 
197  bool haveFastSqrt(Type *Ty) {
198  const TargetLoweringBase *TLI = getTLI();
199  EVT VT = TLI->getValueType(DL, Ty);
200  return TLI->isTypeLegal(VT) &&
201  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
202  }
203 
204  unsigned getFPOpCost(Type *Ty) {
205  // By default, FP instructions are no more expensive since they are
206  // implemented in HW. Target specific TTI can override this.
207  return TargetTransformInfo::TCC_Basic;
208  }
209 
210  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
211  const TargetLoweringBase *TLI = getTLI();
212  switch (Opcode) {
213  default: break;
214  case Instruction::Trunc: {
215  if (TLI->isTruncateFree(OpTy, Ty))
216  return TargetTransformInfo::TCC_Free;
217  return TargetTransformInfo::TCC_Basic;
218  }
219  case Instruction::ZExt: {
220  if (TLI->isZExtFree(OpTy, Ty))
221  return TargetTransformInfo::TCC_Free;
222  return TargetTransformInfo::TCC_Basic;
223  }
224  }
225 
226  return BaseT::getOperationCost(Opcode, Ty, OpTy);
227  }
228 
229  unsigned getInliningThresholdMultiplier() { return 1; }
230 
231  void getUnrollingPreferences(Loop *L, TTI::UnrollingPreferences &UP) {
232  // This unrolling functionality is target independent, but to provide some
233  // motivation for its intended use, for x86:
234 
235  // According to the Intel 64 and IA-32 Architectures Optimization Reference
236  // Manual, Intel Core models and later have a loop stream detector (and
237  // associated uop queue) that can benefit from partial unrolling.
238  // The relevant requirements are:
239  // - The loop must have no more than 4 (8 for Nehalem and later) branches
240  // taken, and none of them may be calls.
241  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
242 
243  // According to the Software Optimization Guide for AMD Family 15h
244  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
245  // and loop buffer which can benefit from partial unrolling.
246  // The relevant requirements are:
247  // - The loop must have fewer than 16 branches
248  // - The loop must have less than 40 uops in all executed loop branches
249 
250  // The number of taken branches in a loop is hard to estimate here, and
251  // benchmarking has revealed that it is better not to be conservative when
252  // estimating the branch count. As a result, we'll ignore the branch limits
253  // until someone finds a case where it matters in practice.
254 
255  unsigned MaxOps;
256  const TargetSubtargetInfo *ST = getST();
257  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
258  MaxOps = PartialUnrollingThreshold;
259  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
260  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
261  else
262  return;
263 
264  // Scan the loop: don't unroll loops with calls.
265  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
266  ++I) {
267  BasicBlock *BB = *I;
268 
269  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
270  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
271  ImmutableCallSite CS(&*J);
272  if (const Function *F = CS.getCalledFunction()) {
273  if (!static_cast<T *>(this)->isLoweredToCall(F))
274  continue;
275  }
276 
277  return;
278  }
279  }
280 
281  // Enable runtime and partial unrolling up to the specified size.
282  // Enable using trip count upper bound to unroll loops.
283  UP.Partial = UP.Runtime = UP.UpperBound = true;
284  UP.PartialThreshold = MaxOps;
285 
286  // Avoid unrolling when optimizing for size.
287  UP.OptSizeThreshold = 0;
288  UP.PartialOptSizeThreshold = 0;
289 
290  // Set number of instructions optimized when "back edge"
291  // becomes "fall through" to default value of 2.
292  UP.BEInsns = 2;
293  }
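  // For illustration: on a subtarget whose scheduling model reports
  // LoopMicroOpBufferSize = 28 (and with no PartialUnrollingThreshold
  // override), a call-free loop ends up with UP.Partial, UP.Runtime and
  // UP.UpperBound enabled and UP.PartialThreshold = 28.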
294 
295  /// @}
296 
297  /// \name Vector TTI Implementations
298  /// @{
299 
300  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
301 
302  unsigned getRegisterBitWidth(bool Vector) { return 32; }
303 
304  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
305 
306  unsigned getArithmeticInstrCost(
307  unsigned Opcode, Type *Ty,
308  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
309  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
310  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
311  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
312  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
313  // Check if any of the operands are vector operands.
314  const TargetLoweringBase *TLI = getTLI();
315  int ISD = TLI->InstructionOpcodeToISD(Opcode);
316  assert(ISD && "Invalid opcode");
317 
318  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
319 
320  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
321  // Assume that floating point arithmetic operations cost twice as much as
322  // integer operations.
323  unsigned OpCost = (IsFloat ? 2 : 1);
324 
325  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
326  // The operation is legal. Assume it costs 1.
327  // TODO: Once we have extract/insert subvector cost we need to use them.
328  return LT.first * OpCost;
329  }
330 
331  if (!TLI->isOperationExpand(ISD, LT.second)) {
332  // If the operation is custom lowered, then assume that the code is twice
333  // as expensive.
334  return LT.first * 2 * OpCost;
335  }
336 
337  // Else, assume that we need to scalarize this op.
338  // TODO: If one of the types get legalized by splitting, handle this
339  // similarly to what getCastInstrCost() does.
340  if (Ty->isVectorTy()) {
341  unsigned Num = Ty->getVectorNumElements();
342  unsigned Cost = static_cast<T *>(this)
343  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
344  // Return the cost of multiple scalar invocations plus the cost of
345  // inserting and extracting the values.
346 
347  return getScalarizationOverhead(Ty, true, true) + Num * Cost;
348  }
349 
350  // We don't know anything about this scalar instruction.
351  return OpCost;
352  }
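  // For illustration: if an add on <8 x i32> legalizes to two <4 x i32>
  // parts (LT.first == 2) and the legalized add is Legal, the returned cost
  // is 2 * 1 = 2; if it were Custom instead, it would be 2 * 2 * 1 = 4.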
353 
354  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
355  Type *SubTp) {
356  if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
357  Kind == TTI::SK_PermuteSingleSrc) {
358  return getPermuteShuffleOverhead(Tp);
359  }
360  return 1;
361  }
362 
363  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) {
364  const TargetLoweringBase *TLI = getTLI();
365  int ISD = TLI->InstructionOpcodeToISD(Opcode);
366  assert(ISD && "Invalid opcode");
367  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
368  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
369 
370  // Check for NOOP conversions.
371  if (SrcLT.first == DstLT.first &&
372  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
373 
374  // Bitcasts between types that are legalized to the same type are free.
375  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
376  return 0;
377  }
378 
379  if (Opcode == Instruction::Trunc &&
380  TLI->isTruncateFree(SrcLT.second, DstLT.second))
381  return 0;
382 
383  if (Opcode == Instruction::ZExt &&
384  TLI->isZExtFree(SrcLT.second, DstLT.second))
385  return 0;
386 
387  if (Opcode == Instruction::AddrSpaceCast &&
388  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
389  Dst->getPointerAddressSpace()))
390  return 0;
391 
392  // If the cast is marked as legal (or promote) then assume low cost.
393  if (SrcLT.first == DstLT.first &&
394  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
395  return 1;
396 
397  // Handle scalar conversions.
398  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
399 
400  // Scalar bitcasts are usually free.
401  if (Opcode == Instruction::BitCast)
402  return 0;
403 
404  // Just check the op cost. If the operation is legal then assume it costs
405  // 1.
406  if (!TLI->isOperationExpand(ISD, DstLT.second))
407  return 1;
408 
409  // Assume that illegal scalar instructions are expensive.
410  return 4;
411  }
412 
413  // Check vector-to-vector casts.
414  if (Dst->isVectorTy() && Src->isVectorTy()) {
415 
416  // If the cast is between same-sized registers, then the check is simple.
417  if (SrcLT.first == DstLT.first &&
418  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
419 
420  // Assume that Zext is done using AND.
421  if (Opcode == Instruction::ZExt)
422  return 1;
423 
424  // Assume that sext is done using SHL and SRA.
425  if (Opcode == Instruction::SExt)
426  return 2;
427 
428  // Just check the op cost. If the operation is legal then assume it
429  // costs 1 and multiply by the type-legalization overhead.
430 
431  if (!TLI->isOperationExpand(ISD, DstLT.second))
432  return SrcLT.first * 1;
433  }
434 
435  // If we are legalizing by splitting, query the concrete TTI for the cost
436  // of casting the original vector twice. We also need to factor in the
437  // cost of the split itself. Count that as 1, to be consistent with
438  // TLI->getTypeLegalizationCost().
439  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
440  TargetLowering::TypeSplitVector) ||
441  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
442  TargetLowering::TypeSplitVector)) {
443  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
444  Dst->getVectorNumElements() / 2);
445  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
446  Src->getVectorNumElements() / 2);
447  T *TTI = static_cast<T *>(this);
448  return TTI->getVectorSplitCost() +
449  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc));
450  }
451 
452  // In other cases where the source or destination are illegal, assume
453  // the operation will get scalarized.
454  unsigned Num = Dst->getVectorNumElements();
455  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
456  Opcode, Dst->getScalarType(), Src->getScalarType());
457 
458  // Return the cost of multiple scalar invocation plus the cost of
459  // inserting and extracting the values.
460  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
461  }
462 
463  // We already handled vector-to-vector and scalar-to-scalar conversions.
464  // This is where we handle bitcasts between vectors and scalars. We need
465  // to assume that the conversion is scalarized in one way or another.
466 
467  if (Opcode == Instruction::BitCast)
468  // Illegal bitcasts are done by storing and loading from a stack slot.
469  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
470  : 0) +
471  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
472  : 0);
473 
474  llvm_unreachable("Unhandled cast");
475  }
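  // For illustration: if the source or destination is legalized by splitting
  // (say, sext of <8 x i16> to <8 x i32> on a target with 128-bit vectors),
  // the cost is getVectorSplitCost() plus twice the cost of the same cast on
  // the <4 x i16> / <4 x i32> halves.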
476 
477  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
478  VectorType *VecTy, unsigned Index) {
479  return static_cast<T *>(this)->getVectorInstrCost(
480  Instruction::ExtractElement, VecTy, Index) +
481  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
482  VecTy->getElementType());
483  }
484 
485  unsigned getCFInstrCost(unsigned Opcode) {
486  // Branches are assumed to be predicted.
487  return 0;
488  }
489 
490  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) {
491  const TargetLoweringBase *TLI = getTLI();
492  int ISD = TLI->InstructionOpcodeToISD(Opcode);
493  assert(ISD && "Invalid opcode");
494 
495  // Selects on vectors are actually vector selects.
496  if (ISD == ISD::SELECT) {
497  assert(CondTy && "CondTy must exist");
498  if (CondTy->isVectorTy())
499  ISD = ISD::VSELECT;
500  }
501  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
502 
503  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
504  !TLI->isOperationExpand(ISD, LT.second)) {
505  // The operation is legal. Assume it costs 1. Multiply
506  // by the type-legalization overhead.
507  return LT.first * 1;
508  }
509 
510  // Otherwise, assume that the cast is scalarized.
511  // TODO: If one of the types get legalized by splitting, handle this
512  // similarly to what getCastInstrCost() does.
513  if (ValTy->isVectorTy()) {
514  unsigned Num = ValTy->getVectorNumElements();
515  if (CondTy)
516  CondTy = CondTy->getScalarType();
517  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
518  Opcode, ValTy->getScalarType(), CondTy);
519 
520  // Return the cost of multiple scalar invocation plus the cost of
521  // inserting and extracting the values.
522  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
523  }
524 
525  // Unknown scalar opcode.
526  return 1;
527  }
528 
529  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
530  std::pair<unsigned, MVT> LT =
531  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
532 
533  return LT.first;
534  }
535 
536  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
537  unsigned AddressSpace) {
538  assert(!Src->isVoidTy() && "Invalid type");
539  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
540 
541  // Assuming that all loads of legal types cost 1.
542  unsigned Cost = LT.first;
543 
544  if (Src->isVectorTy() &&
545  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
546  // This is a vector load that legalizes to a larger type than the vector
547  // itself. Unless the corresponding extending load or truncating store is
548  // legal, this will scalarize.
549  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
550  EVT MemVT = getTLI()->getValueType(DL, Src);
551  if (Opcode == Instruction::Store)
552  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
553  else
554  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
555 
556  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
557  // This is a vector load/store for some illegal type that is scalarized.
558  // We must account for the cost of building or decomposing the vector.
559  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
560  Opcode == Instruction::Store);
561  }
562  }
563 
564  return Cost;
565  }
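  // For illustration: a load of <4 x i8> that legalizes to a wider vector
  // type starts at a cost of LT.first; if the corresponding extending load
  // is neither Legal nor Custom, the overhead of inserting the four loaded
  // scalars back into a vector is added on top.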
566 
567  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
568  unsigned Factor,
569  ArrayRef<unsigned> Indices,
570  unsigned Alignment,
571  unsigned AddressSpace) {
572  VectorType *VT = dyn_cast<VectorType>(VecTy);
573  assert(VT && "Expect a vector type for interleaved memory op");
574 
575  unsigned NumElts = VT->getNumElements();
576  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
577 
578  unsigned NumSubElts = NumElts / Factor;
579  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
580 
581  // Firstly, the cost of load/store operation.
582  unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
583  Opcode, VecTy, Alignment, AddressSpace);
584 
585  // Legalize the vector type, and get the legalized and unlegalized type
586  // sizes.
587  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
588  unsigned VecTySize =
589  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
590  unsigned VecTyLTSize = VecTyLT.getStoreSize();
591 
592  // Return the ceiling of dividing A by B.
593  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
594 
595  // Scale the cost of the memory operation by the fraction of legalized
596  // instructions that will actually be used. We shouldn't account for the
597  // cost of dead instructions since they will be removed.
598  //
599  // E.g., An interleaved load of factor 8:
600  // %vec = load <16 x i64>, <16 x i64>* %ptr
601  // %v0 = shufflevector %vec, undef, <0, 8>
602  //
603  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
604  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
605  // type). The other loads are unused.
606  //
607  // We only scale the cost of loads since interleaved store groups aren't
608  // allowed to have gaps.
609  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
610 
611  // The number of loads of a legal type it will take to represent a load
612  // of the unlegalized vector type.
613  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
614 
615  // The number of elements of the unlegalized type that correspond to a
616  // single legal instruction.
617  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
618 
619  // Determine which legal instructions will be used.
620  BitVector UsedInsts(NumLegalInsts, false);
621  for (unsigned Index : Indices)
622  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
623  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
624 
625  // Scale the cost of the load by the fraction of legal instructions that
626  // will be used.
627  Cost *= UsedInsts.count() / NumLegalInsts;
628  }
629 
630  // Then add the cost of the interleave operation.
631  if (Opcode == Instruction::Load) {
632  // The interleave cost is similar to extracting the sub vectors' elements
633  // from the wide vector and inserting them into the sub vectors.
634  //
635  // E.g. An interleaved load of factor 2 (with one member of index 0):
636  // %vec = load <8 x i32>, <8 x i32>* %ptr
637  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
638  // The cost is estimated as extracting elements 0, 2, 4, 6 from the
639  // <8 x i32> vector and inserting them into a <4 x i32> vector.
640 
641  assert(Indices.size() <= Factor &&
642  "Interleaved memory op has too many members");
643 
644  for (unsigned Index : Indices) {
645  assert(Index < Factor && "Invalid index for interleaved memory op");
646 
647  // Extract elements from loaded vector for each sub vector.
648  for (unsigned i = 0; i < NumSubElts; i++)
649  Cost += static_cast<T *>(this)->getVectorInstrCost(
650  Instruction::ExtractElement, VT, Index + i * Factor);
651  }
652 
653  unsigned InsSubCost = 0;
654  for (unsigned i = 0; i < NumSubElts; i++)
655  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
656  Instruction::InsertElement, SubVT, i);
657 
658  Cost += Indices.size() * InsSubCost;
659  } else {
660  // The interleave cost is the cost of extracting all elements from the sub
661  // vectors and inserting them into the wide vector.
662  //
663  // E.g. An interleaved store of factor 2:
664  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
665  // store <8 x i32> %v0_v1, <8 x i32>* %ptr
666  // The cost is estimated as extracting all elements from both <4 x i32>
667  // vectors and inserting them into the <8 x i32> vector.
668 
669  unsigned ExtSubCost = 0;
670  for (unsigned i = 0; i < NumSubElts; i++)
671  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
672  Instruction::ExtractElement, SubVT, i);
673  Cost += ExtSubCost * Factor;
674 
675  for (unsigned i = 0; i < NumElts; i++)
676  Cost += static_cast<T *>(this)
677  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
678  }
679 
680  return Cost;
681  }
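  // For illustration, assuming each vector insert/extract costs 1: an
  // interleaved load of <8 x i32> with Factor == 2 and both members used
  // (Indices = {0, 1}) is costed as the <8 x i32> load itself, plus
  // 2 * 4 = 8 extracts from the wide vector, plus 2 * 4 = 8 inserts into
  // the two <4 x i32> sub vectors.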
682 
683  /// Get intrinsic cost based on arguments
684  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
685  ArrayRef<Value *> Args, FastMathFlags FMF) {
686  switch (IID) {
687  default: {
688  SmallVector<Type *, 4> Types;
689  for (Value *Op : Args)
690  Types.push_back(Op->getType());
691  return static_cast<T *>(this)->getIntrinsicInstrCost(IID, RetTy, Types,
692  FMF);
693  }
694  case Intrinsic::masked_scatter: {
695  Value *Mask = Args[3];
696  bool VarMask = !isa<Constant>(Mask);
697  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
698  return
699  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
700  Args[0]->getType(),
701  Args[1], VarMask,
702  Alignment);
703  }
704  case Intrinsic::masked_gather: {
705  Value *Mask = Args[2];
706  bool VarMask = !isa<Constant>(Mask);
707  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
708  return
709  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
710  RetTy, Args[0], VarMask,
711  Alignment);
712  }
713  }
714  }
715 
716  /// Get intrinsic cost based on argument types
717  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
718  ArrayRef<Type *> Tys, FastMathFlags FMF) {
719  SmallVector<unsigned, 2> ISDs;
720  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
721  switch (IID) {
722  default: {
723  // Assume that we need to scalarize this intrinsic.
724  unsigned ScalarizationCost = 0;
725  unsigned ScalarCalls = 1;
726  Type *ScalarRetTy = RetTy;
727  if (RetTy->isVectorTy()) {
728  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
729  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
730  ScalarRetTy = RetTy->getScalarType();
731  }
732  SmallVector<Type *, 4> ScalarTys;
733  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
734  Type *Ty = Tys[i];
735  if (Ty->isVectorTy()) {
736  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
737  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
738  Ty = Ty->getScalarType();
739  }
740  ScalarTys.push_back(Ty);
741  }
742  if (ScalarCalls == 1)
743  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
744 
745  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
746  IID, ScalarRetTy, ScalarTys, FMF);
747 
748  return ScalarCalls * ScalarCost + ScalarizationCost;
749  }
750  // Look for intrinsics that can be lowered directly or turned into a scalar
751  // intrinsic call.
752  case Intrinsic::sqrt:
753  ISDs.push_back(ISD::FSQRT);
754  break;
755  case Intrinsic::sin:
756  ISDs.push_back(ISD::FSIN);
757  break;
758  case Intrinsic::cos:
759  ISDs.push_back(ISD::FCOS);
760  break;
761  case Intrinsic::exp:
762  ISDs.push_back(ISD::FEXP);
763  break;
764  case Intrinsic::exp2:
765  ISDs.push_back(ISD::FEXP2);
766  break;
767  case Intrinsic::log:
768  ISDs.push_back(ISD::FLOG);
769  break;
770  case Intrinsic::log10:
771  ISDs.push_back(ISD::FLOG10);
772  break;
773  case Intrinsic::log2:
774  ISDs.push_back(ISD::FLOG2);
775  break;
776  case Intrinsic::fabs:
777  ISDs.push_back(ISD::FABS);
778  break;
779  case Intrinsic::minnum:
780  ISDs.push_back(ISD::FMINNUM);
781  if (FMF.noNaNs())
782  ISDs.push_back(ISD::FMINNAN);
783  break;
784  case Intrinsic::maxnum:
785  ISDs.push_back(ISD::FMAXNUM);
786  if (FMF.noNaNs())
787  ISDs.push_back(ISD::FMAXNAN);
788  break;
789  case Intrinsic::copysign:
790  ISDs.push_back(ISD::FCOPYSIGN);
791  break;
792  case Intrinsic::floor:
793  ISDs.push_back(ISD::FFLOOR);
794  break;
795  case Intrinsic::ceil:
796  ISDs.push_back(ISD::FCEIL);
797  break;
798  case Intrinsic::trunc:
799  ISDs.push_back(ISD::FTRUNC);
800  break;
801  case Intrinsic::nearbyint:
802  ISDs.push_back(ISD::FNEARBYINT);
803  break;
804  case Intrinsic::rint:
805  ISDs.push_back(ISD::FRINT);
806  break;
807  case Intrinsic::round:
808  ISDs.push_back(ISD::FROUND);
809  break;
810  case Intrinsic::pow:
811  ISDs.push_back(ISD::FPOW);
812  break;
813  case Intrinsic::fma:
814  ISDs.push_back(ISD::FMA);
815  break;
816  case Intrinsic::fmuladd:
817  ISDs.push_back(ISD::FMA);
818  break;
819  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
820  case Intrinsic::lifetime_start:
821  case Intrinsic::lifetime_end:
822  return 0;
823  case Intrinsic::masked_store:
824  return static_cast<T *>(this)
825  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
826  case Intrinsic::masked_load:
827  return static_cast<T *>(this)
828  ->getMaskedMemoryOpCost(Instruction::Load, Tys[0], 0, 0);
829  case Intrinsic::ctpop:
830  ISDs.push_back(ISD::CTPOP);
831  // In case of legalization use TCC_Expensive. This is cheaper than a
832  // library call but still not a cheap instruction.
833  SingleCallCost = TargetTransformInfo::TCC_Expensive;
834  break;
835  // FIXME: ctlz, cttz, ...
836  }
837 
838  const TargetLoweringBase *TLI = getTLI();
839  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
840 
841  SmallVector<unsigned, 2> LegalCost;
842  SmallVector<unsigned, 2> CustomCost;
843  for (unsigned ISD : ISDs) {
844  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
845  if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
846  return 0;
847  }
848 
849  // The operation is legal. Assume it costs 1.
850  // If the type is split to multiple registers, assume that there is some
851  // overhead to this.
852  // TODO: Once we have extract/insert subvector cost we need to use them.
853  if (LT.first > 1)
854  LegalCost.push_back(LT.first * 2);
855  else
856  LegalCost.push_back(LT.first * 1);
857  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
858  // If the operation is custom lowered then assume
859  // that the code is twice as expensive.
860  CustomCost.push_back(LT.first * 2);
861  }
862  }
863 
864  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
865  if (MinLegalCostI != LegalCost.end())
866  return *MinLegalCostI;
867 
868  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
869  if (MinCustomCostI != CustomCost.end())
870  return *MinCustomCostI;
871 
872  // If we can't lower fmuladd into an FMA estimate the cost as a floating
873  // point mul followed by an add.
874  if (IID == Intrinsic::fmuladd)
875  return static_cast<T *>(this)
876  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
877  static_cast<T *>(this)
878  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
879 
880  // Else, assume that we need to scalarize this intrinsic. For math builtins
881  // this will emit a costly libcall, adding call overhead and spills. Make it
882  // very expensive.
883  if (RetTy->isVectorTy()) {
884  unsigned ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
885  unsigned ScalarCalls = RetTy->getVectorNumElements();
886  SmallVector<Type *, 4> ScalarTys;
887  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
888  Type *Ty = Tys[i];
889  if (Ty->isVectorTy())
890  Ty = Ty->getScalarType();
891  ScalarTys.push_back(Ty);
892  }
893  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
894  IID, RetTy->getScalarType(), ScalarTys, FMF);
895  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
896  if (Tys[i]->isVectorTy()) {
897  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
898  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
899  }
900  }
901 
902  return ScalarCalls * ScalarCost + ScalarizationCost;
903  }
904 
905  // This is going to be turned into a library call, make it expensive.
906  return SingleCallCost;
907  }
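  // For illustration: llvm.sqrt.v4f32 maps to ISD::FSQRT. If FSQRT is Legal
  // for the legalized type and no splitting is needed (LT.first == 1), the
  // cost is 1; if it must be expanded, the call is scalarized into four
  // scalar calls (each costed like a library call) plus the overhead of
  // extracting the operands and inserting the results.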
908 
909  /// \brief Compute a cost of the given call instruction.
910  ///
911  /// Compute the cost of calling function F with return type RetTy and
912  /// argument types Tys. F might be nullptr, in this case the cost of an
913  /// arbitrary call with the specified signature will be returned.
914  /// This is used, for instance, when we estimate call of a vector
915  /// counterpart of the given function.
916  /// \param F Called function, might be nullptr.
917  /// \param RetTy Return value types.
918  /// \param Tys Argument types.
919  /// \returns The cost of Call instruction.
920  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
921  return 10;
922  }
923 
924  unsigned getNumberOfParts(Type *Tp) {
925  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
926  return LT.first;
927  }
928 
929  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
930  const SCEV *) {
931  return 0;
932  }
933 
934  unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) {
935  assert(Ty->isVectorTy() && "Expect a vector type");
936  Type *ScalarTy = Ty->getVectorElementType();
937  unsigned NumVecElts = Ty->getVectorNumElements();
938  unsigned NumReduxLevels = Log2_32(NumVecElts);
939  // Try to calculate arithmetic and shuffle op costs for reduction operations.
940  // We're assuming that reduction operations are performed the following way:
941  // 1. Non-pairwise reduction
942  // %val1 = shufflevector<n x t> %val, <n x t> %undef,
943  // <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
944  // \----------------v-------------/ \----------v------------/
945  // n/2 elements n/2 elements
946  // %red1 = op <n x t> %val, <n x t> val1
947  // After this operation we have a vector %red1 in which only the first
948  // n/2 elements are meaningful; the second n/2 elements are undefined and
949  // can be dropped. All further operations are actually working with the
950  // vector of length n/2, not n, though the real vector length is still n.
951  // %val2 = shufflevector<n x t> %red1, <n x t> %undef,
952  // <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
953  // \----------------v-------------/ \----------v------------/
954  // n/4 elements 3*n/4 elements
955  // %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
956  // length n/2, the resulting vector has length n/4 etc.
957  // 2. Pairwise reduction:
958  // Everything is the same except for an additional shuffle operation which
959  // is used to produce operands for pairwise kind of reductions.
960  // %val1 = shufflevector<n x t> %val, <n x t> %undef,
961  // <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
962  // \-------------v----------/ \----------v------------/
963  // n/2 elements n/2 elements
964  // %val2 = shufflevector<n x t> %val, <n x t> %undef,
965  // <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
966  // \-------------v----------/ \----------v------------/
967  // n/2 elements n/2 elements
968  // %red1 = op <n x t> %val1, <n x t> val2
969  // Again, the operation is performed on <n x t> vector, but the resulting
970  // vector %red1 is <n/2 x t> vector.
971  //
972  // The cost model should take into account that the actual length of the
973  // vector is reduced on each iteration.
974  unsigned ArithCost = 0;
975  unsigned ShuffleCost = 0;
976  auto *ConcreteTTI = static_cast<T *>(this);
977  std::pair<unsigned, MVT> LT =
978  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
979  unsigned LongVectorCount = 0;
980  unsigned MVTLen =
981  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
982  while (NumVecElts > MVTLen) {
983  NumVecElts /= 2;
984  // Assume the pairwise shuffles add a cost.
985  ShuffleCost += (IsPairwise + 1) *
986  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
987  NumVecElts, Ty);
988  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
989  Ty = VectorType::get(ScalarTy, NumVecElts);
990  ++LongVectorCount;
991  }
992  // The minimal length of the vector is limited by the real length of vector
993  // operations performed on the current platform. That's why several final
994  // reduction operations are performed on the vectors with the same
995  // architecture-dependent length.
996  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
997  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
998  NumVecElts, Ty);
999  ArithCost += (NumReduxLevels - LongVectorCount) *
1000  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1001  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
1002  }
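  // For illustration: a non-pairwise add reduction of <8 x i32>, with
  // <8 x i32> already legal, uses Log2_32(8) == 3 reduction levels, so the
  // estimate is 3 shuffles + 3 vector adds plus the extract overhead that
  // getScalarizationOverhead(Ty, false, true) charges at the end.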
1003 
1004  unsigned getVectorSplitCost() { return 1; }
1005 
1006  /// @}
1007 };
1008 
1009 /// \brief Concrete BasicTTIImpl that can be used if no further customization
1010 /// is needed.
1011 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1012  typedef BasicTTIImplBase<BasicTTIImpl> BaseT;
1013  friend class BasicTTIImplBase<BasicTTIImpl>;
1014 
1015  const TargetSubtargetInfo *ST;
1016  const TargetLoweringBase *TLI;
1017 
1018  const TargetSubtargetInfo *getST() const { return ST; }
1019  const TargetLoweringBase *getTLI() const { return TLI; }
1020 
1021 public:
1022  explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
1023 };
1024 
1025 }
1026 
1027 #endif