1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file provides a helper that implements much of the TTI interface in
12 /// terms of the target-independent code generator and TargetLowering
13 /// interfaces.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18 #define LLVM_CODEGEN_BASICTTIIMPL_H
19 
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/LoopInfo.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/Operator.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/MC/MCSchedule.h"
46 #include "llvm/Support/Casting.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <limits>
55 #include <utility>
56 
57 namespace llvm {
58 
59 class Function;
60 class GlobalValue;
61 class LLVMContext;
62 class ScalarEvolution;
63 class SCEV;
64 class TargetMachine;
65 
66 extern cl::opt<unsigned> PartialUnrollingThreshold;
67 
68 /// Base class which can be used to help build a TTI implementation.
69 ///
70 /// This class provides as much implementation of the TTI interface as is
71 /// possible using the target independent parts of the code generator.
72 ///
73 /// In order to subclass it, your class must implement a getST() method to
74 /// return the subtarget, and a getTLI() method to return the target lowering.
75 /// We need these methods implemented in the derived class so that this class
76 /// doesn't have to duplicate storage for them.
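///
/// A minimal sketch of such a subclass (illustrative names, not part of LLVM;
/// the concrete BasicTTIImpl at the end of this file follows the same shape):
///
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     using BaseT = BasicTTIImplBase<MyTargetTTIImpl>;
///     friend BaseT;
///
///     const MyTargetSubtarget *ST;  // hypothetical subtarget class
///     const MyTargetLowering *TLI;  // hypothetical lowering class
///
///     const TargetSubtargetInfo *getST() const { return ST; }
///     const TargetLoweringBase *getTLI() const { return TLI; }
///   };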
77 template <typename T>
78 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79 private:
80  using BaseT = TargetTransformInfoImplCRTPBase<T>;
81  using TTI = TargetTransformInfo;
82 
83  /// Estimate a cost of shuffle as a sequence of extract and insert
84  /// operations.
85  unsigned getPermuteShuffleOverhead(Type *Ty) {
86  assert(Ty->isVectorTy() && "Can only shuffle vectors");
87  unsigned Cost = 0;
88  // Shuffle cost is equal to the cost of extracting elements from its argument
89  // vectors plus the cost of inserting them into the result vector.
90 
91  // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
92  // index 0 of the first vector, index 1 of the second vector, index 2 of the
93  // first vector and index 3 of the second vector, and insert them at indices
94  // <0,1,2,3> of the result vector.
95  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
96  Cost += static_cast<T *>(this)
97  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
98  Cost += static_cast<T *>(this)
99  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
100  }
101  return Cost;
102  }
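// For example, a two-source permute of <4 x float> is costed as four
// extractelement plus four insertelement operations.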
103 
104  /// Local query method delegates up to T which *must* implement this!
105  const TargetSubtargetInfo *getST() const {
106  return static_cast<const T *>(this)->getST();
107  }
108 
109  /// Local query method delegates up to T which *must* implement this!
110  const TargetLoweringBase *getTLI() const {
111  return static_cast<const T *>(this)->getTLI();
112  }
113 
114  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
115  switch (M) {
116  case TTI::MIM_Unindexed:
117  return ISD::UNINDEXED;
118  case TTI::MIM_PreInc:
119  return ISD::PRE_INC;
120  case TTI::MIM_PreDec:
121  return ISD::PRE_DEC;
122  case TTI::MIM_PostInc:
123  return ISD::POST_INC;
124  case TTI::MIM_PostDec:
125  return ISD::POST_DEC;
126  }
127  llvm_unreachable("Unexpected MemIndexedMode");
128  }
129 
130 protected:
131  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
132  : BaseT(DL) {}
133 
134  using TargetTransformInfoImplBase::DL;
135 
136 public:
137  /// \name Scalar TTI Implementations
138  /// @{
139  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
140                                      unsigned BitWidth, unsigned AddressSpace,
141  unsigned Alignment, bool *Fast) const {
142  EVT E = EVT::getIntegerVT(Context, BitWidth);
143  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
144  }
145 
146  bool hasBranchDivergence() { return false; }
147 
148  bool isSourceOfDivergence(const Value *V) { return false; }
149 
150  bool isAlwaysUniform(const Value *V) { return false; }
151 
152  unsigned getFlatAddressSpace() {
153  // Return an invalid address space.
154  return -1;
155  }
156 
157  bool isLegalAddImmediate(int64_t imm) {
158  return getTLI()->isLegalAddImmediate(imm);
159  }
160 
161  bool isLegalICmpImmediate(int64_t imm) {
162  return getTLI()->isLegalICmpImmediate(imm);
163  }
164 
165  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
166  bool HasBaseReg, int64_t Scale,
167  unsigned AddrSpace, Instruction *I = nullptr) {
168  TargetLoweringBase::AddrMode AM;
169  AM.BaseGV = BaseGV;
170  AM.BaseOffs = BaseOffset;
171  AM.HasBaseReg = HasBaseReg;
172  AM.Scale = Scale;
173  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
174  }
175 
176  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
177                          const DataLayout &DL) const {
178  EVT VT = getTLI()->getValueType(DL, Ty);
179  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
180  }
181 
182  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
183                           const DataLayout &DL) const {
184  EVT VT = getTLI()->getValueType(DL, Ty);
185  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
186  }
187 
188  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
189    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
190  }
191 
192  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
193  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
194  TargetLoweringBase::AddrMode AM;
195  AM.BaseGV = BaseGV;
196  AM.BaseOffs = BaseOffset;
197  AM.HasBaseReg = HasBaseReg;
198  AM.Scale = Scale;
199  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
200  }
201 
202  bool isTruncateFree(Type *Ty1, Type *Ty2) {
203  return getTLI()->isTruncateFree(Ty1, Ty2);
204  }
205 
206  bool isProfitableToHoist(Instruction *I) {
207    return getTLI()->isProfitableToHoist(I);
208  }
209 
210  bool useAA() const { return getST()->useAA(); }
211 
212  bool isTypeLegal(Type *Ty) {
213  EVT VT = getTLI()->getValueType(DL, Ty);
214  return getTLI()->isTypeLegal(VT);
215  }
216 
217  int getGEPCost(Type *PointeeType, const Value *Ptr,
218  ArrayRef<const Value *> Operands) {
219  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
220  }
221 
222  int getExtCost(const Instruction *I, const Value *Src) {
223  if (getTLI()->isExtFree(I))
224      return TargetTransformInfo::TCC_Free;
225 
226    if (isa<ZExtInst>(I) || isa<SExtInst>(I))
227      if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
228        if (getTLI()->isExtLoad(LI, I, DL))
229          return TargetTransformInfo::TCC_Free;
230 
231    return TargetTransformInfo::TCC_Basic;
232  }
233 
234  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
235                            ArrayRef<const Value *> Arguments) {
236    return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
237  }
238 
239  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
240  ArrayRef<Type *> ParamTys) {
241  if (IID == Intrinsic::cttz) {
242  if (getTLI()->isCheapToSpeculateCttz())
243        return TargetTransformInfo::TCC_Basic;
244      return TargetTransformInfo::TCC_Expensive;
245    }
246 
247  if (IID == Intrinsic::ctlz) {
248  if (getTLI()->isCheapToSpeculateCtlz())
249        return TargetTransformInfo::TCC_Basic;
250      return TargetTransformInfo::TCC_Expensive;
251    }
252 
253  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
254  }
255 
256  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
257                                            unsigned &JumpTableSize) {
258  /// Try to find the estimated number of clusters. Note that the number of
259  /// clusters identified in this function could be different from the actual
260  /// numbers found in lowering. This function ignores switches that are
261  /// lowered with a mix of jump table / bit test / BTree. It was initially
262  /// intended for estimating the cost of a switch in the inline cost
263  /// heuristic, but it is a generic cost model that may be used in other
264  /// places (e.g., in loop unrolling).
265  unsigned N = SI.getNumCases();
266  const TargetLoweringBase *TLI = getTLI();
267  const DataLayout &DL = this->getDataLayout();
268 
269  JumpTableSize = 0;
270  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
271 
272  // Early exit if both a jump table and bit test are not allowed.
273  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
274  return N;
275 
276  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
277  APInt MinCaseVal = MaxCaseVal;
278  for (auto CI : SI.cases()) {
279  const APInt &CaseVal = CI.getCaseValue()->getValue();
280  if (CaseVal.sgt(MaxCaseVal))
281  MaxCaseVal = CaseVal;
282  if (CaseVal.slt(MinCaseVal))
283  MinCaseVal = CaseVal;
284  }
285 
286  // Check if suitable for a bit test
287  if (N <= DL.getIndexSizeInBits(0u)) {
288      SmallPtrSet<const BasicBlock *, 4> Dests;
289      for (auto I : SI.cases())
290  Dests.insert(I.getCaseSuccessor());
291 
292  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
293  DL))
294  return 1;
295  }
296 
297  // Check if suitable for a jump table.
298  if (IsJTAllowed) {
299  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
300  return N;
301  uint64_t Range =
302  (MaxCaseVal - MinCaseVal)
303  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
304  // Check whether a range of clusters is dense enough for a jump table
305  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
306  JumpTableSize = Range;
307  return 1;
308  }
309  }
310  return N;
311  }
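// For example (assuming a typical 64-bit target where jump tables are allowed
// and the minimum jump table size is small): a fully dense switch over the
// values 0..99 with 100 distinct destinations fails the bit-test check but is
// dense enough for a jump table, so this returns 1 cluster and sets
// JumpTableSize to 100.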
312 
313  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
314 
315  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
316 
317  bool shouldBuildLookupTables() {
318    const TargetLoweringBase *TLI = getTLI();
319    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
320           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
321  }
322 
323  bool haveFastSqrt(Type *Ty) {
324  const TargetLoweringBase *TLI = getTLI();
325  EVT VT = TLI->getValueType(DL, Ty);
326  return TLI->isTypeLegal(VT) &&
327           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
328  }
329 
330  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
331    return true;
332  }
333 
334  unsigned getFPOpCost(Type *Ty) {
335  // Check whether FADD is available, as a proxy for floating-point in
336  // general.
337  const TargetLoweringBase *TLI = getTLI();
338  EVT VT = TLI->getValueType(DL, Ty);
339    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
340      return TargetTransformInfo::TCC_Basic;
341    return TargetTransformInfo::TCC_Expensive;
342  }
343 
344  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
345  const TargetLoweringBase *TLI = getTLI();
346  switch (Opcode) {
347  default: break;
348  case Instruction::Trunc:
349      if (TLI->isTruncateFree(OpTy, Ty))
350        return TargetTransformInfo::TCC_Free;
351      return TargetTransformInfo::TCC_Basic;
352    case Instruction::ZExt:
353      if (TLI->isZExtFree(OpTy, Ty))
354        return TargetTransformInfo::TCC_Free;
355      break;
356    }
357 
358  return BaseT::getOperationCost(Opcode, Ty, OpTy);
359  }
360 
361  unsigned getInliningThresholdMultiplier() { return 1; }
362 
362 
363  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
364                               TTI::UnrollingPreferences &UP) {
365    // This unrolling functionality is target independent, but to provide some
366  // motivation for its intended use, for x86:
367 
368  // According to the Intel 64 and IA-32 Architectures Optimization Reference
369  // Manual, Intel Core models and later have a loop stream detector (and
370  // associated uop queue) that can benefit from partial unrolling.
371  // The relevant requirements are:
372  // - The loop must have no more than 4 (8 for Nehalem and later) branches
373  // taken, and none of them may be calls.
374  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
375 
376  // According to the Software Optimization Guide for AMD Family 15h
377  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
378  // and loop buffer which can benefit from partial unrolling.
379  // The relevant requirements are:
380  // - The loop must have fewer than 16 branches
381  // - The loop must have less than 40 uops in all executed loop branches
382 
383  // The number of taken branches in a loop is hard to estimate here, and
384  // benchmarking has revealed that it is better not to be conservative when
385  // estimating the branch count. As a result, we'll ignore the branch limits
386  // until someone finds a case where it matters in practice.
387 
388  unsigned MaxOps;
389  const TargetSubtargetInfo *ST = getST();
390  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
391  MaxOps = PartialUnrollingThreshold;
392  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
393  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
394  else
395  return;
396 
397  // Scan the loop: don't unroll loops with calls.
398  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
399  ++I) {
400  BasicBlock *BB = *I;
401 
402  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
403  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
404  ImmutableCallSite CS(&*J);
405  if (const Function *F = CS.getCalledFunction()) {
406  if (!static_cast<T *>(this)->isLoweredToCall(F))
407  continue;
408  }
409 
410  return;
411  }
412  }
413 
414  // Enable runtime and partial unrolling up to the specified size.
415  // Enable using trip count upper bound to unroll loops.
416  UP.Partial = UP.Runtime = UP.UpperBound = true;
417  UP.PartialThreshold = MaxOps;
418 
419  // Avoid unrolling when optimizing for size.
420  UP.OptSizeThreshold = 0;
421    UP.PartialOptSizeThreshold = 0;
422 
423  // Set number of instructions optimized when "back edge"
424  // becomes "fall through" to default value of 2.
425  UP.BEInsns = 2;
426  }
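// For example, assuming a subtarget whose scheduling model reports
// LoopMicroOpBufferSize = 28 and a loop body with no calls that lower to real
// calls: partial, runtime, and upper-bound unrolling are enabled with
// UP.PartialThreshold = 28, and the size-optimization thresholds are set to 0
// so unrolling is effectively disabled when optimizing for size.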
427 
428  int getInstructionLatency(const Instruction *I) {
429    if (isa<LoadInst>(I))
430      return getST()->getSchedModel().DefaultLoadLatency;
431 
432    return BaseT::getInstructionLatency(I);
433  }
434 
435  /// @}
436 
437  /// \name Vector TTI Implementations
438  /// @{
439 
440  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
441 
442  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
443 
444  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
445  /// are set if the result needs to be inserted and/or extracted from vectors.
446  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
447  assert(Ty->isVectorTy() && "Can only scalarize vectors");
448  unsigned Cost = 0;
449 
450  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
451  if (Insert)
452  Cost += static_cast<T *>(this)
453  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
454  if (Extract)
455  Cost += static_cast<T *>(this)
456  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
457  }
458 
459  return Cost;
460  }
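// For example, scalarizing a <4 x float> value with both Insert and Extract
// set is costed as four insertelement plus four extractelement operations
// (eight units on a target where getVectorInstrCost returns 1 for each).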
461 
462  /// Estimate the overhead of scalarizing an instruction's unique
463  /// non-constant operands. The types of the arguments are ordinarily
464  /// scalar, in which case the costs are multiplied with VF.
465  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
466                                            unsigned VF) {
467  unsigned Cost = 0;
468  SmallPtrSet<const Value*, 4> UniqueOperands;
469  for (const Value *A : Args) {
470  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
471  Type *VecTy = nullptr;
472  if (A->getType()->isVectorTy()) {
473  VecTy = A->getType();
474  // If A is a vector operand, VF should be 1 or correspond to A.
475  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
476  "Vector argument does not match VF");
477  }
478  else
479  VecTy = VectorType::get(A->getType(), VF);
480 
481  Cost += getScalarizationOverhead(VecTy, false, true);
482  }
483  }
484 
485  return Cost;
486  }
487 
488  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
489    assert(VecTy->isVectorTy());
490 
491  unsigned Cost = 0;
492 
493  Cost += getScalarizationOverhead(VecTy, true, false);
494  if (!Args.empty())
495      Cost += getOperandsScalarizationOverhead(Args,
496                                               VecTy->getVectorNumElements());
497  else
498  // When no information on arguments is provided, we add the cost
499  // associated with one argument as a heuristic.
500  Cost += getScalarizationOverhead(VecTy, false, true);
501 
502  return Cost;
503  }
504 
505  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
506 
507  unsigned getArithmeticInstrCost(
508      unsigned Opcode, Type *Ty,
509      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
510      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
511      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
512      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
513      ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
514  // Check if any of the operands are vector operands.
515  const TargetLoweringBase *TLI = getTLI();
516  int ISD = TLI->InstructionOpcodeToISD(Opcode);
517  assert(ISD && "Invalid opcode");
518 
519  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
520 
521  bool IsFloat = Ty->isFPOrFPVectorTy();
522  // Assume that floating point arithmetic operations cost twice as much as
523  // integer operations.
524  unsigned OpCost = (IsFloat ? 2 : 1);
525 
526  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
527  // The operation is legal. Assume it costs 1.
528  // TODO: Once we have extract/insert subvector cost we need to use them.
529  return LT.first * OpCost;
530  }
531 
532  if (!TLI->isOperationExpand(ISD, LT.second)) {
533  // If the operation is custom lowered, then assume that the code is twice
534  // as expensive.
535  return LT.first * 2 * OpCost;
536  }
537 
538  // Else, assume that we need to scalarize this op.
539  // TODO: If one of the types get legalized by splitting, handle this
540  // similarly to what getCastInstrCost() does.
541  if (Ty->isVectorTy()) {
542  unsigned Num = Ty->getVectorNumElements();
543  unsigned Cost = static_cast<T *>(this)
544  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
545  // Return the cost of multiple scalar invocation plus the cost of
546  // inserting and extracting the values.
547  return getScalarizationOverhead(Ty, Args) + Num * Cost;
548  }
549 
550  // We don't know anything about this scalar instruction.
551  return OpCost;
552  }
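// For example, an integer add on <8 x i64>, assuming a target whose widest
// legal vector register is 128 bits: the type legalizes by splitting into four
// <2 x i64> parts (LT.first == 4), and if the add is legal on <2 x i64> the
// returned cost is 4 * OpCost = 4.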
553 
554  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
555                          Type *SubTp) {
556  switch (Kind) {
557  case TTI::SK_Select:
558  case TTI::SK_Transpose:
559    case TTI::SK_PermuteSingleSrc:
560    case TTI::SK_PermuteTwoSrc:
561      return getPermuteShuffleOverhead(Tp);
562  default:
563  return 1;
564  }
565  }
566 
567  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
568  const Instruction *I = nullptr) {
569  const TargetLoweringBase *TLI = getTLI();
570  int ISD = TLI->InstructionOpcodeToISD(Opcode);
571  assert(ISD && "Invalid opcode");
572  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
573  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
574 
575  // Check for NOOP conversions.
576  if (SrcLT.first == DstLT.first &&
577  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
578 
579  // Bitcast between types that are legalized to the same type are free.
580  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
581  return 0;
582  }
583 
584  if (Opcode == Instruction::Trunc &&
585  TLI->isTruncateFree(SrcLT.second, DstLT.second))
586  return 0;
587 
588  if (Opcode == Instruction::ZExt &&
589  TLI->isZExtFree(SrcLT.second, DstLT.second))
590  return 0;
591 
592  if (Opcode == Instruction::AddrSpaceCast &&
593        TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
594                                 Dst->getPointerAddressSpace()))
595  return 0;
596 
597  // If this is a zext/sext of a load, return 0 if the corresponding
598  // extending load exists on target.
599  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
600  I && isa<LoadInst>(I->getOperand(0))) {
601  EVT ExtVT = EVT::getEVT(Dst);
602  EVT LoadVT = EVT::getEVT(Src);
603  unsigned LType =
604  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
605  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
606  return 0;
607  }
608 
609  // If the cast is marked as legal (or promote) then assume low cost.
610  if (SrcLT.first == DstLT.first &&
611  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
612  return 1;
613 
614  // Handle scalar conversions.
615  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
616  // Scalar bitcasts are usually free.
617  if (Opcode == Instruction::BitCast)
618  return 0;
619 
620  // Just check the op cost. If the operation is legal then assume it costs
621  // 1.
622  if (!TLI->isOperationExpand(ISD, DstLT.second))
623  return 1;
624 
625  // Assume that illegal scalar instruction are expensive.
626  return 4;
627  }
628 
629  // Check vector-to-vector casts.
630  if (Dst->isVectorTy() && Src->isVectorTy()) {
631  // If the cast is between same-sized registers, then the check is simple.
632  if (SrcLT.first == DstLT.first &&
633  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
634 
635  // Assume that Zext is done using AND.
636  if (Opcode == Instruction::ZExt)
637  return 1;
638 
639  // Assume that sext is done using SHL and SRA.
640  if (Opcode == Instruction::SExt)
641  return 2;
642 
643  // Just check the op cost. If the operation is legal then assume it
644  // costs
645  // 1 and multiply by the type-legalization overhead.
646  if (!TLI->isOperationExpand(ISD, DstLT.second))
647  return SrcLT.first * 1;
648  }
649 
650  // If we are legalizing by splitting, query the concrete TTI for the cost
651  // of casting the original vector twice. We also need to factor in the
652  // cost of the split itself. Count that as 1, to be consistent with
653  // TLI->getTypeLegalizationCost().
654  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
655           TargetLowering::TypeSplitVector) ||
656          (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
657           TargetLowering::TypeSplitVector)) {
658  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
659  Dst->getVectorNumElements() / 2);
660  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
661  Src->getVectorNumElements() / 2);
662  T *TTI = static_cast<T *>(this);
663  return TTI->getVectorSplitCost() +
664  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
665  }
666 
667  // In other cases where the source or destination are illegal, assume
668  // the operation will get scalarized.
669  unsigned Num = Dst->getVectorNumElements();
670  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
671  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
672 
673  // Return the cost of multiple scalar invocation plus the cost of
674  // inserting and extracting the values.
675  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
676  }
677 
678  // We already handled vector-to-vector and scalar-to-scalar conversions.
679  // This
680  // is where we handle bitcast between vectors and scalars. We need to assume
681  // that the conversion is scalarized in one way or another.
682  if (Opcode == Instruction::BitCast)
683  // Illegal bitcasts are done by storing and loading from a stack slot.
684  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
685  : 0) +
686  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
687  : 0);
688 
689  llvm_unreachable("Unhandled cast");
690  }
691 
692  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
693  VectorType *VecTy, unsigned Index) {
694  return static_cast<T *>(this)->getVectorInstrCost(
695  Instruction::ExtractElement, VecTy, Index) +
696  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
697  VecTy->getElementType());
698  }
699 
700  unsigned getCFInstrCost(unsigned Opcode) {
701  // Branches are assumed to be predicted.
702  return 0;
703  }
704 
705  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
706  const Instruction *I) {
707  const TargetLoweringBase *TLI = getTLI();
708  int ISD = TLI->InstructionOpcodeToISD(Opcode);
709  assert(ISD && "Invalid opcode");
710 
711  // Selects on vectors are actually vector selects.
712  if (ISD == ISD::SELECT) {
713  assert(CondTy && "CondTy must exist");
714  if (CondTy->isVectorTy())
715  ISD = ISD::VSELECT;
716  }
717  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
718 
719  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
720  !TLI->isOperationExpand(ISD, LT.second)) {
721  // The operation is legal. Assume it costs 1. Multiply
722  // by the type-legalization overhead.
723  return LT.first * 1;
724  }
725 
726  // Otherwise, assume that the cast is scalarized.
727  // TODO: If one of the types get legalized by splitting, handle this
728  // similarly to what getCastInstrCost() does.
729  if (ValTy->isVectorTy()) {
730  unsigned Num = ValTy->getVectorNumElements();
731  if (CondTy)
732  CondTy = CondTy->getScalarType();
733  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
734  Opcode, ValTy->getScalarType(), CondTy, I);
735 
736  // Return the cost of multiple scalar invocation plus the cost of
737  // inserting and extracting the values.
738  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
739  }
740 
741  // Unknown scalar opcode.
742  return 1;
743  }
744 
745  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
746  std::pair<unsigned, MVT> LT =
747  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
748 
749  return LT.first;
750  }
751 
752  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
753  unsigned AddressSpace, const Instruction *I = nullptr) {
754  assert(!Src->isVoidTy() && "Invalid type");
755  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
756 
757  // Assuming that all loads of legal types cost 1.
758  unsigned Cost = LT.first;
759 
760  if (Src->isVectorTy() &&
761  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
762  // This is a vector load that legalizes to a larger type than the vector
763  // itself. Unless the corresponding extending load or truncating store is
764  // legal, then this will scalarize.
765      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
766      EVT MemVT = getTLI()->getValueType(DL, Src);
767  if (Opcode == Instruction::Store)
768  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
769  else
770  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
771 
772  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
773  // This is a vector load/store for some illegal type that is scalarized.
774  // We must account for the cost of building or decomposing the vector.
775  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
776  Opcode == Instruction::Store);
777  }
778  }
779 
780  return Cost;
781  }
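// For example, a load of <4 x i8>, assuming it legalizes to a wider vector
// type on the target and no corresponding extending load is legal or custom:
// the scalarization overhead of rebuilding the <4 x i8> value (four inserts)
// is added on top of the base cost LT.first.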
782 
783  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
784  unsigned Factor,
785  ArrayRef<unsigned> Indices,
786  unsigned Alignment,
787  unsigned AddressSpace) {
788  VectorType *VT = dyn_cast<VectorType>(VecTy);
789  assert(VT && "Expect a vector type for interleaved memory op");
790 
791  unsigned NumElts = VT->getNumElements();
792  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
793 
794  unsigned NumSubElts = NumElts / Factor;
795  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
796 
797  // Firstly, the cost of load/store operation.
798  unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
799  Opcode, VecTy, Alignment, AddressSpace);
800 
801  // Legalize the vector type, and get the legalized and unlegalized type
802  // sizes.
803  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
804  unsigned VecTySize =
805  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
806  unsigned VecTyLTSize = VecTyLT.getStoreSize();
807 
808  // Return the ceiling of dividing A by B.
809  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
810 
811  // Scale the cost of the memory operation by the fraction of legalized
812  // instructions that will actually be used. We shouldn't account for the
813  // cost of dead instructions since they will be removed.
814  //
815  // E.g., An interleaved load of factor 8:
816  // %vec = load <16 x i64>, <16 x i64>* %ptr
817  // %v0 = shufflevector %vec, undef, <0, 8>
818  //
819  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
820  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
821  // type). The other loads are unused.
822  //
823  // We only scale the cost of loads since interleaved store groups aren't
824  // allowed to have gaps.
825  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
826  // The number of loads of a legal type it will take to represent a load
827  // of the unlegalized vector type.
828  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
829 
830  // The number of elements of the unlegalized type that correspond to a
831  // single legal instruction.
832  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
833 
834  // Determine which legal instructions will be used.
835  BitVector UsedInsts(NumLegalInsts, false);
836  for (unsigned Index : Indices)
837  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
838  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
839 
840  // Scale the cost of the load by the fraction of legal instructions that
841  // will be used.
842  Cost *= UsedInsts.count() / NumLegalInsts;
843  }
844 
845  // Then plus the cost of interleave operation.
846  if (Opcode == Instruction::Load) {
847  // The interleave cost is similar to extract sub vectors' elements
848  // from the wide vector, and insert them into sub vectors.
849  //
850  // E.g. An interleaved load of factor 2 (with one member of index 0):
851  // %vec = load <8 x i32>, <8 x i32>* %ptr
852  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
853  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
854  // <8 x i32> vector and insert them into a <4 x i32> vector.
855 
856  assert(Indices.size() <= Factor &&
857  "Interleaved memory op has too many members");
858 
859  for (unsigned Index : Indices) {
860  assert(Index < Factor && "Invalid index for interleaved memory op");
861 
862  // Extract elements from loaded vector for each sub vector.
863  for (unsigned i = 0; i < NumSubElts; i++)
864  Cost += static_cast<T *>(this)->getVectorInstrCost(
865  Instruction::ExtractElement, VT, Index + i * Factor);
866  }
867 
868  unsigned InsSubCost = 0;
869  for (unsigned i = 0; i < NumSubElts; i++)
870  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
871  Instruction::InsertElement, SubVT, i);
872 
873  Cost += Indices.size() * InsSubCost;
874  } else {
875  // The interleave cost is extract all elements from sub vectors, and
876  // insert them into the wide vector.
877  //
878  // E.g. An interleaved store of factor 2:
879  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
880  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
881  // The cost is estimated as extract all elements from both <4 x i32>
882  // vectors and insert into the <8 x i32> vector.
883 
884  unsigned ExtSubCost = 0;
885  for (unsigned i = 0; i < NumSubElts; i++)
886  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
887  Instruction::ExtractElement, SubVT, i);
888  Cost += ExtSubCost * Factor;
889 
890  for (unsigned i = 0; i < NumElts; i++)
891  Cost += static_cast<T *>(this)
892  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
893  }
894 
895  return Cost;
896  }
897 
898  /// Get intrinsic cost based on arguments.
899  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
900                                 ArrayRef<Value *> Args, FastMathFlags FMF,
901                                 unsigned VF = 1) {
902  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
903  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
904 
905  switch (IID) {
906  default: {
907  // Assume that we need to scalarize this intrinsic.
908      SmallVector<Type *, 4> Types;
909      for (Value *Op : Args) {
910  Type *OpTy = Op->getType();
911  assert(VF == 1 || !OpTy->isVectorTy());
912  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
913  }
914 
915  if (VF > 1 && !RetTy->isVoidTy())
916  RetTy = VectorType::get(RetTy, VF);
917 
918  // Compute the scalarization overhead based on Args for a vector
919  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
920  // CostModel will pass a vector RetTy and VF is 1.
921  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
922  if (RetVF > 1 || VF > 1) {
923  ScalarizationCost = 0;
924  if (!RetTy->isVoidTy())
925  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
926  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
927  }
928 
929  return static_cast<T *>(this)->
930  getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
931  }
932  case Intrinsic::masked_scatter: {
933  assert(VF == 1 && "Can't vectorize types here.");
934  Value *Mask = Args[3];
935  bool VarMask = !isa<Constant>(Mask);
936  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
937  return
938  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
939  Args[0]->getType(),
940  Args[1], VarMask,
941  Alignment);
942  }
943  case Intrinsic::masked_gather: {
944  assert(VF == 1 && "Can't vectorize types here.");
945  Value *Mask = Args[2];
946  bool VarMask = !isa<Constant>(Mask);
947  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
948  return
949  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
950  RetTy, Args[0], VarMask,
951  Alignment);
952  }
953  case Intrinsic::experimental_vector_reduce_add:
954  case Intrinsic::experimental_vector_reduce_mul:
955  case Intrinsic::experimental_vector_reduce_and:
956  case Intrinsic::experimental_vector_reduce_or:
957  case Intrinsic::experimental_vector_reduce_xor:
958  case Intrinsic::experimental_vector_reduce_fadd:
959  case Intrinsic::experimental_vector_reduce_fmul:
960  case Intrinsic::experimental_vector_reduce_smax:
961  case Intrinsic::experimental_vector_reduce_smin:
962  case Intrinsic::experimental_vector_reduce_fmax:
963  case Intrinsic::experimental_vector_reduce_fmin:
964  case Intrinsic::experimental_vector_reduce_umax:
965  case Intrinsic::experimental_vector_reduce_umin:
966  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
967  }
968  }
969 
970  /// Get intrinsic cost based on argument types.
971  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
972  /// cost of scalarizing the arguments and the return value will be computed
973  /// based on types.
974  unsigned getIntrinsicInstrCost(
975      Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
976  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
977    SmallVector<unsigned, 2> ISDs;
978    unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
979  switch (IID) {
980  default: {
981  // Assume that we need to scalarize this intrinsic.
982  unsigned ScalarizationCost = ScalarizationCostPassed;
983  unsigned ScalarCalls = 1;
984  Type *ScalarRetTy = RetTy;
985  if (RetTy->isVectorTy()) {
986  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
987  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
988  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
989  ScalarRetTy = RetTy->getScalarType();
990  }
991  SmallVector<Type *, 4> ScalarTys;
992  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
993  Type *Ty = Tys[i];
994  if (Ty->isVectorTy()) {
995  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
996  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
997  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
998  Ty = Ty->getScalarType();
999  }
1000  ScalarTys.push_back(Ty);
1001  }
1002  if (ScalarCalls == 1)
1003  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1004 
1005  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1006  IID, ScalarRetTy, ScalarTys, FMF);
1007 
1008  return ScalarCalls * ScalarCost + ScalarizationCost;
1009  }
1010  // Look for intrinsics that can be lowered directly or turned into a scalar
1011  // intrinsic call.
1012  case Intrinsic::sqrt:
1013  ISDs.push_back(ISD::FSQRT);
1014  break;
1015  case Intrinsic::sin:
1016  ISDs.push_back(ISD::FSIN);
1017  break;
1018  case Intrinsic::cos:
1019  ISDs.push_back(ISD::FCOS);
1020  break;
1021  case Intrinsic::exp:
1022  ISDs.push_back(ISD::FEXP);
1023  break;
1024  case Intrinsic::exp2:
1025  ISDs.push_back(ISD::FEXP2);
1026  break;
1027  case Intrinsic::log:
1028  ISDs.push_back(ISD::FLOG);
1029  break;
1030  case Intrinsic::log10:
1031  ISDs.push_back(ISD::FLOG10);
1032  break;
1033  case Intrinsic::log2:
1034  ISDs.push_back(ISD::FLOG2);
1035  break;
1036  case Intrinsic::fabs:
1037  ISDs.push_back(ISD::FABS);
1038  break;
1039  case Intrinsic::minnum:
1040  ISDs.push_back(ISD::FMINNUM);
1041  if (FMF.noNaNs())
1042  ISDs.push_back(ISD::FMINNAN);
1043  break;
1044  case Intrinsic::maxnum:
1045  ISDs.push_back(ISD::FMAXNUM);
1046  if (FMF.noNaNs())
1047  ISDs.push_back(ISD::FMAXNAN);
1048  break;
1049  case Intrinsic::copysign:
1050  ISDs.push_back(ISD::FCOPYSIGN);
1051  break;
1052  case Intrinsic::floor:
1053  ISDs.push_back(ISD::FFLOOR);
1054  break;
1055  case Intrinsic::ceil:
1056  ISDs.push_back(ISD::FCEIL);
1057  break;
1058  case Intrinsic::trunc:
1059  ISDs.push_back(ISD::FTRUNC);
1060  break;
1061  case Intrinsic::nearbyint:
1062  ISDs.push_back(ISD::FNEARBYINT);
1063  break;
1064  case Intrinsic::rint:
1065  ISDs.push_back(ISD::FRINT);
1066  break;
1067  case Intrinsic::round:
1068  ISDs.push_back(ISD::FROUND);
1069  break;
1070  case Intrinsic::pow:
1071  ISDs.push_back(ISD::FPOW);
1072  break;
1073  case Intrinsic::fma:
1074  ISDs.push_back(ISD::FMA);
1075  break;
1076  case Intrinsic::fmuladd:
1077  ISDs.push_back(ISD::FMA);
1078  break;
1079  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1080  case Intrinsic::lifetime_start:
1081  case Intrinsic::lifetime_end:
1082  case Intrinsic::sideeffect:
1083  return 0;
1084  case Intrinsic::masked_store:
1085  return static_cast<T *>(this)
1086  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1087  case Intrinsic::masked_load:
1088  return static_cast<T *>(this)
1089  ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1090  case Intrinsic::experimental_vector_reduce_add:
1091  return static_cast<T *>(this)->getArithmeticReductionCost(
1092  Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
1093  case Intrinsic::experimental_vector_reduce_mul:
1094  return static_cast<T *>(this)->getArithmeticReductionCost(
1095  Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
1096  case Intrinsic::experimental_vector_reduce_and:
1097  return static_cast<T *>(this)->getArithmeticReductionCost(
1098  Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
1099  case Intrinsic::experimental_vector_reduce_or:
1100  return static_cast<T *>(this)->getArithmeticReductionCost(
1101  Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
1102  case Intrinsic::experimental_vector_reduce_xor:
1103  return static_cast<T *>(this)->getArithmeticReductionCost(
1104  Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
1105  case Intrinsic::experimental_vector_reduce_fadd:
1106  return static_cast<T *>(this)->getArithmeticReductionCost(
1107  Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
1108  case Intrinsic::experimental_vector_reduce_fmul:
1109  return static_cast<T *>(this)->getArithmeticReductionCost(
1110  Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
1111  case Intrinsic::experimental_vector_reduce_smax:
1112  case Intrinsic::experimental_vector_reduce_smin:
1113  case Intrinsic::experimental_vector_reduce_fmax:
1114  case Intrinsic::experimental_vector_reduce_fmin:
1115  return static_cast<T *>(this)->getMinMaxReductionCost(
1116  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1117  /*IsSigned=*/true);
1118  case Intrinsic::experimental_vector_reduce_umax:
1119  case Intrinsic::experimental_vector_reduce_umin:
1120  return static_cast<T *>(this)->getMinMaxReductionCost(
1121  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1122  /*IsSigned=*/false);
1123  case Intrinsic::ctpop:
1124  ISDs.push_back(ISD::CTPOP);
1125  // In case of legalization use TCC_Expensive. This is cheaper than a
1126  // library call but still not a cheap instruction.
1127  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1128  break;
1129  // FIXME: ctlz, cttz, ...
1130  }
1131 
1132  const TargetLoweringBase *TLI = getTLI();
1133  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1134 
1135  SmallVector<unsigned, 2> LegalCost;
1136  SmallVector<unsigned, 2> CustomCost;
1137  for (unsigned ISD : ISDs) {
1138  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1139  if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
1140  return 0;
1141  }
1142 
1143  // The operation is legal. Assume it costs 1.
1144  // If the type is split to multiple registers, assume that there is some
1145  // overhead to this.
1146  // TODO: Once we have extract/insert subvector cost we need to use them.
1147  if (LT.first > 1)
1148  LegalCost.push_back(LT.first * 2);
1149  else
1150  LegalCost.push_back(LT.first * 1);
1151  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1152  // If the operation is custom lowered then assume
1153  // that the code is twice as expensive.
1154  CustomCost.push_back(LT.first * 2);
1155  }
1156  }
1157 
1158  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1159  if (MinLegalCostI != LegalCost.end())
1160  return *MinLegalCostI;
1161 
1162  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1163  if (MinCustomCostI != CustomCost.end())
1164  return *MinCustomCostI;
1165 
1166  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1167  // point mul followed by an add.
1168  if (IID == Intrinsic::fmuladd)
1169  return static_cast<T *>(this)
1170  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1171  static_cast<T *>(this)
1172  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1173 
1174  // Else, assume that we need to scalarize this intrinsic. For math builtins
1175  // this will emit a costly libcall, adding call overhead and spills. Make it
1176  // very expensive.
1177  if (RetTy->isVectorTy()) {
1178  unsigned ScalarizationCost =
1179  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1180  ? ScalarizationCostPassed
1181  : getScalarizationOverhead(RetTy, true, false));
1182  unsigned ScalarCalls = RetTy->getVectorNumElements();
1183  SmallVector<Type *, 4> ScalarTys;
1184  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1185  Type *Ty = Tys[i];
1186  if (Ty->isVectorTy())
1187  Ty = Ty->getScalarType();
1188  ScalarTys.push_back(Ty);
1189  }
1190  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1191  IID, RetTy->getScalarType(), ScalarTys, FMF);
1192  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1193  if (Tys[i]->isVectorTy()) {
1194  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1195  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1196  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1197  }
1198  }
1199 
1200  return ScalarCalls * ScalarCost + ScalarizationCost;
1201  }
1202 
1203  // This is going to be turned into a library call, make it expensive.
1204  return SingleCallCost;
1205  }
1206 
1207  /// Compute a cost of the given call instruction.
1208  ///
1209  /// Compute the cost of calling function F with return type RetTy and
1210  /// argument types Tys. F might be nullptr, in this case the cost of an
1211  /// arbitrary call with the specified signature will be returned.
1212  /// This is used, for instance, when we estimate call of a vector
1213  /// counterpart of the given function.
1214  /// \param F Called function, might be nullptr.
1215  /// \param RetTy Return value types.
1216  /// \param Tys Argument types.
1217  /// \returns The cost of Call instruction.
1218  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1219    return 10;
1220  }
1221 
1222  unsigned getNumberOfParts(Type *Tp) {
1223  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1224  return LT.first;
1225  }
1226 
1227  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1228                                     const SCEV *) {
1229  return 0;
1230  }
1231 
1232  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1233  /// We assume that reduction operations are performed in the following way:
1234  /// 1. Non-pairwise reduction
1235  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1236  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1237  /// \----------------v-------------/ \----------v------------/
1238  /// n/2 elements n/2 elements
1239  /// %red1 = op <n x t> %val, <n x t> val1
1240  /// After this operation we have a vector %red1 where only the first n/2
1241  /// elements are meaningful, the second n/2 elements are undefined and can be
1242  /// dropped. All other operations are actually working with the vector of
1243  /// length n/2, not n, though the real vector length is still n.
1244  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1245  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1246  /// \----------------v-------------/ \----------v------------/
1247  /// n/4 elements 3*n/4 elements
1248  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1249  /// length n/2, the resulting vector has length n/4 etc.
1250  /// 2. Pairwise reduction:
1251  /// Everything is the same except for an additional shuffle operation which
1252  /// is used to produce operands for pairwise kind of reductions.
1253  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1254  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1255  /// \-------------v----------/ \----------v------------/
1256  /// n/2 elements n/2 elements
1257  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1258  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1259  /// \-------------v----------/ \----------v------------/
1260  /// n/2 elements n/2 elements
1261  /// %red1 = op <n x t> %val1, <n x t> val2
1262  /// Again, the operation is performed on <n x t> vector, but the resulting
1263  /// vector %red1 is <n/2 x t> vector.
1264  ///
1265  /// The cost model should take into account that the actual length of the
1266  /// vector is reduced on each iteration.
1267  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1268  bool IsPairwise) {
1269  assert(Ty->isVectorTy() && "Expect a vector type");
1270  Type *ScalarTy = Ty->getVectorElementType();
1271  unsigned NumVecElts = Ty->getVectorNumElements();
1272  unsigned NumReduxLevels = Log2_32(NumVecElts);
1273  unsigned ArithCost = 0;
1274  unsigned ShuffleCost = 0;
1275  auto *ConcreteTTI = static_cast<T *>(this);
1276  std::pair<unsigned, MVT> LT =
1277  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1278  unsigned LongVectorCount = 0;
1279  unsigned MVTLen =
1280  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1281  while (NumVecElts > MVTLen) {
1282  NumVecElts /= 2;
1283  // Assume the pairwise shuffles add a cost.
1284  ShuffleCost += (IsPairwise + 1) *
1285  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1286  NumVecElts, Ty);
1287  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1288  Ty = VectorType::get(ScalarTy, NumVecElts);
1289  ++LongVectorCount;
1290  }
1291  // The minimal length of the vector is limited by the real length of vector
1292  // operations performed on the current platform. That's why several final
1293  // reduction operations are performed on the vectors with the same
1294  // architecture-dependent length.
1295  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1296  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1297  NumVecElts, Ty);
1298  ArithCost += (NumReduxLevels - LongVectorCount) *
1299  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1300  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
1301  }
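// For example, a non-pairwise i32 add reduction of <8 x i32>, assuming the
// widest legal vector type is <4 x i32>: one shuffle/add level is costed on
// the 8-element type, the remaining two of the Log2_32(8) = 3 levels on the
// 4-element type, plus the final extract term from getScalarizationOverhead.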
1302 
1303  /// Try to calculate op costs for min/max reduction operations.
1304  /// \param CondTy Conditional type for the Select instruction.
1305  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1306  bool) {
1307  assert(Ty->isVectorTy() && "Expect a vector type");
1308  Type *ScalarTy = Ty->getVectorElementType();
1309  Type *ScalarCondTy = CondTy->getVectorElementType();
1310  unsigned NumVecElts = Ty->getVectorNumElements();
1311  unsigned NumReduxLevels = Log2_32(NumVecElts);
1312  unsigned CmpOpcode;
1313  if (Ty->isFPOrFPVectorTy()) {
1314  CmpOpcode = Instruction::FCmp;
1315  } else {
1316  assert(Ty->isIntOrIntVectorTy() &&
1317  "expecting floating point or integer type for min/max reduction");
1318  CmpOpcode = Instruction::ICmp;
1319  }
1320  unsigned MinMaxCost = 0;
1321  unsigned ShuffleCost = 0;
1322  auto *ConcreteTTI = static_cast<T *>(this);
1323  std::pair<unsigned, MVT> LT =
1324  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1325  unsigned LongVectorCount = 0;
1326  unsigned MVTLen =
1327  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1328  while (NumVecElts > MVTLen) {
1329  NumVecElts /= 2;
1330  // Assume the pairwise shuffles add a cost.
1331  ShuffleCost += (IsPairwise + 1) *
1332  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1333  NumVecElts, Ty);
1334  MinMaxCost +=
1335  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1336  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1337  nullptr);
1338  Ty = VectorType::get(ScalarTy, NumVecElts);
1339  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1340  ++LongVectorCount;
1341  }
1342  // The minimal length of the vector is limited by the real length of vector
1343  // operations performed on the current platform. That's why several final
1344    // reduction operations are performed on the vectors with the same
1345  // architecture-dependent length.
1346  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1347  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1348  NumVecElts, Ty);
1349  MinMaxCost +=
1350  (NumReduxLevels - LongVectorCount) *
1351  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1352  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1353  nullptr));
1354  // Need 3 extractelement instructions for scalarization + an additional
1355  // scalar select instruction.
1356  return ShuffleCost + MinMaxCost +
1357  3 * getScalarizationOverhead(Ty, /*Insert=*/false,
1358  /*Extract=*/true) +
1359  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
1360  ScalarCondTy, nullptr);
1361  }
1362 
1363  unsigned getVectorSplitCost() { return 1; }
1364 
1365  /// @}
1366 };
1367 
1368 /// Concrete BasicTTIImpl that can be used if no further customization
1369 /// is needed.
1370 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1371  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1372 
1373  friend BaseT;
1374 
1375  const TargetSubtargetInfo *ST;
1376  const TargetLoweringBase *TLI;
1377 
1378  const TargetSubtargetInfo *getST() const { return ST; }
1379  const TargetLoweringBase *getTLI() const { return TLI; }
1380 
1381 public:
1382  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1383 };
1384 
1385 } // end namespace llvm
1386 
1387 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Type * getVectorElementType() const
Definition: Type.h:371
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:563
BitVector & set()
Definition: BitVector.h:398
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Definition: BasicTTIImpl.h:507
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:365
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
LLVMContext & Context
bool noNaNs() const
Definition: Operator.h:200
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1127
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:446
The main scalar evolution driver.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1197
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
Definition: BasicTTIImpl.h:217
MemIndexedMode
The type of load/store indexing.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
bool sgt(const APInt &RHS) const
Signed greather than comparison.
Definition: APInt.h:1267
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:131
int getExtCost(const Instruction *I, const Value *Src)
Definition: BasicTTIImpl.h:222
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:168
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:206
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:230
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:505
unsigned getJumpBufAlignment() const
Returns the target&#39;s jmp_buf alignment in bytes (if never set, the default is 0)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:899
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:465
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:692
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:192
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:139
This file implements a class to represent arbitrary precision integral constant values and operations...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
Definition: BasicTTIImpl.h:783
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:442
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:705
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:567
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:399
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:330
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
Selects elements from the corresponding lane of either source operand.
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:203
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:182
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:334
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:202
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:304
ExtractSubvector Index indicates start offset.
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:363
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Value Type.
Concrete BasicTTIImpl that can be used if no further customization is needed.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Simple binary floating point operators.
Definition: ISDOpcodes.h:260
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:117
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=std::numeric_limits< unsigned >::max())
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:974
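A hedged sketch of querying this overload from argument types alone; CostModel (a BasicTTIImplBase-derived cost object), Ctx (an LLVMContext), and the choice of llvm.sqrt on <4 x float> are assumptions made purely for illustration.
  llvm::Type *V4F32 =
      llvm::VectorType::get(llvm::Type::getFloatTy(Ctx), /*NumElements=*/4);
  llvm::SmallVector<llvm::Type *, 1> Tys = {V4F32};
  llvm::FastMathFlags FMF;  // no fast-math flags assumed
  // Leaving ScalarizationCostPassed at its default lets the implementation
  // compute the scalarization overhead itself.
  unsigned Cost = CostModel.getIntrinsicInstrCost(llvm::Intrinsic::sqrt,
                                                  /*RetTy=*/V4F32, Tys, FMF);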
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
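A small sketch of the insert-if-absent behaviour; the set name and the in-scope pointer V are assumptions for illustration.
  #include "llvm/ADT/SmallPtrSet.h"
  llvm::SmallPtrSet<const llvm::Value *, 8> Seen;
  auto Res = Seen.insert(V);   // V: an assumed, already-defined const Value *
  if (!Res.second) {
    // V was already in the set; this insert was a no-op.
  }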
Expected to fold away in lowering.
AMDGPU Lower Kernel Arguments
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys)
Definition: BasicTTIImpl.h:239
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Merge elements from two source vectors into one with any shuffle mask.
unsigned getNumberOfParts(Type *Tp)
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
static double log2(double V)
virtual bool isProfitableToHoist(Instruction *I) const
Extended Value Type.
Definition: ValueTypes.h:34
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:157
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
OperandValueProperties
Additional properties of an operand's values.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:700
size_type size() const
Definition: SmallPtrSet.h:93
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:554
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:440
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:428
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:266
unsigned getJumpBufSize() const
Returns the target's jmp_buf size in bytes (if never set, the default is 200)
unsigned getVectorSplitCost()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:861
AddressSpace
Definition: NVPTXBaseInfo.h:22
cl::opt< unsigned > PartialUnrollingThreshold
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:289
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:539
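A self-contained sketch of the floor semantics; a zero input yields (unsigned)-1, so callers normally guard against it.
  #include "llvm/Support/MathExtras.h"
  unsigned A = llvm::Log2_32(16);  // 4
  unsigned B = llvm::Log2_32(17);  // 4: the result is floored, not rounded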
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:390
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:91
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:752
unsigned LoopMicroOpBufferSize
Definition: MCSchedule.h:284
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:288
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:150
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:566
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:152
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2)
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:599
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Definition: BasicTTIImpl.h:234
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:121
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
Definition: BasicTTIImpl.h:344
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:459
Parameters that control the generic loop unrolling transformation.
unsigned getJumpBufAlignment()
Definition: BasicTTIImpl.h:313
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
Establish a view to a call site for examination.
Definition: CallSite.h:714
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:361
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:745
block_iterator block_end() const
Definition: LoopInfo.h:155
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:188
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize)
Definition: BasicTTIImpl.h:256
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
Multiway switch.
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef< const Value *> Args)
Definition: BasicTTIImpl.h:488
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:148
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:278
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:411
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
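A minimal sketch, assuming an in-scope LLVMContext named Ctx, that builds a <4 x float> vector type:
  #include "llvm/IR/DerivedTypes.h"
  llvm::Type *F32 = llvm::Type::getFloatTy(Ctx);
  llvm::VectorType *V4F32 = llvm::VectorType::get(F32, /*NumElements=*/4);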
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal using promotion...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:212
Type * getElementType() const
Definition: DerivedTypes.h:360
bool UpperBound
Allow using trip count upper bound to unroll loops.
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
const DataLayout & getDataLayout() const
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:176
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:323
This pass exposes codegen information to IR-level passes.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:165
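A hedged sketch of one such query; TTIImpl (a BasicTTIImplBase-derived object) and Ctx (an LLVMContext) are assumptions, and the [base + 4*index] mode being tested is purely illustrative.
  bool Legal = TTIImpl.isLegalAddressingMode(llvm::Type::getInt32Ty(Ctx),
                                             /*BaseGV=*/nullptr,
                                             /*BaseOffset=*/0,
                                             /*HasBaseReg=*/true,
                                             /*Scale=*/4,
                                             /*AddrSpace=*/0);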
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:161
block_iterator block_begin() const
Definition: LoopInfo.h:154
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
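A short sketch, assuming an LLVMContext named Ctx; 32 bits maps onto a simple machine value type while 48 bits has none and stays extended.
  #include "llvm/CodeGen/ValueTypes.h"
  llvm::EVT I32 = llvm::EVT::getIntegerVT(Ctx, 32);  // simple type (MVT::i32)
  llvm::EVT I48 = llvm::EVT::getIntegerVT(Ctx, 48);  // extended (non-simple) type
  bool OnlyI32IsSimple = I32.isSimple() && !I48.isSimple();  // true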
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
The cost of a 'div' instruction on x86.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
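A hedged sketch of how a cost query might use the returned pair; TLI (a const TargetLoweringBase *), DL (a DataLayout), and Ctx (an LLVMContext) are assumed to be in scope, and <16 x i64> is just an example of a type that typically splits.
  llvm::Type *WideVec =
      llvm::VectorType::get(llvm::Type::getInt64Ty(Ctx), /*NumElements=*/16);
  std::pair<int, llvm::MVT> LT = TLI->getTypeLegalizationCost(DL, WideVec);
  // LT.first: number of legal-type pieces after splitting;
  // LT.second: the machine value type of each piece.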
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
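A minimal sketch of the NaN handling shared by minnum and maxnum: when exactly one operand is NaN, the other operand is returned.
  #include "llvm/ADT/APFloat.h"
  llvm::APFloat X(1.0);
  llvm::APFloat Y = llvm::APFloat::getNaN(llvm::APFloat::IEEEdouble());
  llvm::APFloat Lo = llvm::minnum(X, Y);  // 1.0, not NaN
  llvm::APFloat Hi = llvm::maxnum(X, Y);  // 1.0, not NaN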
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:866
BRIND - Indirect branch.
Definition: ISDOpcodes.h:595