BasicTTIImpl.h (LLVM 7.0.0svn)
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file provides a helper that implements much of the TTI interface in
12 /// terms of the target-independent code generator and TargetLowering
13 /// interfaces.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18 #define LLVM_CODEGEN_BASICTTIIMPL_H
19 
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/LoopInfo.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Analysis/TargetTransformInfoImpl.h"
28 #include "llvm/CodeGen/ISDOpcodes.h"
29 #include "llvm/CodeGen/TargetLowering.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/CodeGen/ValueTypes.h"
32 #include "llvm/IR/BasicBlock.h"
33 #include "llvm/IR/CallSite.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/InstrTypes.h"
39 #include "llvm/IR/Instruction.h"
40 #include "llvm/IR/Instructions.h"
41 #include "llvm/IR/Intrinsics.h"
42 #include "llvm/IR/Operator.h"
43 #include "llvm/IR/Type.h"
44 #include "llvm/IR/Value.h"
45 #include "llvm/MC/MCSchedule.h"
46 #include "llvm/Support/Casting.h"
47 #include "llvm/Support/CommandLine.h"
48 #include "llvm/Support/ErrorHandling.h"
49 #include "llvm/Support/MachineValueType.h"
50 #include "llvm/Support/MathExtras.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <limits>
55 #include <utility>
56 
57 namespace llvm {
58 
59 class Function;
60 class GlobalValue;
61 class LLVMContext;
62 class ScalarEvolution;
63 class SCEV;
64 class TargetMachine;
65 
66 extern cl::opt<unsigned> PartialUnrollingThreshold;
67 
68 /// \brief Base class which can be used to help build a TTI implementation.
69 ///
70 /// This class provides as much implementation of the TTI interface as is
71 /// possible using the target independent parts of the code generator.
72 ///
73 /// In order to subclass it, your class must implement a getST() method to
74 /// return the subtarget, and a getTLI() method to return the target lowering.
75 /// We need these methods implemented in the derived class so that this class
76 /// doesn't have to duplicate storage for them.
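 ///
 /// As an illustrative sketch only (MyTargetTTIImpl, MySubtarget and
 /// MyTargetLowering are hypothetical names, not part of LLVM), a derived
 /// implementation might look like:
 /// \code
 ///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
 ///     const MySubtarget *ST;        // owned by the target machine
 ///     const MyTargetLowering *TLI;  // owned by the subtarget
 ///
 ///   public:
 ///     const MySubtarget *getST() const { return ST; }
 ///     const MyTargetLowering *getTLI() const { return TLI; }
 ///     // constructor initializing ST/TLI omitted in this sketch
 ///   };
 /// \endcode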
77 template <typename T>
78 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79 private:
80  using BaseT = TargetTransformInfoImplCRTPBase<T>;
81  using TTI = TargetTransformInfo;
82 
83  /// Estimate the cost of a shuffle as a sequence of extract and insert
84  /// operations.
85  unsigned getPermuteShuffleOverhead(Type *Ty) {
86  assert(Ty->isVectorTy() && "Can only shuffle vectors");
87  unsigned Cost = 0;
88  // Shuffle cost is equal to the cost of extracting elements from its
89  // arguments plus the cost of inserting them into the result vector.
90 
91  // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
92  // index 0 of the first vector, index 1 of the second vector, index 2 of
93  // the first vector, and finally index 3 of the second vector, and insert
94  // them at indices <0,1,2,3> of the result vector.
95  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
96  Cost += static_cast<T *>(this)
97  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
98  Cost += static_cast<T *>(this)
99  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
100  }
101  return Cost;
102  }
103 
104  /// \brief Local query method delegates up to T which *must* implement this!
105  const TargetSubtargetInfo *getST() const {
106  return static_cast<const T *>(this)->getST();
107  }
108 
109  /// \brief Local query method delegates up to T which *must* implement this!
110  const TargetLoweringBase *getTLI() const {
111  return static_cast<const T *>(this)->getTLI();
112  }
113 
114  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
115  switch (M) {
116  case TTI::MIM_Unindexed:
117  return ISD::UNINDEXED;
118  case TTI::MIM_PreInc:
119  return ISD::PRE_INC;
120  case TTI::MIM_PreDec:
121  return ISD::PRE_DEC;
122  case TTI::MIM_PostInc:
123  return ISD::POST_INC;
124  case TTI::MIM_PostDec:
125  return ISD::POST_DEC;
126  }
127  llvm_unreachable("Unexpected MemIndexedMode");
128  }
129 
130 protected:
131  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
132  : BaseT(DL) {}
133 
134  using TargetTransformInfoImplBase::DL;
135 
136 public:
137  /// \name Scalar TTI Implementations
138  /// @{
139  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
140  unsigned BitWidth, unsigned AddressSpace,
141  unsigned Alignment, bool *Fast) const {
142  EVT E = EVT::getIntegerVT(Context, BitWidth);
143  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
144  }
145 
146  bool hasBranchDivergence() { return false; }
147 
148  bool isSourceOfDivergence(const Value *V) { return false; }
149 
150  bool isAlwaysUniform(const Value *V) { return false; }
151 
152  unsigned getFlatAddressSpace() {
153  // Return an invalid address space.
154  return -1;
155  }
156 
157  bool isLegalAddImmediate(int64_t imm) {
158  return getTLI()->isLegalAddImmediate(imm);
159  }
160 
161  bool isLegalICmpImmediate(int64_t imm) {
162  return getTLI()->isLegalICmpImmediate(imm);
163  }
164 
165  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
166  bool HasBaseReg, int64_t Scale,
167  unsigned AddrSpace, Instruction *I = nullptr) {
168  TargetLoweringBase::AddrMode AM;
169  AM.BaseGV = BaseGV;
170  AM.BaseOffs = BaseOffset;
171  AM.HasBaseReg = HasBaseReg;
172  AM.Scale = Scale;
173  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
174  }
175 
176  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
177  const DataLayout &DL) const {
178  EVT VT = getTLI()->getValueType(DL, Ty);
179  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
180  }
181 
182  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
183  const DataLayout &DL) const {
184  EVT VT = getTLI()->getValueType(DL, Ty);
185  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
186  }
187 
188  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
189  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
190  }
191 
192  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
193  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
194  TargetLoweringBase::AddrMode AM;
195  AM.BaseGV = BaseGV;
196  AM.BaseOffs = BaseOffset;
197  AM.HasBaseReg = HasBaseReg;
198  AM.Scale = Scale;
199  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
200  }
201 
202  bool isTruncateFree(Type *Ty1, Type *Ty2) {
203  return getTLI()->isTruncateFree(Ty1, Ty2);
204  }
205 
206  bool isProfitableToHoist(Instruction *I) {
207  return getTLI()->isProfitableToHoist(I);
208  }
209 
210  bool useAA() const { return getST()->useAA(); }
211 
212  bool isTypeLegal(Type *Ty) {
213  EVT VT = getTLI()->getValueType(DL, Ty);
214  return getTLI()->isTypeLegal(VT);
215  }
216 
217  int getGEPCost(Type *PointeeType, const Value *Ptr,
218  ArrayRef<const Value *> Operands) {
219  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
220  }
221 
222  int getExtCost(const Instruction *I, const Value *Src) {
223  if (getTLI()->isExtFree(I))
224  return TargetTransformInfo::TCC_Free;
225 
226  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
227  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
228  if (getTLI()->isExtLoad(LI, I, DL))
229  return TargetTransformInfo::TCC_Free;
230 
231  return TargetTransformInfo::TCC_Basic;
232  }
233 
234  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
235  ArrayRef<const Value *> Arguments) {
236  return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
237  }
238 
239  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
240  ArrayRef<Type *> ParamTys) {
241  if (IID == Intrinsic::cttz) {
242  if (getTLI()->isCheapToSpeculateCttz())
243  return TargetTransformInfo::TCC_Basic;
244  return TargetTransformInfo::TCC_Expensive;
245  }
246 
247  if (IID == Intrinsic::ctlz) {
248  if (getTLI()->isCheapToSpeculateCtlz())
249  return TargetTransformInfo::TCC_Basic;
250  return TargetTransformInfo::TCC_Expensive;
251  }
252 
253  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
254  }
255 
256  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
257  unsigned &JumpTableSize) {
258  /// Try to find the estimated number of clusters. Note that the number of
259  /// clusters identified in this function could be different from the actual
260  /// numbers found in lowering. This function ignores switches that are
261  /// lowered with a mix of jump table / bit test / BTree. It was initially
262  /// intended to be used when estimating the cost of a switch in the inline
263  /// cost heuristic, but it's a generic cost model to be used in other
264  /// places (e.g., in loop unrolling).
265  unsigned N = SI.getNumCases();
266  const TargetLoweringBase *TLI = getTLI();
267  const DataLayout &DL = this->getDataLayout();
268 
269  JumpTableSize = 0;
270  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
271 
272  // Early exit if both a jump table and bit test are not allowed.
273  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
274  return N;
275 
276  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
277  APInt MinCaseVal = MaxCaseVal;
278  for (auto CI : SI.cases()) {
279  const APInt &CaseVal = CI.getCaseValue()->getValue();
280  if (CaseVal.sgt(MaxCaseVal))
281  MaxCaseVal = CaseVal;
282  if (CaseVal.slt(MinCaseVal))
283  MinCaseVal = CaseVal;
284  }
285 
286  // Check if suitable for a bit test
287  if (N <= DL.getIndexSizeInBits(0u)) {
288  SmallPtrSet<const BasicBlock *, 4> Dests;
289  for (auto I : SI.cases())
290  Dests.insert(I.getCaseSuccessor());
291 
292  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
293  DL))
294  return 1;
295  }
296 
297  // Check if suitable for a jump table.
298  if (IsJTAllowed) {
299  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
300  return N;
301  uint64_t Range =
302  (MaxCaseVal - MinCaseVal)
303  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
304  // Check whether a range of clusters is dense enough for a jump table
305  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
306  JumpTableSize = Range;
307  return 1;
308  }
309  }
310  return N;
311  }
312 
313  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
314 
315  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
316 
317  bool shouldBuildLookupTables() {
318  const TargetLoweringBase *TLI = getTLI();
319  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
320  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
321  }
322 
323  bool haveFastSqrt(Type *Ty) {
324  const TargetLoweringBase *TLI = getTLI();
325  EVT VT = TLI->getValueType(DL, Ty);
326  return TLI->isTypeLegal(VT) &&
327  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
328  }
329 
330  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
331  return true;
332  }
333 
334  unsigned getFPOpCost(Type *Ty) {
335  // Check whether FADD is available, as a proxy for floating-point in
336  // general.
337  const TargetLoweringBase *TLI = getTLI();
338  EVT VT = TLI->getValueType(DL, Ty);
339  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
340  return TargetTransformInfo::TCC_Basic;
341  return TargetTransformInfo::TCC_Expensive;
342  }
343 
344  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
345  const TargetLoweringBase *TLI = getTLI();
346  switch (Opcode) {
347  default: break;
348  case Instruction::Trunc:
349  if (TLI->isTruncateFree(OpTy, Ty))
350  return TargetTransformInfo::TCC_Free;
351  return TargetTransformInfo::TCC_Basic;
352  case Instruction::ZExt:
353  if (TLI->isZExtFree(OpTy, Ty))
354  return TargetTransformInfo::TCC_Free;
355  return TargetTransformInfo::TCC_Basic;
356  }
357 
358  return BaseT::getOperationCost(Opcode, Ty, OpTy);
359  }
360 
361  unsigned getInliningThresholdMultiplier() { return 1; }
362 
363  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
364  TTI::UnrollingPreferences &UP) {
365  // This unrolling functionality is target independent, but to provide some
366  // motivation for its intended use, for x86:
367 
368  // According to the Intel 64 and IA-32 Architectures Optimization Reference
369  // Manual, Intel Core models and later have a loop stream detector (and
370  // associated uop queue) that can benefit from partial unrolling.
371  // The relevant requirements are:
372  // - The loop must have no more than 4 (8 for Nehalem and later) branches
373  // taken, and none of them may be calls.
374  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
375 
376  // According to the Software Optimization Guide for AMD Family 15h
377  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
378  // and loop buffer which can benefit from partial unrolling.
379  // The relevant requirements are:
380  // - The loop must have fewer than 16 branches
381  // - The loop must have less than 40 uops in all executed loop branches
382 
383  // The number of taken branches in a loop is hard to estimate here, and
384  // benchmarking has revealed that it is better not to be conservative when
385  // estimating the branch count. As a result, we'll ignore the branch limits
386  // until someone finds a case where it matters in practice.
387 
388  unsigned MaxOps;
389  const TargetSubtargetInfo *ST = getST();
390  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
391  MaxOps = PartialUnrollingThreshold;
392  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
393  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
394  else
395  return;
396 
397  // Scan the loop: don't unroll loops with calls.
398  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
399  ++I) {
400  BasicBlock *BB = *I;
401 
402  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
403  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
404  ImmutableCallSite CS(&*J);
405  if (const Function *F = CS.getCalledFunction()) {
406  if (!static_cast<T *>(this)->isLoweredToCall(F))
407  continue;
408  }
409 
410  return;
411  }
412  }
413 
414  // Enable runtime and partial unrolling up to the specified size.
415  // Enable using trip count upper bound to unroll loops.
416  UP.Partial = UP.Runtime = UP.UpperBound = true;
417  UP.PartialThreshold = MaxOps;
418 
419  // Avoid unrolling when optimizing for size.
420  UP.OptSizeThreshold = 0;
421  UP.PartialOptSizeThreshold = 0;
422 
423  // Set number of instructions optimized when "back edge"
424  // becomes "fall through" to default value of 2.
425  UP.BEInsns = 2;
426  }
427 
428  int getInstructionLatency(const Instruction *I) {
429  if (isa<LoadInst>(I))
430  return getST()->getSchedModel().DefaultLoadLatency;
431 
432  return BaseT::getInstructionLatency(I);
433  }
434 
435  /// @}
436 
437  /// \name Vector TTI Implementations
438  /// @{
439 
440  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
441 
442  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
443 
444  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
445  /// are set if the result needs to be inserted and/or extracted from vectors.
446  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
447  assert(Ty->isVectorTy() && "Can only scalarize vectors");
448  unsigned Cost = 0;
449 
450  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
451  if (Insert)
452  Cost += static_cast<T *>(this)
453  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
454  if (Extract)
455  Cost += static_cast<T *>(this)
456  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
457  }
458 
459  return Cost;
460  }
461 
462  /// Estimate the overhead of scalarizing an instruction's unique
463  /// non-constant operands. The types of the arguments are ordinarily
464  /// scalar, in which case the costs are multiplied with VF.
465  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
466  unsigned VF) {
467  unsigned Cost = 0;
468  SmallPtrSet<const Value*, 4> UniqueOperands;
469  for (const Value *A : Args) {
470  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
471  Type *VecTy = nullptr;
472  if (A->getType()->isVectorTy()) {
473  VecTy = A->getType();
474  // If A is a vector operand, VF should be 1 or correspond to A.
475  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
476  "Vector argument does not match VF");
477  }
478  else
479  VecTy = VectorType::get(A->getType(), VF);
480 
481  Cost += getScalarizationOverhead(VecTy, false, true);
482  }
483  }
484 
485  return Cost;
486  }
487 
488  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
489  assert(VecTy->isVectorTy());
490 
491  unsigned Cost = 0;
492 
493  Cost += getScalarizationOverhead(VecTy, true, false);
494  if (!Args.empty())
495  Cost += getOperandsScalarizationOverhead(Args,
496  VecTy->getVectorNumElements());
497  else
498  // When no information on arguments is provided, we add the cost
499  // associated with one argument as a heuristic.
500  Cost += getScalarizationOverhead(VecTy, false, true);
501 
502  return Cost;
503  }
504 
505  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
506 
507  unsigned getArithmeticInstrCost(
508  unsigned Opcode, Type *Ty,
509  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
510  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
511  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
512  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
513  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
514  // Check if any of the operands are vector operands.
515  const TargetLoweringBase *TLI = getTLI();
516  int ISD = TLI->InstructionOpcodeToISD(Opcode);
517  assert(ISD && "Invalid opcode");
518 
519  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
520 
521  bool IsFloat = Ty->isFPOrFPVectorTy();
522  // Assume that floating point arithmetic operations cost twice as much as
523  // integer operations.
524  unsigned OpCost = (IsFloat ? 2 : 1);
525 
526  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
527  // The operation is legal. Assume it costs 1.
528  // TODO: Once we have extract/insert subvector cost we need to use them.
529  return LT.first * OpCost;
530  }
531 
532  if (!TLI->isOperationExpand(ISD, LT.second)) {
533  // If the operation is custom lowered, then assume that the code is twice
534  // as expensive.
535  return LT.first * 2 * OpCost;
536  }
537 
538  // Else, assume that we need to scalarize this op.
539  // TODO: If one of the types get legalized by splitting, handle this
540  // similarly to what getCastInstrCost() does.
541  if (Ty->isVectorTy()) {
542  unsigned Num = Ty->getVectorNumElements();
543  unsigned Cost = static_cast<T *>(this)
544  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
545  // Return the cost of multiple scalar invocation plus the cost of
546  // inserting and extracting the values.
547  return getScalarizationOverhead(Ty, Args) + Num * Cost;
548  }
549 
550  // We don't know anything about this scalar instruction.
551  return OpCost;
552  }
553 
554  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
555  Type *SubTp) {
556  if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
557  Kind == TTI::SK_PermuteSingleSrc) {
558  return getPermuteShuffleOverhead(Tp);
559  }
560  return 1;
561  }
562 
563  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
564  const Instruction *I = nullptr) {
565  const TargetLoweringBase *TLI = getTLI();
566  int ISD = TLI->InstructionOpcodeToISD(Opcode);
567  assert(ISD && "Invalid opcode");
568  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
569  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
570 
571  // Check for NOOP conversions.
572  if (SrcLT.first == DstLT.first &&
573  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
574 
575  // Bitcast between types that are legalized to the same type are free.
576  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
577  return 0;
578  }
579 
580  if (Opcode == Instruction::Trunc &&
581  TLI->isTruncateFree(SrcLT.second, DstLT.second))
582  return 0;
583 
584  if (Opcode == Instruction::ZExt &&
585  TLI->isZExtFree(SrcLT.second, DstLT.second))
586  return 0;
587 
588  if (Opcode == Instruction::AddrSpaceCast &&
589  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
590  Dst->getPointerAddressSpace()))
591  return 0;
592 
593  // If this is a zext/sext of a load, return 0 if the corresponding
594  // extending load exists on target.
595  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
596  I && isa<LoadInst>(I->getOperand(0))) {
597  EVT ExtVT = EVT::getEVT(Dst);
598  EVT LoadVT = EVT::getEVT(Src);
599  unsigned LType =
600  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
601  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
602  return 0;
603  }
604 
605  // If the cast is marked as legal (or promote) then assume low cost.
606  if (SrcLT.first == DstLT.first &&
607  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
608  return 1;
609 
610  // Handle scalar conversions.
611  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
612  // Scalar bitcasts are usually free.
613  if (Opcode == Instruction::BitCast)
614  return 0;
615 
616  // Just check the op cost. If the operation is legal then assume it costs
617  // 1.
618  if (!TLI->isOperationExpand(ISD, DstLT.second))
619  return 1;
620 
621  // Assume that illegal scalar instructions are expensive.
622  return 4;
623  }
624 
625  // Check vector-to-vector casts.
626  if (Dst->isVectorTy() && Src->isVectorTy()) {
627  // If the cast is between same-sized registers, then the check is simple.
628  if (SrcLT.first == DstLT.first &&
629  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
630 
631  // Assume that Zext is done using AND.
632  if (Opcode == Instruction::ZExt)
633  return 1;
634 
635  // Assume that sext is done using SHL and SRA.
636  if (Opcode == Instruction::SExt)
637  return 2;
638 
639  // Just check the op cost. If the operation is legal then assume it
640  // costs
641  // 1 and multiply by the type-legalization overhead.
642  if (!TLI->isOperationExpand(ISD, DstLT.second))
643  return SrcLT.first * 1;
644  }
645 
646  // If we are legalizing by splitting, query the concrete TTI for the cost
647  // of casting the original vector twice. We also need to factor in the
648  // cost of the split itself. Count that as 1, to be consistent with
649  // TLI->getTypeLegalizationCost().
650  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
651  TargetLowering::TypeSplitVector) ||
652  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
653  TargetLowering::TypeSplitVector)) {
654  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
655  Dst->getVectorNumElements() / 2);
656  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
657  Src->getVectorNumElements() / 2);
658  T *TTI = static_cast<T *>(this);
659  return TTI->getVectorSplitCost() +
660  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
661  }
662 
663  // In other cases where the source or destination are illegal, assume
664  // the operation will get scalarized.
665  unsigned Num = Dst->getVectorNumElements();
666  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
667  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
668 
669  // Return the cost of multiple scalar invocation plus the cost of
670  // inserting and extracting the values.
671  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
672  }
673 
674  // We already handled vector-to-vector and scalar-to-scalar conversions.
675  // This
676  // is where we handle bitcast between vectors and scalars. We need to assume
677  // that the conversion is scalarized in one way or another.
678  if (Opcode == Instruction::BitCast)
679  // Illegal bitcasts are done by storing and loading from a stack slot.
680  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
681  : 0) +
682  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
683  : 0);
684 
685  llvm_unreachable("Unhandled cast");
686  }
687 
688  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
689  VectorType *VecTy, unsigned Index) {
690  return static_cast<T *>(this)->getVectorInstrCost(
691  Instruction::ExtractElement, VecTy, Index) +
692  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
693  VecTy->getElementType());
694  }
695 
696  unsigned getCFInstrCost(unsigned Opcode) {
697  // Branches are assumed to be predicted.
698  return 0;
699  }
700 
701  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
702  const Instruction *I) {
703  const TargetLoweringBase *TLI = getTLI();
704  int ISD = TLI->InstructionOpcodeToISD(Opcode);
705  assert(ISD && "Invalid opcode");
706 
707  // Selects on vectors are actually vector selects.
708  if (ISD == ISD::SELECT) {
709  assert(CondTy && "CondTy must exist");
710  if (CondTy->isVectorTy())
711  ISD = ISD::VSELECT;
712  }
713  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
714 
715  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
716  !TLI->isOperationExpand(ISD, LT.second)) {
717  // The operation is legal. Assume it costs 1. Multiply
718  // by the type-legalization overhead.
719  return LT.first * 1;
720  }
721 
722  // Otherwise, assume that the cast is scalarized.
723  // TODO: If one of the types get legalized by splitting, handle this
724  // similarly to what getCastInstrCost() does.
725  if (ValTy->isVectorTy()) {
726  unsigned Num = ValTy->getVectorNumElements();
727  if (CondTy)
728  CondTy = CondTy->getScalarType();
729  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
730  Opcode, ValTy->getScalarType(), CondTy, I);
731 
732  // Return the cost of multiple scalar invocation plus the cost of
733  // inserting and extracting the values.
734  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
735  }
736 
737  // Unknown scalar opcode.
738  return 1;
739  }
740 
741  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
742  std::pair<unsigned, MVT> LT =
743  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
744 
745  return LT.first;
746  }
747 
748  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
749  unsigned AddressSpace, const Instruction *I = nullptr) {
750  assert(!Src->isVoidTy() && "Invalid type");
751  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
752 
753  // Assuming that all loads of legal types cost 1.
754  unsigned Cost = LT.first;
755 
756  if (Src->isVectorTy() &&
757  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
758  // This is a vector load that legalizes to a larger type than the vector
759  // itself. Unless the corresponding extending load or truncating store is
760  // legal, then this will scalarize.
761  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
762  EVT MemVT = getTLI()->getValueType(DL, Src);
763  if (Opcode == Instruction::Store)
764  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
765  else
766  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
767 
768  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
769  // This is a vector load/store for some illegal type that is scalarized.
770  // We must account for the cost of building or decomposing the vector.
771  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
772  Opcode == Instruction::Store);
773  }
774  }
775 
776  return Cost;
777  }
778 
779  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
780  unsigned Factor,
781  ArrayRef<unsigned> Indices,
782  unsigned Alignment,
783  unsigned AddressSpace) {
784  VectorType *VT = dyn_cast<VectorType>(VecTy);
785  assert(VT && "Expect a vector type for interleaved memory op");
786 
787  unsigned NumElts = VT->getNumElements();
788  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
789 
790  unsigned NumSubElts = NumElts / Factor;
791  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
792 
793  // Firstly, the cost of load/store operation.
794  unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
795  Opcode, VecTy, Alignment, AddressSpace);
796 
797  // Legalize the vector type, and get the legalized and unlegalized type
798  // sizes.
799  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
800  unsigned VecTySize =
801  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
802  unsigned VecTyLTSize = VecTyLT.getStoreSize();
803 
804  // Return the ceiling of dividing A by B.
805  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
806 
807  // Scale the cost of the memory operation by the fraction of legalized
808  // instructions that will actually be used. We shouldn't account for the
809  // cost of dead instructions since they will be removed.
810  //
811  // E.g., An interleaved load of factor 8:
812  // %vec = load <16 x i64>, <16 x i64>* %ptr
813  // %v0 = shufflevector %vec, undef, <0, 8>
814  //
815  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
816  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
817  // type). The other loads are unused.
818  //
819  // We only scale the cost of loads since interleaved store groups aren't
820  // allowed to have gaps.
821  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
822  // The number of loads of a legal type it will take to represent a load
823  // of the unlegalized vector type.
824  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
825 
826  // The number of elements of the unlegalized type that correspond to a
827  // single legal instruction.
828  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
829 
830  // Determine which legal instructions will be used.
831  BitVector UsedInsts(NumLegalInsts, false);
832  for (unsigned Index : Indices)
833  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
834  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
835 
836  // Scale the cost of the load by the fraction of legal instructions that
837  // will be used.
838  Cost *= UsedInsts.count() / NumLegalInsts;
839  }
840 
841  // Then add the cost of the interleave operation.
842  if (Opcode == Instruction::Load) {
843  // The interleave cost is similar to extracting the sub vectors' elements
844  // from the wide vector and inserting them into the sub vectors.
845  //
846  // E.g. An interleaved load of factor 2 (with one member of index 0):
847  // %vec = load <8 x i32>, <8 x i32>* %ptr
848  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
849  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
850  // <8 x i32> vector and insert them into a <4 x i32> vector.
851 
852  assert(Indices.size() <= Factor &&
853  "Interleaved memory op has too many members");
854 
855  for (unsigned Index : Indices) {
856  assert(Index < Factor && "Invalid index for interleaved memory op");
857 
858  // Extract elements from loaded vector for each sub vector.
859  for (unsigned i = 0; i < NumSubElts; i++)
860  Cost += static_cast<T *>(this)->getVectorInstrCost(
861  Instruction::ExtractElement, VT, Index + i * Factor);
862  }
863 
864  unsigned InsSubCost = 0;
865  for (unsigned i = 0; i < NumSubElts; i++)
866  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
867  Instruction::InsertElement, SubVT, i);
868 
869  Cost += Indices.size() * InsSubCost;
870  } else {
871  // The interleave cost is that of extracting all elements from the sub
872  // vectors and inserting them into the wide vector.
873  //
874  // E.g. An interleaved store of factor 2:
875  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
876  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
877  // The cost is estimated as extract all elements from both <4 x i32>
878  // vectors and insert into the <8 x i32> vector.
879 
880  unsigned ExtSubCost = 0;
881  for (unsigned i = 0; i < NumSubElts; i++)
882  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
883  Instruction::ExtractElement, SubVT, i);
884  Cost += ExtSubCost * Factor;
885 
886  for (unsigned i = 0; i < NumElts; i++)
887  Cost += static_cast<T *>(this)
888  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
889  }
890 
891  return Cost;
892  }
893 
894  /// Get intrinsic cost based on arguments.
895  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
896  ArrayRef<Value *> Args, FastMathFlags FMF,
897  unsigned VF = 1) {
898  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
899  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
900 
901  switch (IID) {
902  default: {
903  // Assume that we need to scalarize this intrinsic.
904  SmallVector<Type *, 4> Types;
905  for (Value *Op : Args) {
906  Type *OpTy = Op->getType();
907  assert(VF == 1 || !OpTy->isVectorTy());
908  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
909  }
910 
911  if (VF > 1 && !RetTy->isVoidTy())
912  RetTy = VectorType::get(RetTy, VF);
913 
914  // Compute the scalarization overhead based on Args for a vector
915  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
916  // CostModel will pass a vector RetTy and VF is 1.
917  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
918  if (RetVF > 1 || VF > 1) {
919  ScalarizationCost = 0;
920  if (!RetTy->isVoidTy())
921  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
922  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
923  }
924 
925  return static_cast<T *>(this)->
926  getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
927  }
928  case Intrinsic::masked_scatter: {
929  assert(VF == 1 && "Can't vectorize types here.");
930  Value *Mask = Args[3];
931  bool VarMask = !isa<Constant>(Mask);
932  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
933  return
934  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
935  Args[0]->getType(),
936  Args[1], VarMask,
937  Alignment);
938  }
939  case Intrinsic::masked_gather: {
940  assert(VF == 1 && "Can't vectorize types here.");
941  Value *Mask = Args[2];
942  bool VarMask = !isa<Constant>(Mask);
943  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
944  return
945  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
946  RetTy, Args[0], VarMask,
947  Alignment);
948  }
949  case Intrinsic::experimental_vector_reduce_add:
950  case Intrinsic::experimental_vector_reduce_mul:
951  case Intrinsic::experimental_vector_reduce_and:
952  case Intrinsic::experimental_vector_reduce_or:
953  case Intrinsic::experimental_vector_reduce_xor:
954  case Intrinsic::experimental_vector_reduce_fadd:
955  case Intrinsic::experimental_vector_reduce_fmul:
956  case Intrinsic::experimental_vector_reduce_smax:
957  case Intrinsic::experimental_vector_reduce_smin:
958  case Intrinsic::experimental_vector_reduce_fmax:
959  case Intrinsic::experimental_vector_reduce_fmin:
960  case Intrinsic::experimental_vector_reduce_umax:
961  case Intrinsic::experimental_vector_reduce_umin:
962  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
963  }
964  }
965 
966  /// Get intrinsic cost based on argument types.
967  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
968  /// cost of scalarizing the arguments and the return value will be computed
969  /// based on types.
970  unsigned getIntrinsicInstrCost(
971  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
972  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
973  SmallVector<unsigned, 2> ISDs;
974  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
975  switch (IID) {
976  default: {
977  // Assume that we need to scalarize this intrinsic.
978  unsigned ScalarizationCost = ScalarizationCostPassed;
979  unsigned ScalarCalls = 1;
980  Type *ScalarRetTy = RetTy;
981  if (RetTy->isVectorTy()) {
982  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
983  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
984  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
985  ScalarRetTy = RetTy->getScalarType();
986  }
987  SmallVector<Type *, 4> ScalarTys;
988  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
989  Type *Ty = Tys[i];
990  if (Ty->isVectorTy()) {
991  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
992  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
993  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
994  Ty = Ty->getScalarType();
995  }
996  ScalarTys.push_back(Ty);
997  }
998  if (ScalarCalls == 1)
999  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1000 
1001  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1002  IID, ScalarRetTy, ScalarTys, FMF);
1003 
1004  return ScalarCalls * ScalarCost + ScalarizationCost;
1005  }
1006  // Look for intrinsics that can be lowered directly or turned into a scalar
1007  // intrinsic call.
1008  case Intrinsic::sqrt:
1009  ISDs.push_back(ISD::FSQRT);
1010  break;
1011  case Intrinsic::sin:
1012  ISDs.push_back(ISD::FSIN);
1013  break;
1014  case Intrinsic::cos:
1015  ISDs.push_back(ISD::FCOS);
1016  break;
1017  case Intrinsic::exp:
1018  ISDs.push_back(ISD::FEXP);
1019  break;
1020  case Intrinsic::exp2:
1021  ISDs.push_back(ISD::FEXP2);
1022  break;
1023  case Intrinsic::log:
1024  ISDs.push_back(ISD::FLOG);
1025  break;
1026  case Intrinsic::log10:
1027  ISDs.push_back(ISD::FLOG10);
1028  break;
1029  case Intrinsic::log2:
1030  ISDs.push_back(ISD::FLOG2);
1031  break;
1032  case Intrinsic::fabs:
1033  ISDs.push_back(ISD::FABS);
1034  break;
1035  case Intrinsic::minnum:
1036  ISDs.push_back(ISD::FMINNUM);
1037  if (FMF.noNaNs())
1038  ISDs.push_back(ISD::FMINNAN);
1039  break;
1040  case Intrinsic::maxnum:
1041  ISDs.push_back(ISD::FMAXNUM);
1042  if (FMF.noNaNs())
1043  ISDs.push_back(ISD::FMAXNAN);
1044  break;
1045  case Intrinsic::copysign:
1046  ISDs.push_back(ISD::FCOPYSIGN);
1047  break;
1048  case Intrinsic::floor:
1049  ISDs.push_back(ISD::FFLOOR);
1050  break;
1051  case Intrinsic::ceil:
1052  ISDs.push_back(ISD::FCEIL);
1053  break;
1054  case Intrinsic::trunc:
1055  ISDs.push_back(ISD::FTRUNC);
1056  break;
1057  case Intrinsic::nearbyint:
1058  ISDs.push_back(ISD::FNEARBYINT);
1059  break;
1060  case Intrinsic::rint:
1061  ISDs.push_back(ISD::FRINT);
1062  break;
1063  case Intrinsic::round:
1064  ISDs.push_back(ISD::FROUND);
1065  break;
1066  case Intrinsic::pow:
1067  ISDs.push_back(ISD::FPOW);
1068  break;
1069  case Intrinsic::fma:
1070  ISDs.push_back(ISD::FMA);
1071  break;
1072  case Intrinsic::fmuladd:
1073  ISDs.push_back(ISD::FMA);
1074  break;
1075  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1076  case Intrinsic::lifetime_start:
1077  case Intrinsic::lifetime_end:
1078  case Intrinsic::sideeffect:
1079  return 0;
1080  case Intrinsic::masked_store:
1081  return static_cast<T *>(this)
1082  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1083  case Intrinsic::masked_load:
1084  return static_cast<T *>(this)
1085  ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1086  case Intrinsic::experimental_vector_reduce_add:
1087  return static_cast<T *>(this)->getArithmeticReductionCost(
1088  Instruction::Add, Tys[0], /*IsPairwiseForm=*/false);
1089  case Intrinsic::experimental_vector_reduce_mul:
1090  return static_cast<T *>(this)->getArithmeticReductionCost(
1091  Instruction::Mul, Tys[0], /*IsPairwiseForm=*/false);
1092  case Intrinsic::experimental_vector_reduce_and:
1093  return static_cast<T *>(this)->getArithmeticReductionCost(
1094  Instruction::And, Tys[0], /*IsPairwiseForm=*/false);
1095  case Intrinsic::experimental_vector_reduce_or:
1096  return static_cast<T *>(this)->getArithmeticReductionCost(
1097  Instruction::Or, Tys[0], /*IsPairwiseForm=*/false);
1098  case Intrinsic::experimental_vector_reduce_xor:
1099  return static_cast<T *>(this)->getArithmeticReductionCost(
1100  Instruction::Xor, Tys[0], /*IsPairwiseForm=*/false);
1101  case Intrinsic::experimental_vector_reduce_fadd:
1102  return static_cast<T *>(this)->getArithmeticReductionCost(
1103  Instruction::FAdd, Tys[0], /*IsPairwiseForm=*/false);
1104  case Intrinsic::experimental_vector_reduce_fmul:
1105  return static_cast<T *>(this)->getArithmeticReductionCost(
1106  Instruction::FMul, Tys[0], /*IsPairwiseForm=*/false);
1107  case Intrinsic::experimental_vector_reduce_smax:
1108  case Intrinsic::experimental_vector_reduce_smin:
1109  case Intrinsic::experimental_vector_reduce_fmax:
1110  case Intrinsic::experimental_vector_reduce_fmin:
1111  return static_cast<T *>(this)->getMinMaxReductionCost(
1112  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1113  /*IsSigned=*/true);
1114  case Intrinsic::experimental_vector_reduce_umax:
1115  case Intrinsic::experimental_vector_reduce_umin:
1116  return static_cast<T *>(this)->getMinMaxReductionCost(
1117  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1118  /*IsSigned=*/false);
1119  case Intrinsic::ctpop:
1120  ISDs.push_back(ISD::CTPOP);
1121  // In case of legalization use TCC_Expensive. This is cheaper than a
1122  // library call but still not a cheap instruction.
1123  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1124  break;
1125  // FIXME: ctlz, cttz, ...
1126  }
1127 
1128  const TargetLoweringBase *TLI = getTLI();
1129  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1130 
1131  SmallVector<unsigned, 2> LegalCost;
1132  SmallVector<unsigned, 2> CustomCost;
1133  for (unsigned ISD : ISDs) {
1134  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1135  if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
1136  return 0;
1137  }
1138 
1139  // The operation is legal. Assume it costs 1.
1140  // If the type is split to multiple registers, assume that there is some
1141  // overhead to this.
1142  // TODO: Once we have extract/insert subvector cost we need to use them.
1143  if (LT.first > 1)
1144  LegalCost.push_back(LT.first * 2);
1145  else
1146  LegalCost.push_back(LT.first * 1);
1147  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1148  // If the operation is custom lowered then assume
1149  // that the code is twice as expensive.
1150  CustomCost.push_back(LT.first * 2);
1151  }
1152  }
1153 
1154  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1155  if (MinLegalCostI != LegalCost.end())
1156  return *MinLegalCostI;
1157 
1158  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1159  if (MinCustomCostI != CustomCost.end())
1160  return *MinCustomCostI;
1161 
1162  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1163  // point mul followed by an add.
1164  if (IID == Intrinsic::fmuladd)
1165  return static_cast<T *>(this)
1166  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1167  static_cast<T *>(this)
1168  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1169 
1170  // Else, assume that we need to scalarize this intrinsic. For math builtins
1171  // this will emit a costly libcall, adding call overhead and spills. Make it
1172  // very expensive.
1173  if (RetTy->isVectorTy()) {
1174  unsigned ScalarizationCost =
1175  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1176  ? ScalarizationCostPassed
1177  : getScalarizationOverhead(RetTy, true, false));
1178  unsigned ScalarCalls = RetTy->getVectorNumElements();
1179  SmallVector<Type *, 4> ScalarTys;
1180  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1181  Type *Ty = Tys[i];
1182  if (Ty->isVectorTy())
1183  Ty = Ty->getScalarType();
1184  ScalarTys.push_back(Ty);
1185  }
1186  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1187  IID, RetTy->getScalarType(), ScalarTys, FMF);
1188  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1189  if (Tys[i]->isVectorTy()) {
1190  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1191  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1192  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1193  }
1194  }
1195 
1196  return ScalarCalls * ScalarCost + ScalarizationCost;
1197  }
1198 
1199  // This is going to be turned into a library call, make it expensive.
1200  return SingleCallCost;
1201  }
1202 
1203  /// \brief Compute a cost of the given call instruction.
1204  ///
1205  /// Compute the cost of calling function F with return type RetTy and
1206  /// argument types Tys. F might be nullptr, in this case the cost of an
1207  /// arbitrary call with the specified signature will be returned.
1208  /// This is used, for instance, when we estimate a call to a vector
1209  /// counterpart of the given function.
1210  /// \param F Called function, might be nullptr.
1211  /// \param RetTy Return value types.
1212  /// \param Tys Argument types.
1213  /// \returns The cost of Call instruction.
1214  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1215  return 10;
1216  }
1217 
1218  unsigned getNumberOfParts(Type *Tp) {
1219  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1220  return LT.first;
1221  }
1222 
1223  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1224  const SCEV *) {
1225  return 0;
1226  }
1227 
1228  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1229  /// We're assuming that reduction operations are performed in the following way:
1230  /// 1. Non-pairwise reduction
1231  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1232  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1233  /// \----------------v-------------/ \----------v------------/
1234  /// n/2 elements n/2 elements
1235  /// %red1 = op <n x t> %val, <n x t> val1
1236  /// After this operation we have a vector %red1 where only the first n/2
1237  /// elements are meaningful, the second n/2 elements are undefined and can be
1238  /// dropped. All other operations are actually working with the vector of
1239  /// length n/2, not n, though the real vector length is still n.
1240  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1241  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1242  /// \----------------v-------------/ \----------v------------/
1243  /// n/4 elements 3*n/4 elements
1244  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1245  /// length n/2, the resulting vector has length n/4 etc.
1246  /// 2. Pairwise reduction:
1247  /// Everything is the same except for an additional shuffle operation which
1248  /// is used to produce operands for pairwise kind of reductions.
1249  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1250  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1251  /// \-------------v----------/ \----------v------------/
1252  /// n/2 elements n/2 elements
1253  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1254  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1255  /// \-------------v----------/ \----------v------------/
1256  /// n/2 elements n/2 elements
1257  /// %red1 = op <n x t> %val1, <n x t> val2
1258  /// Again, the operation is performed on <n x t> vector, but the resulting
1259  /// vector %red1 is <n/2 x t> vector.
1260  ///
1261  /// The cost model should take into account that the actual length of the
1262  /// vector is reduced on each iteration.
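 ///
 /// As a concrete illustration (an assumed <4 x i32> add reduction, not code
 /// quoted from this file), the non-pairwise form corresponds to:
 ///   %val1 = shufflevector <4 x i32> %val, <4 x i32> undef,
 ///           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
 ///   %red1 = add <4 x i32> %val, %val1
 ///   %val2 = shufflevector <4 x i32> %red1, <4 x i32> undef,
 ///           <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
 ///   %red2 = add <4 x i32> %red1, %val2
 ///   %res  = extractelement <4 x i32> %red2, i32 0
 /// i.e. Log2(4) = 2 shuffle + op levels, with the final extract accounted for
 /// by the scalarization overhead term below.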
1263  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1264  bool IsPairwise) {
1265  assert(Ty->isVectorTy() && "Expect a vector type");
1266  Type *ScalarTy = Ty->getVectorElementType();
1267  unsigned NumVecElts = Ty->getVectorNumElements();
1268  unsigned NumReduxLevels = Log2_32(NumVecElts);
1269  unsigned ArithCost = 0;
1270  unsigned ShuffleCost = 0;
1271  auto *ConcreteTTI = static_cast<T *>(this);
1272  std::pair<unsigned, MVT> LT =
1273  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1274  unsigned LongVectorCount = 0;
1275  unsigned MVTLen =
1276  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1277  while (NumVecElts > MVTLen) {
1278  NumVecElts /= 2;
1279  // Assume the pairwise shuffles add a cost.
1280  ShuffleCost += (IsPairwise + 1) *
1281  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1282  NumVecElts, Ty);
1283  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1284  Ty = VectorType::get(ScalarTy, NumVecElts);
1285  ++LongVectorCount;
1286  }
1287  // The minimal length of the vector is limited by the real length of vector
1288  // operations performed on the current platform. That's why several final
1289  // reduction operations are performed on the vectors with the same
1290  // architecture-dependent length.
1291  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1292  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1293  NumVecElts, Ty);
1294  ArithCost += (NumReduxLevels - LongVectorCount) *
1295  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1296  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
1297  }
1298 
1299  /// Try to calculate op costs for min/max reduction operations.
1300  /// \param CondTy Conditional type for the Select instruction.
1301  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1302  bool) {
1303  assert(Ty->isVectorTy() && "Expect a vector type");
1304  Type *ScalarTy = Ty->getVectorElementType();
1305  Type *ScalarCondTy = CondTy->getVectorElementType();
1306  unsigned NumVecElts = Ty->getVectorNumElements();
1307  unsigned NumReduxLevels = Log2_32(NumVecElts);
1308  unsigned CmpOpcode;
1309  if (Ty->isFPOrFPVectorTy()) {
1310  CmpOpcode = Instruction::FCmp;
1311  } else {
1312  assert(Ty->isIntOrIntVectorTy() &&
1313  "expecting floating point or integer type for min/max reduction");
1314  CmpOpcode = Instruction::ICmp;
1315  }
1316  unsigned MinMaxCost = 0;
1317  unsigned ShuffleCost = 0;
1318  auto *ConcreteTTI = static_cast<T *>(this);
1319  std::pair<unsigned, MVT> LT =
1320  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1321  unsigned LongVectorCount = 0;
1322  unsigned MVTLen =
1323  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1324  while (NumVecElts > MVTLen) {
1325  NumVecElts /= 2;
1326  // Assume the pairwise shuffles add a cost.
1327  ShuffleCost += (IsPairwise + 1) *
1328  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1329  NumVecElts, Ty);
1330  MinMaxCost +=
1331  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1332  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1333  nullptr);
1334  Ty = VectorType::get(ScalarTy, NumVecElts);
1335  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1336  ++LongVectorCount;
1337  }
1338  // The minimal length of the vector is limited by the real length of vector
1339  // operations performed on the current platform. That's why several final
1340  // reduction operations are performed on the vectors with the same
1341  // architecture-dependent length.
1342  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1343  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1344  NumVecElts, Ty);
1345  MinMaxCost +=
1346  (NumReduxLevels - LongVectorCount) *
1347  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1348  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1349  nullptr));
1350  // Need 3 extractelement instructions for scalarization + an additional
1351  // scalar select instruction.
1352  return ShuffleCost + MinMaxCost +
1353  3 * getScalarizationOverhead(Ty, /*Insert=*/false,
1354  /*Extract=*/true) +
1355  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
1356  ScalarCondTy, nullptr);
1357  }
1358 
1359  unsigned getVectorSplitCost() { return 1; }
1360 
1361  /// @}
1362 };
1363 
1364 /// \brief Concrete BasicTTIImpl that can be used if no further customization
1365 /// is needed.
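///
/// A target's TargetMachine typically constructs this and wraps it in a
/// TargetTransformInfo; roughly (a sketch, not quoted from LLVM):
/// \code
///   return TargetTransformInfo(BasicTTIImpl(this, F));
/// \endcode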
1366 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1367  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1368 
1369  friend class BasicTTIImplBase<BasicTTIImpl>;
1370 
1371  const TargetSubtargetInfo *ST;
1372  const TargetLoweringBase *TLI;
1373 
1374  const TargetSubtargetInfo *getST() const { return ST; }
1375  const TargetLoweringBase *getTLI() const { return TLI; }
1376 
1377 public:
1378  explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
1379 };
1380 
1381 } // end namespace llvm
1382 
1383 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Type * getVectorElementType() const
Definition: Type.h:368
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:571
BitVector & set()
Definition: BitVector.h:398
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Definition: BasicTTIImpl.h:507
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:365
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
LLVMContext & Context
bool noNaNs() const
Definition: Operator.h:200
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
static Type * makeCmpResultType(Type *opnd_type)
Create a result type for fcmp/icmp.
Definition: InstrTypes.h:1127
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:446
AMDGPU Rewrite Out Arguments
The main scalar evolution driver.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1183
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
Definition: BasicTTIImpl.h:217
MemIndexedMode
The type of load/store indexing.
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
bool sgt(const APInt &RHS) const
Signed greather than comparison.
Definition: APInt.h:1253
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:131
int getExtCost(const Instruction *I, const Value *Src)
Definition: BasicTTIImpl.h:222
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:206
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:505
unsigned getJumpBufAlignment() const
Returns the target&#39;s jmp_buf alignment in bytes (if never set, the default is 0)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:264
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:895
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:465
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:688
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:192
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:139
This file implements a class to represent arbitrary precision integral constant values and operations...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
Definition: BasicTTIImpl.h:779
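To illustrate the parameters, a hedged sketch of asking for the cost of a stride-2 interleaved load whose two members are both used; the helper name and the TTI reference are assumptions.

// Illustrative sketch only: cost of a factor-2 interleaved load where two
// <4 x float> members are extracted from one <8 x float> wide access.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

static unsigned interleavedLoadCost(const llvm::TargetTransformInfo &TTI,
                                    llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *WideVecTy = VectorType::get(Type::getFloatTy(Ctx), 8); // <8 x float>
  unsigned Indices[] = {0, 1}; // both members of the interleave group are live
  return TTI.getInterleavedMemoryOpCost(Instruction::Load, WideVecTy,
                                        /*Factor=*/2, Indices,
                                        /*Alignment=*/4, /*AddressSpace=*/0);
}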
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:442
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:701
Choose alternate elements from vector.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:563
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:399
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:330
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:203
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:182
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:334
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:202
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:301
ExtractSubvector Index indicates start offset.
bool isVoidTy() const
Return true if this is 'void'.
Definition: Type.h:141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:363
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Value Type.
Concrete BasicTTIImpl that can be used if no further customization is needed.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Simple binary floating point operators.
Definition: ISDOpcodes.h:260
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=std::numeric_limits< unsigned >::max())
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:970
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Expected to fold away in lowering.
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys)
Definition: BasicTTIImpl.h:239
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Merge elements from two source vectors into one with any shuffle mask.
unsigned getNumberOfParts(Type *Tp)
bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed load is legal on this target.
static double log2(double V)
virtual bool isProfitableToHoist(Instruction *I) const
Extended Value Type.
Definition: ValueTypes.h:34
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:157
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
OperandValueProperties
Additional properties of an operand's values.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:696
size_type size() const
Definition: SmallPtrSet.h:93
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:554
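A hedged usage sketch for this hook, pricing a broadcast shuffle through the public TargetTransformInfo interface; the helper name and the TTI reference are assumptions.

// Illustrative sketch only: price a broadcast shuffle of <4 x i32>.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

static unsigned broadcastShuffleCost(const llvm::TargetTransformInfo &TTI,
                                     llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *VecTy = VectorType::get(Type::getInt32Ty(Ctx), 4); // <4 x i32>
  // Index and SubTp do not apply to a broadcast; pass the neutral values.
  return TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy,
                            /*Index=*/0, /*SubTp=*/nullptr);
}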
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:440
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:428
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:266
unsigned getJumpBufSize() const
Returns the target's jmp_buf size in bytes (if never set, the default is 200)
unsigned getVectorSplitCost()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
AddressSpace
Definition: NVPTXBaseInfo.h:22
cl::opt< unsigned > PartialUnrollingThreshold
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:234
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:390
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:91
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:748
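A small sketch of a typical cost comparison built on this hook; the helper name and the TTI reference are illustrative assumptions.

// Illustrative sketch only: compare the modeled cost of a naturally aligned
// <4 x float> load against a byte-aligned one in address space 0.
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

static bool misalignedLoadCostsMore(const llvm::TargetTransformInfo &TTI,
                                    llvm::LLVMContext &Ctx) {
  using namespace llvm;
  Type *VecTy = VectorType::get(Type::getFloatTy(Ctx), 4); // <4 x float>
  unsigned Aligned = TTI.getMemoryOpCost(Instruction::Load, VecTy,
                                         /*Alignment=*/16, /*AddressSpace=*/0);
  unsigned Unaligned = TTI.getMemoryOpCost(Instruction::Load, VecTy,
                                           /*Alignment=*/1, /*AddressSpace=*/0);
  return Unaligned > Aligned;
}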
unsigned LoopMicroOpBufferSize
Definition: MCSchedule.h:229
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:288
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:150
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:574
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:152
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2)
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructi...
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:607
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Definition: BasicTTIImpl.h:234
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
Definition: BasicTTIImpl.h:344
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
Parameters that control the generic loop unrolling transformation.
unsigned getJumpBufAlignment()
Definition: BasicTTIImpl.h:313
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
Establish a view to a call site for examination.
Definition: CallSite.h:713
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:361
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:741
block_iterator block_end() const
Definition: LoopInfo.h:155
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:309
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:188
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize)
Definition: BasicTTIImpl.h:256
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
Multiway switch.
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef< const Value *> Args)
Definition: BasicTTIImpl.h:488
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:148
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:278
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:411
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal using promotion...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:212
Type * getElementType() const
Definition: DerivedTypes.h:360
bool UpperBound
Allow using trip count upper bound to unroll loops.
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
const DataLayout & getDataLayout() const
virtual bool useAA() const
Enable use of alias analysis during code generation (during MI scheduling, DAGCombine, etc.).
bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty, const DataLayout &DL) const
Definition: BasicTTIImpl.h:176
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:323
This pass exposes codegen information to IR-level passes.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:165
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:161
block_iterator block_begin() const
Definition: LoopInfo.h:154
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instruction...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
The cost of a 'div' instruction on x86.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
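A hedged sketch of the common pattern built on this query, scaling a per-operation cost by the number of legal parts a wide IR type splits into; the helper name splitAdjustedCost is hypothetical, and TLI/DL are assumed to be the current TargetLoweringBase and module DataLayout.

// Illustrative sketch only: scale a unit cost by the legalization split count.
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"
#include <utility>

static unsigned splitAdjustedCost(const llvm::TargetLoweringBase &TLI,
                                  const llvm::DataLayout &DL, llvm::Type *Ty,
                                  unsigned UnitCost) {
  // LT.first approximates how many legal-type operations Ty expands into;
  // LT.second is the legal MVT the type is eventually mapped to.
  std::pair<int, llvm::MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
  return LT.first * UnitCost;
}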
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const
Return true if the specified indexed store is legal on this target.
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:874
BRIND - Indirect branch.
Definition: ISDOpcodes.h:603