BasicTTIImpl.h
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file provides a helper that implements much of the TTI interface in
12 /// terms of the target-independent code generator and TargetLowering
13 /// interfaces.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18 #define LLVM_CODEGEN_BASICTTIIMPL_H
19 
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/Operator.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/MC/MCSchedule.h"
47 #include "llvm/Support/Casting.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <limits>
55 #include <utility>
56 
57 namespace llvm {
58 
59 class Function;
60 class GlobalValue;
61 class LLVMContext;
62 class ScalarEvolution;
63 class SCEV;
64 class TargetMachine;
65 
66 extern cl::opt<unsigned> PartialUnrollingThreshold;
67 
68 /// \brief Base class which can be used to help build a TTI implementation.
69 ///
70 /// This class provides as much implementation of the TTI interface as is
71 /// possible using the target independent parts of the code generator.
72 ///
73 /// In order to subclass it, your class must implement a getST() method to
74 /// return the subtarget, and a getTLI() method to return the target lowering.
75 /// We need these methods implemented in the derived class so that this class
76 /// doesn't have to duplicate storage for them.
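///
/// For illustration only (editorial sketch, not part of the upstream header),
/// a minimal derived class might look roughly like this; the "MyTarget" names
/// are hypothetical and assume MyTargetSubtarget / MyTargetLowering derive
/// from TargetSubtargetInfo / TargetLoweringBase:
/// \code
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     friend class BasicTTIImplBase<MyTargetTTIImpl>;
///
///     const MyTargetSubtarget *ST;
///     const MyTargetLowering *TLI;
///
///     // Called by BasicTTIImplBase through CRTP; the base class does not
///     // need to duplicate this storage.
///     const TargetSubtargetInfo *getST() const { return ST; }
///     const TargetLoweringBase *getTLI() const { return TLI; }
///
///   public:
///     explicit MyTargetTTIImpl(const TargetMachine *TM, const Function &F);
///   };
/// \endcode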
77 template <typename T>
78 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79 private:
80  using BaseT = TargetTransformInfoImplCRTPBase<T>;
81  using TTI = TargetTransformInfo;
82 
83  /// Estimate a cost of shuffle as a sequence of extract and insert
84  /// operations.
85  unsigned getPermuteShuffleOverhead(Type *Ty) {
86  assert(Ty->isVectorTy() && "Can only shuffle vectors");
87  unsigned Cost = 0;
88  // Shuffle cost is equal to the cost of extracting elements from its argument
89  // plus the cost of inserting them into the result vector.
90 
91  // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
92  // index 0 of the first vector, index 1 of the second vector, index 2 of the
93  // first vector and finally index 3 of the second vector, and insert them at
94  // indices <0,1,2,3> of the result vector.
95  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
96  Cost += static_cast<T *>(this)
97  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
98  Cost += static_cast<T *>(this)
99  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
100  }
101  return Cost;
102  }
103 
104  /// \brief Local query method delegates up to T which *must* implement this!
105  const TargetSubtargetInfo *getST() const {
106  return static_cast<const T *>(this)->getST();
107  }
108 
109  /// \brief Local query method delegates up to T which *must* implement this!
110  const TargetLoweringBase *getTLI() const {
111  return static_cast<const T *>(this)->getTLI();
112  }
113 
114 protected:
115  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
116  : BaseT(DL) {}
117 
118  using TargetTransformInfoImplBase::DL;
119 
120 public:
121  /// \name Scalar TTI Implementations
122  /// @{
123  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
124  unsigned BitWidth, unsigned AddressSpace,
125  unsigned Alignment, bool *Fast) const {
126  EVT E = EVT::getIntegerVT(Context, BitWidth);
127  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
128  }
129 
130  bool hasBranchDivergence() { return false; }
131 
132  bool isSourceOfDivergence(const Value *V) { return false; }
133 
134  bool isAlwaysUniform(const Value *V) { return false; }
135 
136  unsigned getFlatAddressSpace() {
137  // Return an invalid address space.
138  return -1;
139  }
140 
141  bool isLegalAddImmediate(int64_t imm) {
142  return getTLI()->isLegalAddImmediate(imm);
143  }
144 
145  bool isLegalICmpImmediate(int64_t imm) {
146  return getTLI()->isLegalICmpImmediate(imm);
147  }
148 
149  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
150  bool HasBaseReg, int64_t Scale,
151  unsigned AddrSpace, Instruction *I = nullptr) {
152  TargetLoweringBase::AddrMode AM;
153  AM.BaseGV = BaseGV;
154  AM.BaseOffs = BaseOffset;
155  AM.HasBaseReg = HasBaseReg;
156  AM.Scale = Scale;
157  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
158  }
159 
160  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
161  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
162  }
163 
164  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
165  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
166  TargetLoweringBase::AddrMode AM;
167  AM.BaseGV = BaseGV;
168  AM.BaseOffs = BaseOffset;
169  AM.HasBaseReg = HasBaseReg;
170  AM.Scale = Scale;
171  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
172  }
173 
174  bool isTruncateFree(Type *Ty1, Type *Ty2) {
175  return getTLI()->isTruncateFree(Ty1, Ty2);
176  }
177 
178  bool isProfitableToHoist(Instruction *I) {
179  return getTLI()->isProfitableToHoist(I);
180  }
181 
182  bool isTypeLegal(Type *Ty) {
183  EVT VT = getTLI()->getValueType(DL, Ty);
184  return getTLI()->isTypeLegal(VT);
185  }
186 
187  int getGEPCost(Type *PointeeType, const Value *Ptr,
188  ArrayRef<const Value *> Operands) {
189  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
190  }
191 
192  int getExtCost(const Instruction *I, const Value *Src) {
193  if (getTLI()->isExtFree(I))
194  return TargetTransformInfo::TCC_Free;
195 
196  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
197  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
198  if (getTLI()->isExtLoad(LI, I, DL))
199  return TargetTransformInfo::TCC_Free;
200 
201  return TargetTransformInfo::TCC_Basic;
202  }
203 
204  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
205  ArrayRef<const Value *> Arguments) {
206  return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
207  }
208 
209  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
210  ArrayRef<Type *> ParamTys) {
211  if (IID == Intrinsic::cttz) {
212  if (getTLI()->isCheapToSpeculateCttz())
213  return TargetTransformInfo::TCC_Basic;
214  return TargetTransformInfo::TCC_Expensive;
215  }
216 
217  if (IID == Intrinsic::ctlz) {
218  if (getTLI()->isCheapToSpeculateCtlz())
219  return TargetTransformInfo::TCC_Basic;
220  return TargetTransformInfo::TCC_Expensive;
221  }
222 
223  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
224  }
225 
226  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
227  unsigned &JumpTableSize) {
228  /// Try to find the estimated number of clusters. Note that the number of
229  /// clusters identified in this function could be different from the actual
230  /// numbers found in lowering. This function ignores switches that are
231  /// lowered with a mix of jump table / bit test / BTree. This function was
232  /// initially intended to be used when estimating the cost of a switch in
233  /// the inline cost heuristic, but it's a generic cost model to be used in other
234  /// places (e.g., in loop unrolling).
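 /// Illustrative example (editorial addition, assuming the default lowering
 /// thresholds): a switch with 8 contiguous case values 0..7 and distinct
 /// destinations is dense enough for a jump table, so this returns 1 and sets
 /// JumpTableSize to 8, whereas the same 8 cases scattered across a range of
 /// several thousand values are too sparse for either a jump table or a bit
 /// test and are counted as 8 separate clusters.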
235  unsigned N = SI.getNumCases();
236  const TargetLoweringBase *TLI = getTLI();
237  const DataLayout &DL = this->getDataLayout();
238 
239  JumpTableSize = 0;
240  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
241 
242  // Early exit if both a jump table and bit test are not allowed.
243  if (N < 1 || (!IsJTAllowed && DL.getPointerSizeInBits() < N))
244  return N;
245 
246  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
247  APInt MinCaseVal = MaxCaseVal;
248  for (auto CI : SI.cases()) {
249  const APInt &CaseVal = CI.getCaseValue()->getValue();
250  if (CaseVal.sgt(MaxCaseVal))
251  MaxCaseVal = CaseVal;
252  if (CaseVal.slt(MinCaseVal))
253  MinCaseVal = CaseVal;
254  }
255 
256  // Check if suitable for a bit test
257  if (N <= DL.getPointerSizeInBits()) {
258  SmallPtrSet<const BasicBlock *, 4> Dests;
259  for (auto I : SI.cases())
260  Dests.insert(I.getCaseSuccessor());
261 
262  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
263  DL))
264  return 1;
265  }
266 
267  // Check if suitable for a jump table.
268  if (IsJTAllowed) {
269  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
270  return N;
271  uint64_t Range =
272  (MaxCaseVal - MinCaseVal)
273  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
274  // Check whether a range of clusters is dense enough for a jump table
275  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
276  JumpTableSize = Range;
277  return 1;
278  }
279  }
280  return N;
281  }
282 
283  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
284 
285  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
286 
287  bool shouldBuildLookupTables() {
288  const TargetLoweringBase *TLI = getTLI();
289  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
290  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
291  }
292 
293  bool haveFastSqrt(Type *Ty) {
294  const TargetLoweringBase *TLI = getTLI();
295  EVT VT = TLI->getValueType(DL, Ty);
296  return TLI->isTypeLegal(VT) &&
297  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
298  }
299 
300  unsigned getFPOpCost(Type *Ty) {
301  // By default, FP instructions are no more expensive since they are
302  // implemented in HW. Target specific TTI can override this.
303  return TargetTransformInfo::TCC_Basic;
304  }
305 
306  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
307  const TargetLoweringBase *TLI = getTLI();
308  switch (Opcode) {
309  default: break;
310  case Instruction::Trunc:
311  if (TLI->isTruncateFree(OpTy, Ty))
312  return TargetTransformInfo::TCC_Free;
313  return TargetTransformInfo::TCC_Basic;
314  case Instruction::ZExt:
315  if (TLI->isZExtFree(OpTy, Ty))
316  return TargetTransformInfo::TCC_Free;
317  break;
318  }
319 
320  return BaseT::getOperationCost(Opcode, Ty, OpTy);
321  }
322 
323  unsigned getInliningThresholdMultiplier() { return 1; }
324 
325  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
326  TTI::UnrollingPreferences &UP) {
327  // This unrolling functionality is target independent, but to provide some
328  // motivation for its intended use, for x86:
329 
330  // According to the Intel 64 and IA-32 Architectures Optimization Reference
331  // Manual, Intel Core models and later have a loop stream detector (and
332  // associated uop queue) that can benefit from partial unrolling.
333  // The relevant requirements are:
334  // - The loop must have no more than 4 (8 for Nehalem and later) branches
335  // taken, and none of them may be calls.
336  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
337 
338  // According to the Software Optimization Guide for AMD Family 15h
339  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
340  // and loop buffer which can benefit from partial unrolling.
341  // The relevant requirements are:
342  // - The loop must have fewer than 16 branches
343  // - The loop must have less than 40 uops in all executed loop branches
344 
345  // The number of taken branches in a loop is hard to estimate here, and
346  // benchmarking has revealed that it is better not to be conservative when
347  // estimating the branch count. As a result, we'll ignore the branch limits
348  // until someone finds a case where it matters in practice.
349 
350  unsigned MaxOps;
351  const TargetSubtargetInfo *ST = getST();
352  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
353  MaxOps = PartialUnrollingThreshold;
354  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
355  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
356  else
357  return;
358 
359  // Scan the loop: don't unroll loops with calls.
360  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
361  ++I) {
362  BasicBlock *BB = *I;
363 
364  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
365  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
366  ImmutableCallSite CS(&*J);
367  if (const Function *F = CS.getCalledFunction()) {
368  if (!static_cast<T *>(this)->isLoweredToCall(F))
369  continue;
370  }
371 
372  return;
373  }
374  }
375 
376  // Enable runtime and partial unrolling up to the specified size.
377  // Enable using trip count upper bound to unroll loops.
378  UP.Partial = UP.Runtime = UP.UpperBound = true;
379  UP.PartialThreshold = MaxOps;
380 
381  // Avoid unrolling when optimizing for size.
382  UP.OptSizeThreshold = 0;
383  UP.PartialOptSizeThreshold = 0;
384 
385  // Set number of instructions optimized when "back edge"
386  // becomes "fall through" to default value of 2.
387  UP.BEInsns = 2;
388  }
389 
390  int getInstructionLatency(const Instruction *I) {
391  if (isa<LoadInst>(I))
392  return getST()->getSchedModel().DefaultLoadLatency;
393 
394  return BaseT::getInstructionLatency(I);
395  }
396 
397  /// @}
398 
399  /// \name Vector TTI Implementations
400  /// @{
401 
402  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
403 
404  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
405 
406  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
407  /// are set if the result needs to be inserted and/or extracted from vectors.
408  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
409  assert(Ty->isVectorTy() && "Can only scalarize vectors");
410  unsigned Cost = 0;
411 
412  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
413  if (Insert)
414  Cost += static_cast<T *>(this)
415  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
416  if (Extract)
417  Cost += static_cast<T *>(this)
418  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
419  }
420 
421  return Cost;
422  }
423 
424  /// Estimate the overhead of scalarizing an instruction's unique
425  /// non-constant operands. The types of the arguments are ordinarily
426  /// scalar, in which case the costs are multiplied with VF.
427  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
428  unsigned VF) {
429  unsigned Cost = 0;
430  SmallPtrSet<const Value*, 4> UniqueOperands;
431  for (const Value *A : Args) {
432  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
433  Type *VecTy = nullptr;
434  if (A->getType()->isVectorTy()) {
435  VecTy = A->getType();
436  // If A is a vector operand, VF should be 1 or correspond to A.
437  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
438  "Vector argument does not match VF");
439  }
440  else
441  VecTy = VectorType::get(A->getType(), VF);
442 
443  Cost += getScalarizationOverhead(VecTy, false, true);
444  }
445  }
446 
447  return Cost;
448  }
449 
450  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
451  assert(VecTy->isVectorTy());
452 
453  unsigned Cost = 0;
454 
455  Cost += getScalarizationOverhead(VecTy, true, false);
456  if (!Args.empty())
457  Cost += getOperandsScalarizationOverhead(Args,
458  VecTy->getVectorNumElements());
459  else
460  // When no information on arguments is provided, we add the cost
461  // associated with one argument as a heuristic.
462  Cost += getScalarizationOverhead(VecTy, false, true);
463 
464  return Cost;
465  }
466 
467  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
468 
469  unsigned getArithmeticInstrCost(
470  unsigned Opcode, Type *Ty,
471  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
472  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
473  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
474  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
475  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
476  // Check if any of the operands are vector operands.
477  const TargetLoweringBase *TLI = getTLI();
478  int ISD = TLI->InstructionOpcodeToISD(Opcode);
479  assert(ISD && "Invalid opcode");
480 
481  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
482 
483  bool IsFloat = Ty->isFPOrFPVectorTy();
484  // Assume that floating point arithmetic operations cost twice as much as
485  // integer operations.
486  unsigned OpCost = (IsFloat ? 2 : 1);
487 
488  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
489  // The operation is legal. Assume it costs 1.
490  // TODO: Once we have extract/insert subvector cost we need to use them.
491  return LT.first * OpCost;
492  }
493 
494  if (!TLI->isOperationExpand(ISD, LT.second)) {
495  // If the operation is custom lowered, then assume that the code is twice
496  // as expensive.
497  return LT.first * 2 * OpCost;
498  }
499 
500  // Else, assume that we need to scalarize this op.
501  // TODO: If one of the types get legalized by splitting, handle this
502  // similarly to what getCastInstrCost() does.
503  if (Ty->isVectorTy()) {
504  unsigned Num = Ty->getVectorNumElements();
505  unsigned Cost = static_cast<T *>(this)
506  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
507  // Return the cost of multiple scalar invocation plus the cost of
508  // inserting and extracting the values.
509  return getScalarizationOverhead(Ty, Args) + Num * Cost;
510  }
511 
512  // We don't know anything about this scalar instruction.
513  return OpCost;
514  }
515 
516  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
517  Type *SubTp) {
518  if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
519  Kind == TTI::SK_PermuteSingleSrc) {
520  return getPermuteShuffleOverhead(Tp);
521  }
522  return 1;
523  }
524 
525  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
526  const Instruction *I = nullptr) {
527  const TargetLoweringBase *TLI = getTLI();
528  int ISD = TLI->InstructionOpcodeToISD(Opcode);
529  assert(ISD && "Invalid opcode");
530  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
531  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
532 
533  // Check for NOOP conversions.
534  if (SrcLT.first == DstLT.first &&
535  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
536 
537  // Bitcasts between types that are legalized to the same type are free.
538  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
539  return 0;
540  }
541 
542  if (Opcode == Instruction::Trunc &&
543  TLI->isTruncateFree(SrcLT.second, DstLT.second))
544  return 0;
545 
546  if (Opcode == Instruction::ZExt &&
547  TLI->isZExtFree(SrcLT.second, DstLT.second))
548  return 0;
549 
550  if (Opcode == Instruction::AddrSpaceCast &&
551  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
552  Dst->getPointerAddressSpace()))
553  return 0;
554 
555  // If this is a zext/sext of a load, return 0 if the corresponding
556  // extending load exists on target.
557  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
558  I && isa<LoadInst>(I->getOperand(0))) {
559  EVT ExtVT = EVT::getEVT(Dst);
560  EVT LoadVT = EVT::getEVT(Src);
561  unsigned LType =
562  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
563  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
564  return 0;
565  }
566 
567  // If the cast is marked as legal (or promote) then assume low cost.
568  if (SrcLT.first == DstLT.first &&
569  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
570  return 1;
571 
572  // Handle scalar conversions.
573  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
574  // Scalar bitcasts are usually free.
575  if (Opcode == Instruction::BitCast)
576  return 0;
577 
578  // Just check the op cost. If the operation is legal then assume it costs
579  // 1.
580  if (!TLI->isOperationExpand(ISD, DstLT.second))
581  return 1;
582 
583  // Assume that illegal scalar instruction are expensive.
584  return 4;
585  }
586 
587  // Check vector-to-vector casts.
588  if (Dst->isVectorTy() && Src->isVectorTy()) {
589  // If the cast is between same-sized registers, then the check is simple.
590  if (SrcLT.first == DstLT.first &&
591  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
592 
593  // Assume that Zext is done using AND.
594  if (Opcode == Instruction::ZExt)
595  return 1;
596 
597  // Assume that sext is done using SHL and SRA.
598  if (Opcode == Instruction::SExt)
599  return 2;
600 
601  // Just check the op cost. If the operation is legal then assume it
602  // costs
603  // 1 and multiply by the type-legalization overhead.
604  if (!TLI->isOperationExpand(ISD, DstLT.second))
605  return SrcLT.first * 1;
606  }
607 
608  // If we are legalizing by splitting, query the concrete TTI for the cost
609  // of casting the original vector twice. We also need to factor in the
610  // cost of the split itself. Count that as 1, to be consistent with
611  // TLI->getTypeLegalizationCost().
612  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
613  TargetLowering::TypeSplitVector) ||
614  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
615  TargetLowering::TypeSplitVector)) {
616  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
617  Dst->getVectorNumElements() / 2);
618  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
619  Src->getVectorNumElements() / 2);
620  T *TTI = static_cast<T *>(this);
621  return TTI->getVectorSplitCost() +
622  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
623  }
624 
625  // In other cases where the source or destination are illegal, assume
626  // the operation will get scalarized.
627  unsigned Num = Dst->getVectorNumElements();
628  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
629  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
630 
631  // Return the cost of multiple scalar invocation plus the cost of
632  // inserting and extracting the values.
633  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
634  }
635 
636  // We already handled vector-to-vector and scalar-to-scalar conversions.
637  // This
638  // is where we handle bitcast between vectors and scalars. We need to assume
639  // that the conversion is scalarized in one way or another.
640  if (Opcode == Instruction::BitCast)
641  // Illegal bitcasts are done by storing and loading from a stack slot.
642  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
643  : 0) +
644  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
645  : 0);
646 
647  llvm_unreachable("Unhandled cast");
648  }
649 
650  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
651  VectorType *VecTy, unsigned Index) {
652  return static_cast<T *>(this)->getVectorInstrCost(
653  Instruction::ExtractElement, VecTy, Index) +
654  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
655  VecTy->getElementType());
656  }
657 
658  unsigned getCFInstrCost(unsigned Opcode) {
659  // Branches are assumed to be predicted.
660  return 0;
661  }
662 
663  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
664  const Instruction *I) {
665  const TargetLoweringBase *TLI = getTLI();
666  int ISD = TLI->InstructionOpcodeToISD(Opcode);
667  assert(ISD && "Invalid opcode");
668 
669  // Selects on vectors are actually vector selects.
670  if (ISD == ISD::SELECT) {
671  assert(CondTy && "CondTy must exist");
672  if (CondTy->isVectorTy())
673  ISD = ISD::VSELECT;
674  }
675  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
676 
677  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
678  !TLI->isOperationExpand(ISD, LT.second)) {
679  // The operation is legal. Assume it costs 1. Multiply
680  // by the type-legalization overhead.
681  return LT.first * 1;
682  }
683 
684  // Otherwise, assume that the cast is scalarized.
685  // TODO: If one of the types get legalized by splitting, handle this
686  // similarly to what getCastInstrCost() does.
687  if (ValTy->isVectorTy()) {
688  unsigned Num = ValTy->getVectorNumElements();
689  if (CondTy)
690  CondTy = CondTy->getScalarType();
691  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
692  Opcode, ValTy->getScalarType(), CondTy, I);
693 
694  // Return the cost of multiple scalar invocation plus the cost of
695  // inserting and extracting the values.
696  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
697  }
698 
699  // Unknown scalar opcode.
700  return 1;
701  }
702 
703  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
704  std::pair<unsigned, MVT> LT =
705  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
706 
707  return LT.first;
708  }
709 
710  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
711  unsigned AddressSpace, const Instruction *I = nullptr) {
712  assert(!Src->isVoidTy() && "Invalid type");
713  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
714 
715  // Assuming that all loads of legal types cost 1.
716  unsigned Cost = LT.first;
717 
718  if (Src->isVectorTy() &&
719  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
720  // This is a vector load that legalizes to a larger type than the vector
721  // itself. Unless the corresponding extending load or truncating store is
722  // legal, this will scalarize.
723  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
724  EVT MemVT = getTLI()->getValueType(DL, Src);
725  if (Opcode == Instruction::Store)
726  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
727  else
728  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
729 
730  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
731  // This is a vector load/store for some illegal type that is scalarized.
732  // We must account for the cost of building or decomposing the vector.
733  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
734  Opcode == Instruction::Store);
735  }
736  }
737 
738  return Cost;
739  }
740 
741  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
742  unsigned Factor,
743  ArrayRef<unsigned> Indices,
744  unsigned Alignment,
745  unsigned AddressSpace) {
746  VectorType *VT = dyn_cast<VectorType>(VecTy);
747  assert(VT && "Expect a vector type for interleaved memory op");
748 
749  unsigned NumElts = VT->getNumElements();
750  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
751 
752  unsigned NumSubElts = NumElts / Factor;
753  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
754 
755  // Firstly, the cost of load/store operation.
756  unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
757  Opcode, VecTy, Alignment, AddressSpace);
758 
759  // Legalize the vector type, and get the legalized and unlegalized type
760  // sizes.
761  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
762  unsigned VecTySize =
763  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
764  unsigned VecTyLTSize = VecTyLT.getStoreSize();
765 
766  // Return the ceiling of dividing A by B.
767  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
768 
769  // Scale the cost of the memory operation by the fraction of legalized
770  // instructions that will actually be used. We shouldn't account for the
771  // cost of dead instructions since they will be removed.
772  //
773  // E.g., An interleaved load of factor 8:
774  // %vec = load <16 x i64>, <16 x i64>* %ptr
775  // %v0 = shufflevector %vec, undef, <0, 8>
776  //
777  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
778  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
779  // type). The other loads are unused.
780  //
781  // We only scale the cost of loads since interleaved store groups aren't
782  // allowed to have gaps.
783  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
784  // The number of loads of a legal type it will take to represent a load
785  // of the unlegalized vector type.
786  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
787 
788  // The number of elements of the unlegalized type that correspond to a
789  // single legal instruction.
790  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
791 
792  // Determine which legal instructions will be used.
793  BitVector UsedInsts(NumLegalInsts, false);
794  for (unsigned Index : Indices)
795  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
796  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
797 
798  // Scale the cost of the load by the fraction of legal instructions that
799  // will be used.
800  Cost *= UsedInsts.count() / NumLegalInsts;
801  }
802 
803  // Then add the cost of the interleave operation.
804  if (Opcode == Instruction::Load) {
805  // The interleave cost is similar to extracting the sub vectors' elements
806  // from the wide vector, and inserting them into the sub vectors.
807  //
808  // E.g. An interleaved load of factor 2 (with one member of index 0):
809  // %vec = load <8 x i32>, <8 x i32>* %ptr
810  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
811  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
812  // <8 x i32> vector and insert them into a <4 x i32> vector.
813 
814  assert(Indices.size() <= Factor &&
815  "Interleaved memory op has too many members");
816 
817  for (unsigned Index : Indices) {
818  assert(Index < Factor && "Invalid index for interleaved memory op");
819 
820  // Extract elements from loaded vector for each sub vector.
821  for (unsigned i = 0; i < NumSubElts; i++)
822  Cost += static_cast<T *>(this)->getVectorInstrCost(
823  Instruction::ExtractElement, VT, Index + i * Factor);
824  }
825 
826  unsigned InsSubCost = 0;
827  for (unsigned i = 0; i < NumSubElts; i++)
828  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
829  Instruction::InsertElement, SubVT, i);
830 
831  Cost += Indices.size() * InsSubCost;
832  } else {
833  // The interleave cost is extracting all elements from the sub vectors, and
834  // inserting them into the wide vector.
835  //
836  // E.g. An interleaved store of factor 2:
837  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
838  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
839  // The cost is estimated as extract all elements from both <4 x i32>
840  // vectors and insert into the <8 x i32> vector.
841 
842  unsigned ExtSubCost = 0;
843  for (unsigned i = 0; i < NumSubElts; i++)
844  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
845  Instruction::ExtractElement, SubVT, i);
846  Cost += ExtSubCost * Factor;
847 
848  for (unsigned i = 0; i < NumElts; i++)
849  Cost += static_cast<T *>(this)
850  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
851  }
852 
853  return Cost;
854  }
855 
856  /// Get intrinsic cost based on arguments.
857  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
858  ArrayRef<Value *> Args, FastMathFlags FMF,
859  unsigned VF = 1) {
860  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
861  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
862 
863  switch (IID) {
864  default: {
865  // Assume that we need to scalarize this intrinsic.
866  SmallVector<Type *, 4> Types;
867  for (Value *Op : Args) {
868  Type *OpTy = Op->getType();
869  assert(VF == 1 || !OpTy->isVectorTy());
870  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
871  }
872 
873  if (VF > 1 && !RetTy->isVoidTy())
874  RetTy = VectorType::get(RetTy, VF);
875 
876  // Compute the scalarization overhead based on Args for a vector
877  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
878  // CostModel will pass a vector RetTy and VF is 1.
879  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
880  if (RetVF > 1 || VF > 1) {
881  ScalarizationCost = 0;
882  if (!RetTy->isVoidTy())
883  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
884  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
885  }
886 
887  return static_cast<T *>(this)->
888  getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
889  }
890  case Intrinsic::masked_scatter: {
891  assert(VF == 1 && "Can't vectorize types here.");
892  Value *Mask = Args[3];
893  bool VarMask = !isa<Constant>(Mask);
894  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
895  return
896  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
897  Args[0]->getType(),
898  Args[1], VarMask,
899  Alignment);
900  }
901  case Intrinsic::masked_gather: {
902  assert(VF == 1 && "Can't vectorize types here.");
903  Value *Mask = Args[2];
904  bool VarMask = !isa<Constant>(Mask);
905  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
906  return
907  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
908  RetTy, Args[0], VarMask,
909  Alignment);
910  }
911  }
912  }
913 
914  /// Get intrinsic cost based on argument types.
915  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
916  /// cost of scalarizing the arguments and the return value will be computed
917  /// based on types.
918  unsigned getIntrinsicInstrCost(
919  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
920  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
921  SmallVector<unsigned, 2> ISDs;
922  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
923  switch (IID) {
924  default: {
925  // Assume that we need to scalarize this intrinsic.
926  unsigned ScalarizationCost = ScalarizationCostPassed;
927  unsigned ScalarCalls = 1;
928  Type *ScalarRetTy = RetTy;
929  if (RetTy->isVectorTy()) {
930  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
931  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
932  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
933  ScalarRetTy = RetTy->getScalarType();
934  }
935  SmallVector<Type *, 4> ScalarTys;
936  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
937  Type *Ty = Tys[i];
938  if (Ty->isVectorTy()) {
939  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
940  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
941  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
942  Ty = Ty->getScalarType();
943  }
944  ScalarTys.push_back(Ty);
945  }
946  if (ScalarCalls == 1)
947  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
948 
949  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
950  IID, ScalarRetTy, ScalarTys, FMF);
951 
952  return ScalarCalls * ScalarCost + ScalarizationCost;
953  }
954  // Look for intrinsics that can be lowered directly or turned into a scalar
955  // intrinsic call.
956  case Intrinsic::sqrt:
957  ISDs.push_back(ISD::FSQRT);
958  break;
959  case Intrinsic::sin:
960  ISDs.push_back(ISD::FSIN);
961  break;
962  case Intrinsic::cos:
963  ISDs.push_back(ISD::FCOS);
964  break;
965  case Intrinsic::exp:
966  ISDs.push_back(ISD::FEXP);
967  break;
968  case Intrinsic::exp2:
969  ISDs.push_back(ISD::FEXP2);
970  break;
971  case Intrinsic::log:
972  ISDs.push_back(ISD::FLOG);
973  break;
974  case Intrinsic::log10:
975  ISDs.push_back(ISD::FLOG10);
976  break;
977  case Intrinsic::log2:
978  ISDs.push_back(ISD::FLOG2);
979  break;
980  case Intrinsic::fabs:
981  ISDs.push_back(ISD::FABS);
982  break;
983  case Intrinsic::minnum:
984  ISDs.push_back(ISD::FMINNUM);
985  if (FMF.noNaNs())
986  ISDs.push_back(ISD::FMINNAN);
987  break;
988  case Intrinsic::maxnum:
989  ISDs.push_back(ISD::FMAXNUM);
990  if (FMF.noNaNs())
991  ISDs.push_back(ISD::FMAXNAN);
992  break;
993  case Intrinsic::copysign:
994  ISDs.push_back(ISD::FCOPYSIGN);
995  break;
996  case Intrinsic::floor:
997  ISDs.push_back(ISD::FFLOOR);
998  break;
999  case Intrinsic::ceil:
1000  ISDs.push_back(ISD::FCEIL);
1001  break;
1002  case Intrinsic::trunc:
1003  ISDs.push_back(ISD::FTRUNC);
1004  break;
1005  case Intrinsic::nearbyint:
1006  ISDs.push_back(ISD::FNEARBYINT);
1007  break;
1008  case Intrinsic::rint:
1009  ISDs.push_back(ISD::FRINT);
1010  break;
1011  case Intrinsic::round:
1012  ISDs.push_back(ISD::FROUND);
1013  break;
1014  case Intrinsic::pow:
1015  ISDs.push_back(ISD::FPOW);
1016  break;
1017  case Intrinsic::fma:
1018  ISDs.push_back(ISD::FMA);
1019  break;
1020  case Intrinsic::fmuladd:
1021  ISDs.push_back(ISD::FMA);
1022  break;
1023  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1024  case Intrinsic::lifetime_start:
1025  case Intrinsic::lifetime_end:
1026  case Intrinsic::sideeffect:
1027  return 0;
1028  case Intrinsic::masked_store:
1029  return static_cast<T *>(this)
1030  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1031  case Intrinsic::masked_load:
1032  return static_cast<T *>(this)
1033  ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1034  case Intrinsic::ctpop:
1035  ISDs.push_back(ISD::CTPOP);
1036  // In case of legalization use TCC_Expensive. This is cheaper than a
1037  // library call but still not a cheap instruction.
1038  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1039  break;
1040  // FIXME: ctlz, cttz, ...
1041  }
1042 
1043  const TargetLoweringBase *TLI = getTLI();
1044  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1045 
1046  SmallVector<unsigned, 2> LegalCost;
1047  SmallVector<unsigned, 2> CustomCost;
1048  for (unsigned ISD : ISDs) {
1049  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1050  if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
1051  return 0;
1052  }
1053 
1054  // The operation is legal. Assume it costs 1.
1055  // If the type is split to multiple registers, assume that there is some
1056  // overhead to this.
1057  // TODO: Once we have extract/insert subvector cost we need to use them.
1058  if (LT.first > 1)
1059  LegalCost.push_back(LT.first * 2);
1060  else
1061  LegalCost.push_back(LT.first * 1);
1062  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1063  // If the operation is custom lowered then assume
1064  // that the code is twice as expensive.
1065  CustomCost.push_back(LT.first * 2);
1066  }
1067  }
1068 
1069  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1070  if (MinLegalCostI != LegalCost.end())
1071  return *MinLegalCostI;
1072 
1073  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1074  if (MinCustomCostI != CustomCost.end())
1075  return *MinCustomCostI;
1076 
1077  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1078  // point mul followed by an add.
1079  if (IID == Intrinsic::fmuladd)
1080  return static_cast<T *>(this)
1081  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1082  static_cast<T *>(this)
1083  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1084 
1085  // Else, assume that we need to scalarize this intrinsic. For math builtins
1086  // this will emit a costly libcall, adding call overhead and spills. Make it
1087  // very expensive.
1088  if (RetTy->isVectorTy()) {
1089  unsigned ScalarizationCost =
1090  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1091  ? ScalarizationCostPassed
1092  : getScalarizationOverhead(RetTy, true, false));
1093  unsigned ScalarCalls = RetTy->getVectorNumElements();
1094  SmallVector<Type *, 4> ScalarTys;
1095  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1096  Type *Ty = Tys[i];
1097  if (Ty->isVectorTy())
1098  Ty = Ty->getScalarType();
1099  ScalarTys.push_back(Ty);
1100  }
1101  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1102  IID, RetTy->getScalarType(), ScalarTys, FMF);
1103  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1104  if (Tys[i]->isVectorTy()) {
1105  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1106  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1107  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1108  }
1109  }
1110 
1111  return ScalarCalls * ScalarCost + ScalarizationCost;
1112  }
1113 
1114  // This is going to be turned into a library call, make it expensive.
1115  return SingleCallCost;
1116  }
1117 
1118  /// \brief Compute a cost of the given call instruction.
1119  ///
1120  /// Compute the cost of calling function F with return type RetTy and
1121  /// argument types Tys. F might be nullptr, in this case the cost of an
1122  /// arbitrary call with the specified signature will be returned.
1123  /// This is used, for instance, when we estimate call of a vector
1124  /// counterpart of the given function.
1125  /// \param F Called function, might be nullptr.
1126  /// \param RetTy Return value types.
1127  /// \param Tys Argument types.
1128  /// \returns The cost of Call instruction.
1129  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1130  return 10;
1131  }
1132 
1133  unsigned getNumberOfParts(Type *Tp) {
1134  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1135  return LT.first;
1136  }
1137 
1138  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1139  const SCEV *) {
1140  return 0;
1141  }
1142 
1143  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1144  /// We're assuming that reduction operations are performed in the following way:
1145  /// 1. Non-pairwise reduction
1146  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1147  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1148  /// \----------------v-------------/ \----------v------------/
1149  /// n/2 elements n/2 elements
1150  /// %red1 = op <n x t> %val, <n x t> val1
1151  /// After this operation we have a vector %red1 where only the first n/2
1152  /// elements are meaningful, the second n/2 elements are undefined and can be
1153  /// dropped. All other operations are actually working with the vector of
1154  /// length n/2, not n, though the real vector length is still n.
1155  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1156  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1157  /// \----------------v-------------/ \----------v------------/
1158  /// n/4 elements 3*n/4 elements
1159  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1160  /// length n/2, the resulting vector has length n/4 etc.
1161  /// 2. Pairwise reduction:
1162  /// Everything is the same except for an additional shuffle operation which
1163  /// is used to produce operands for pairwise kind of reductions.
1164  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1165  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1166  /// \-------------v----------/ \----------v------------/
1167  /// n/2 elements n/2 elements
1168  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1169  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1170  /// \-------------v----------/ \----------v------------/
1171  /// n/2 elements n/2 elements
1172  /// %red1 = op <n x t> %val1, <n x t> val2
1173  /// Again, the operation is performed on <n x t> vector, but the resulting
1174  /// vector %red1 is <n/2 x t> vector.
1175  ///
1176  /// The cost model should take into account that the actual length of the
1177  /// vector is reduced on each iteration.
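///
/// Worked example (editorial sketch, assuming a unit cost for each shuffle and
/// each arithmetic step): a non-pairwise fadd reduction of <8 x float> on a
/// target whose widest legal vector is <4 x float> is modeled as one
/// extract-subvector shuffle plus one fadd at the <8 x float> width, then two
/// more shuffle + fadd steps at the <4 x float> width (Log2(8) = 3 levels in
/// total), plus the scalarization overhead of reading the result out of the
/// final vector.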
1178  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1179  bool IsPairwise) {
1180  assert(Ty->isVectorTy() && "Expect a vector type");
1181  Type *ScalarTy = Ty->getVectorElementType();
1182  unsigned NumVecElts = Ty->getVectorNumElements();
1183  unsigned NumReduxLevels = Log2_32(NumVecElts);
1184  unsigned ArithCost = 0;
1185  unsigned ShuffleCost = 0;
1186  auto *ConcreteTTI = static_cast<T *>(this);
1187  std::pair<unsigned, MVT> LT =
1188  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1189  unsigned LongVectorCount = 0;
1190  unsigned MVTLen =
1191  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1192  while (NumVecElts > MVTLen) {
1193  NumVecElts /= 2;
1194  // Assume the pairwise shuffles add a cost.
1195  ShuffleCost += (IsPairwise + 1) *
1196  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1197  NumVecElts, Ty);
1198  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1199  Ty = VectorType::get(ScalarTy, NumVecElts);
1200  ++LongVectorCount;
1201  }
1202  // The minimal length of the vector is limited by the real length of vector
1203  // operations performed on the current platform. That's why several final
1204  // reduction operations are performed on the vectors with the same
1205  // architecture-dependent length.
1206  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1207  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1208  NumVecElts, Ty);
1209  ArithCost += (NumReduxLevels - LongVectorCount) *
1210  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1211  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
1212  }
1213 
1214  /// Try to calculate op costs for min/max reduction operations.
1215  /// \param CondTy Conditional type for the Select instruction.
1216  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1217  bool) {
1218  assert(Ty->isVectorTy() && "Expect a vector type");
1219  Type *ScalarTy = Ty->getVectorElementType();
1220  Type *ScalarCondTy = CondTy->getVectorElementType();
1221  unsigned NumVecElts = Ty->getVectorNumElements();
1222  unsigned NumReduxLevels = Log2_32(NumVecElts);
1223  unsigned CmpOpcode;
1224  if (Ty->isFPOrFPVectorTy()) {
1225  CmpOpcode = Instruction::FCmp;
1226  } else {
1227  assert(Ty->isIntOrIntVectorTy() &&
1228  "expecting floating point or integer type for min/max reduction");
1229  CmpOpcode = Instruction::ICmp;
1230  }
1231  unsigned MinMaxCost = 0;
1232  unsigned ShuffleCost = 0;
1233  auto *ConcreteTTI = static_cast<T *>(this);
1234  std::pair<unsigned, MVT> LT =
1235  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1236  unsigned LongVectorCount = 0;
1237  unsigned MVTLen =
1238  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1239  while (NumVecElts > MVTLen) {
1240  NumVecElts /= 2;
1241  // Assume the pairwise shuffles add a cost.
1242  ShuffleCost += (IsPairwise + 1) *
1243  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1244  NumVecElts, Ty);
1245  MinMaxCost +=
1246  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1247  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1248  nullptr);
1249  Ty = VectorType::get(ScalarTy, NumVecElts);
1250  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1251  ++LongVectorCount;
1252  }
1253  // The minimal length of the vector is limited by the real length of vector
1254  // operations performed on the current platform. That's why several final
1255  // reduction operations are performed on the vectors with the same
1256  // architecture-dependent length.
1257  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1258  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1259  NumVecElts, Ty);
1260  MinMaxCost +=
1261  (NumReduxLevels - LongVectorCount) *
1262  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1263  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1264  nullptr));
1265  // Need 3 extractelement instructions for scalarization + an additional
1266  // scalar select instruction.
1267  return ShuffleCost + MinMaxCost +
1268  3 * getScalarizationOverhead(Ty, /*Insert=*/false,
1269  /*Extract=*/true) +
1270  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
1271  ScalarCondTy, nullptr);
1272  }
1273 
1274  unsigned getVectorSplitCost() { return 1; }
1275 
1276  /// @}
1277 };
1278 
1279 /// \brief Concrete BasicTTIImpl that can be used if no further customization
1280 /// is needed.
1281 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1282  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1283 
1284  friend class BasicTTIImplBase<BasicTTIImpl>;
1285 
1286  const TargetSubtargetInfo *ST;
1287  const TargetLoweringBase *TLI;
1288 
1289  const TargetSubtargetInfo *getST() const { return ST; }
1290  const TargetLoweringBase *getTLI() const { return TLI; }
1291 
1292 public:
1293  explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
1294 };
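// For reference (editorial note, sketched from the out-of-line definition in
// lib/CodeGen/BasicTargetTransformInfo.cpp): the constructor initializes ST
// and TLI from the TargetMachine roughly as
//
//   BasicTTIImpl::BasicTTIImpl(const TargetMachine *TM, const Function &F)
//       : BaseT(TM, F.getParent()->getDataLayout()),
//         ST(TM->getSubtargetImpl(F)), TLI(ST->getTargetLowering()) {}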
1295 
1296 } // end namespace llvm
1297 
1298 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
Type * getVectorElementType() const
Definition: Type.h:368
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:109
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:569
BitVector & set()
Definition: BitVector.h:398
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Definition: BasicTTIImpl.h:469
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
LLVMContext & Context
bool noNaNs() const
Definition: Operator.h:200
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:408
AMDGPU Rewrite Out Arguments
The main scalar evolution driver.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1183
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
Definition: BasicTTIImpl.h:187
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
bool sgt(const APInt &RHS) const
Signed greather than comparison.
Definition: APInt.h:1253
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:115
int getExtCost(const Instruction *I, const Value *Src)
Definition: BasicTTIImpl.h:192
unsigned getPointerSizeInBits(unsigned AS=0) const
Layout pointer size, in bits FIXME: The defaults need to be removed once all of the backends/clients ...
Definition: DataLayout.h:344
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:178
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:467
unsigned getJumpBufAlignment() const
Returns the target&#39;s jmp_buf alignment in bytes (if never set, the default is 0)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:857
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:427
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:650
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:164
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
Definition: BasicTTIImpl.h:741
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:404
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:663
Choose alternate elements from vector.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:525
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:398
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:203
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:300
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:174
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
ExtractSubvector Index indicates start offset.
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:325
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Value Type.
Concrete BasicTTIImpl that can be used if no further customization is needed.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=std::numeric_limits< unsigned >::max())
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:918
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Expected to fold away in lowering.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys)
Definition: BasicTTIImpl.h:209
Merge elements from two source vectors into one with any shuffle mask.
unsigned getNumberOfParts(Type *Tp)
static double log2(double V)
virtual bool isProfitableToHoist(Instruction *I) const
Extended Value Type.
Definition: ValueTypes.h:34
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:141
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from the rest of CodeGen.
OperandValueProperties
Additional properties of an operand's values.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used to make them valid.
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:658
size_type size() const
Definition: SmallPtrSet.h:93
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:516
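For orientation (a hedged sketch; the helper name and the <4 x i32> type are illustrative only), a pass holding a TargetTransformInfo reference would typically issue a shuffle-cost query like the one below, which reaches this implementation on targets built on BasicTTIImplBase.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"

// Cost of reversing a <4 x i32>; Index and SubTp are unused for SK_Reverse.
static int reverseShuffleCost(const llvm::TargetTransformInfo &TTI,
                              llvm::LLVMContext &Ctx) {
  llvm::Type *V4I32 = llvm::VectorType::get(llvm::Type::getInt32Ty(Ctx), 4);
  return TTI.getShuffleCost(llvm::TargetTransformInfo::SK_Reverse, V4I32,
                            /*Index=*/0, /*SubTp=*/nullptr);
}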
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:402
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements...
Definition: SmallPtrSet.h:418
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:390
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:254
unsigned getJumpBufSize() const
Returns the target's jmp_buf size in bytes (if never set, the default is 200).
unsigned getVectorSplitCost()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:864
bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumCases cases, Range range of values.
AddressSpace
Definition: NVPTXBaseInfo.h:22
cl::opt< unsigned > PartialUnrollingThreshold
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:169
bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number of loop iterations is not known at compile time).
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to a larger size, needs to be expanded to some other code sequence, or the target has a custom expander for it.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
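For example (a small self-contained sketch):

#include "llvm/Support/MathExtras.h"
#include <cassert>

// Floor log base 2: 255 -> 7 and 256 -> 8; Log2_32(0) wraps to UINT32_MAX,
// which is the "-1" mentioned above for an unsigned return type.
static void log2Examples() {
  assert(llvm::Log2_32(255) == 7);
  assert(llvm::Log2_32(256) == 8);
}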
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:389
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:91
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:710
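A hedged usage sketch (helper name and parameters are illustrative): the same query is normally issued through the TargetTransformInfo wrapper, which dispatches to this method on targets built on BasicTTIImplBase.

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Type.h"

// Cost of a load of Ty with the given alignment from address space 0.
static int loadCost(const llvm::TargetTransformInfo &TTI, llvm::Type *Ty,
                    unsigned Alignment) {
  return TTI.getMemoryOpCost(llvm::Instruction::Load, Ty, Alignment,
                             /*AddressSpace=*/0);
}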
unsigned LoopMicroOpBufferSize
Definition: MCSchedule.h:164
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:287
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:134
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:572
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:136
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2)
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is legal icmp immediate, that is the target has icmp instructions which can compare a register against the immediate without having to materialize the immediate into a register.
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:605
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Definition: BasicTTIImpl.h:204
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
Definition: BasicTTIImpl.h:306
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
Parameters that control the generic loop unrolling transformation.
unsigned getJumpBufAlignment()
Definition: BasicTTIImpl.h:283
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
Establish a view to a call site for examination.
Definition: CallSite.h:713
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:323
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:703
block_iterator block_end() const
Definition: LoopInfo.h:155
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:311
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:160
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize)
Definition: BasicTTIImpl.h:226
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect call).
Definition: CallSite.h:107
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
Multiway switch.
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef< const Value *> Args)
Definition: BasicTTIImpl.h:450
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:132
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:277
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:386
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:593
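For instance (a minimal sketch assuming an existing LLVMContext):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

// Build a <4 x float> type and query it back.
static llvm::VectorType *makeV4F32(llvm::LLVMContext &Ctx) {
  llvm::VectorType *VTy =
      llvm::VectorType::get(llvm::Type::getFloatTy(Ctx), 4);
  assert(VTy->getNumElements() == 4 && VTy->getElementType()->isFloatTy());
  return VTy;
}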
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal using promotion...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:57
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:182
Type * getElementType() const
Definition: DerivedTypes.h:360
bool UpperBound
Allow using trip count upper bound to unroll loops.
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target, for a load/store of the specified type.
const DataLayout & getDataLayout() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand&#39;s possible values.
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:293
This pass exposes codegen information to IR-level passes.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:149
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:145
block_iterator block_begin() const
Definition: LoopInfo.h:154
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
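For example (a sketch: 48 bits has no simple MVT, so an extended EVT is returned):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cassert>

// A 48-bit integer EVT is not a simple MVT but still reports its width.
static llvm::EVT makeI48(llvm::LLVMContext &Ctx) {
  llvm::EVT VT = llvm::EVT::getIntegerVT(Ctx, 48);
  assert(!VT.isSimple() && VT.getSizeInBits() == 48);
  return VT;
}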
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is legal add immediate, that is the target has add instructions which can add a register with the immediate without having to materialize the immediate into a register.
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
The cost of a 'div' instruction on x86.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget&#39;s CPU.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with custom lowering.
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
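A minimal sketch of the usual pattern (templated on the lowering class to keep the snippet header-agnostic; the per-part cost of 1 is an assumption): the pair's first element is how many legal-typed pieces the type splits into, and it typically scales a per-piece cost.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Type.h"

// Scale an assumed unit cost by the number of legalized parts.
template <typename TLITy>
static unsigned legalizedOpCost(const TLITy &TLI, const llvm::DataLayout &DL,
                                llvm::Type *Ty) {
  auto LT = TLI.getTypeLegalizationCost(DL, Ty); // {NumParts, legal MVT}
  return static_cast<unsigned>(LT.first) * 1u;   // assume cost 1 per part
}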
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
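For example (a small sketch; a NaN operand is ignored and the other value is returned, per IEEE minNum/maxNum):

#include "llvm/ADT/APFloat.h"
#include <cassert>

// minnum/maxnum pick the smaller/larger value, ignoring a NaN operand.
static void minMaxNumExample() {
  llvm::APFloat One(1.0), Two(2.0);
  assert(llvm::minnum(One, Two).convertToDouble() == 1.0);
  assert(llvm::maxnum(One, Two).convertToDouble() == 2.0);
}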
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with a bitwise operation.
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:66
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of single source vector with any shuffle mask.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:601