LLVM  7.0.0svn
BasicTTIImpl.h
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// This file provides a helper that implements much of the TTI interface in
12 /// terms of the target-independent code generator and TargetLowering
13 /// interfaces.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
18 #define LLVM_CODEGEN_BASICTTIIMPL_H
19 
20 #include "llvm/ADT/APInt.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/BitVector.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/Analysis/LoopInfo.h"
33 #include "llvm/IR/BasicBlock.h"
34 #include "llvm/IR/CallSite.h"
35 #include "llvm/IR/Constant.h"
36 #include "llvm/IR/Constants.h"
37 #include "llvm/IR/DataLayout.h"
38 #include "llvm/IR/DerivedTypes.h"
39 #include "llvm/IR/InstrTypes.h"
40 #include "llvm/IR/Instruction.h"
41 #include "llvm/IR/Instructions.h"
42 #include "llvm/IR/Intrinsics.h"
43 #include "llvm/IR/Operator.h"
44 #include "llvm/IR/Type.h"
45 #include "llvm/IR/Value.h"
46 #include "llvm/MC/MCSchedule.h"
47 #include "llvm/Support/Casting.h"
51 #include <algorithm>
52 #include <cassert>
53 #include <cstdint>
54 #include <limits>
55 #include <utility>
56 
57 namespace llvm {
58 
59 class Function;
60 class GlobalValue;
61 class LLVMContext;
62 class ScalarEvolution;
63 class SCEV;
64 class TargetMachine;
65 
66 extern cl::opt<unsigned> PartialUnrollingThreshold;
67 
68 /// \brief Base class which can be used to help build a TTI implementation.
69 ///
70 /// This class provides as much implementation of the TTI interface as is
71 /// possible using the target independent parts of the code generator.
72 ///
73 /// In order to subclass it, your class must implement a getST() method to
74 /// return the subtarget, and a getTLI() method to return the target lowering.
75 /// We need these methods implemented in the derived class so that this class
76 /// doesn't have to duplicate storage for them.
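/// For example, a minimal target implementation built on this base might look
/// roughly like the sketch below (illustrative only; "MyTargetTTIImpl",
/// "MyTargetSubtarget" and "MyTargetLowering" are hypothetical names, and real
/// targets also forward a TargetMachine/Function pair to the constructor):
/// \code
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtarget *ST;
///     const MyTargetLowering *TLI;
///
///     const MyTargetSubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///     // ... target-specific cost-model overrides go here ...
///   };
/// \endcode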
77 template <typename T>
78 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
79 private:
80  using BaseT = TargetTransformInfoImplCRTPBase<T>;
81  using TTI = TargetTransformInfo;
82 
83  /// Estimate a cost of shuffle as a sequence of extract and insert
84  /// operations.
85  unsigned getPermuteShuffleOverhead(Type *Ty) {
86  assert(Ty->isVectorTy() && "Can only shuffle vectors");
87  unsigned Cost = 0;
88  // Shuffle cost is equal to the cost of extracting an element from its
89  // argument plus the cost of inserting it into the result vector.
90 
91  // e.g. <4 x float> has a mask of <0,5,2,7>, i.e. we need to extract from
92  // index 0 of the first vector, index 1 of the second vector, index 2 of the
93  // first vector and finally index 3 of the second vector, and insert them at
94  // indices <0,1,2,3> of the result vector.
95  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
96  Cost += static_cast<T *>(this)
97  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
98  Cost += static_cast<T *>(this)
99  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
100  }
101  return Cost;
102  }
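 // Worked example (illustrative): for a permuting shuffle of <4 x float>, the
 // loop above queries four ExtractElement and four InsertElement costs; on a
 // target that keeps this base implementation's per-element cost of 1, the
 // resulting estimate is 4 + 4 = 8.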
103 
104  /// \brief Local query method delegates up to T which *must* implement this!
105  const TargetSubtargetInfo *getST() const {
106  return static_cast<const T *>(this)->getST();
107  }
108 
109  /// \brief Local query method delegates up to T which *must* implement this!
110  const TargetLoweringBase *getTLI() const {
111  return static_cast<const T *>(this)->getTLI();
112  }
113 
114 protected:
115  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
116  : BaseT(DL) {}
117 
118  using TargetTransformInfoImplBase::DL;
119 
120 public:
121  /// \name Scalar TTI Implementations
122  /// @{
123  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
124  unsigned BitWidth, unsigned AddressSpace,
125  unsigned Alignment, bool *Fast) const {
126  EVT E = EVT::getIntegerVT(Context, BitWidth);
127  return getTLI()->allowsMisalignedMemoryAccesses(E, AddressSpace, Alignment, Fast);
128  }
129 
130  bool hasBranchDivergence() { return false; }
131 
132  bool isSourceOfDivergence(const Value *V) { return false; }
133 
134  bool isAlwaysUniform(const Value *V) { return false; }
135 
136  unsigned getFlatAddressSpace() {
137  // Return an invalid address space.
138  return -1;
139  }
140 
141  bool isLegalAddImmediate(int64_t imm) {
142  return getTLI()->isLegalAddImmediate(imm);
143  }
144 
145  bool isLegalICmpImmediate(int64_t imm) {
146  return getTLI()->isLegalICmpImmediate(imm);
147  }
148 
149  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
150  bool HasBaseReg, int64_t Scale,
151  unsigned AddrSpace, Instruction *I = nullptr) {
152  TargetLoweringBase::AddrMode AM;
153  AM.BaseGV = BaseGV;
154  AM.BaseOffs = BaseOffset;
155  AM.HasBaseReg = HasBaseReg;
156  AM.Scale = Scale;
157  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
158  }
159 
160  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
161  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
162  }
163 
164  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
165  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
166  TargetLoweringBase::AddrMode AM;
167  AM.BaseGV = BaseGV;
168  AM.BaseOffs = BaseOffset;
169  AM.HasBaseReg = HasBaseReg;
170  AM.Scale = Scale;
171  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
172  }
173 
174  bool isTruncateFree(Type *Ty1, Type *Ty2) {
175  return getTLI()->isTruncateFree(Ty1, Ty2);
176  }
177 
178  bool isProfitableToHoist(Instruction *I) {
179  return getTLI()->isProfitableToHoist(I);
180  }
181 
182  bool isTypeLegal(Type *Ty) {
183  EVT VT = getTLI()->getValueType(DL, Ty);
184  return getTLI()->isTypeLegal(VT);
185  }
186 
187  int getGEPCost(Type *PointeeType, const Value *Ptr,
188  ArrayRef<const Value *> Operands) {
189  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
190  }
191 
192  int getExtCost(const Instruction *I, const Value *Src) {
193  if (getTLI()->isExtFree(I))
194  return TargetTransformInfo::TCC_Free;
195 
196  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
197  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
198  if (getTLI()->isExtLoad(LI, I, DL))
199  return TargetTransformInfo::TCC_Free;
200 
201  return TargetTransformInfo::TCC_Basic;
202  }
203 
204  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
205  ArrayRef<const Value *> Arguments) {
206  return BaseT::getIntrinsicCost(IID, RetTy, Arguments);
207  }
208 
209  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
210  ArrayRef<Type *> ParamTys) {
211  if (IID == Intrinsic::cttz) {
212  if (getTLI()->isCheapToSpeculateCttz())
213  return TargetTransformInfo::TCC_Basic;
214  return TargetTransformInfo::TCC_Expensive;
215  }
216 
217  if (IID == Intrinsic::ctlz) {
218  if (getTLI()->isCheapToSpeculateCtlz())
219  return TargetTransformInfo::TCC_Basic;
220  return TargetTransformInfo::TCC_Expensive;
221  }
222 
223  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys);
224  }
225 
226  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
227  unsigned &JumpTableSize) {
228  /// Try to find the estimated number of clusters. Note that the number of
229  /// clusters identified in this function could be different from the actual
230  /// numbers found in lowering. This function ignores switches that are
231  /// lowered with a mix of jump table / bit test / BTree. This function was
232  /// initially intended to be used when estimating the cost of a switch in
233  /// the inline cost heuristic, but it's a generic cost model to be used in
234  /// other places (e.g., in loop unrolling).
235  unsigned N = SI.getNumCases();
236  const TargetLoweringBase *TLI = getTLI();
237  const DataLayout &DL = this->getDataLayout();
238 
239  JumpTableSize = 0;
240  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
241 
242  // Early exit if neither a jump table nor a bit test is allowed.
243  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
244  return N;
245 
246  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
247  APInt MinCaseVal = MaxCaseVal;
248  for (auto CI : SI.cases()) {
249  const APInt &CaseVal = CI.getCaseValue()->getValue();
250  if (CaseVal.sgt(MaxCaseVal))
251  MaxCaseVal = CaseVal;
252  if (CaseVal.slt(MinCaseVal))
253  MinCaseVal = CaseVal;
254  }
255 
256  // Check if suitable for a bit test
257  if (N <= DL.getIndexSizeInBits(0u)) {
258  SmallPtrSet<const BasicBlock *, 4> Dests;
259  for (auto I : SI.cases())
260  Dests.insert(I.getCaseSuccessor());
261 
262  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
263  DL))
264  return 1;
265  }
266 
267  // Check if suitable for a jump table.
268  if (IsJTAllowed) {
269  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
270  return N;
271  uint64_t Range =
272  (MaxCaseVal - MinCaseVal)
273  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
274  // Check whether a range of clusters is dense enough for a jump table
275  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
276  JumpTableSize = Range;
277  return 1;
278  }
279  }
280  return N;
281  }
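 // Worked example (illustrative): a switch over the ten cases {0, ..., 9}
 // that is not already caught by the bit-test check above has Range =
 // 9 - 0 + 1 = 10; if the target allows jump tables and considers that range
 // dense enough, JumpTableSize is set to 10 and a single cluster is returned,
 // otherwise the function falls back to returning N (one cluster per case).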
282 
283  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
284 
285  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
286 
287  bool shouldBuildLookupTables() {
288  const TargetLoweringBase *TLI = getTLI();
289  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
290  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
291  }
292 
293  bool haveFastSqrt(Type *Ty) {
294  const TargetLoweringBase *TLI = getTLI();
295  EVT VT = TLI->getValueType(DL, Ty);
296  return TLI->isTypeLegal(VT) &&
297  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
298  }
299 
300  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
301  return true;
302  }
303 
304  unsigned getFPOpCost(Type *Ty) {
305  // Check whether FADD is available, as a proxy for floating-point in
306  // general.
307  const TargetLoweringBase *TLI = getTLI();
308  EVT VT = TLI->getValueType(DL, Ty);
309  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
310  return TargetTransformInfo::TCC_Basic;
311  return TargetTransformInfo::TCC_Expensive;
312  }
313 
314  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
315  const TargetLoweringBase *TLI = getTLI();
316  switch (Opcode) {
317  default: break;
318  case Instruction::Trunc:
319  if (TLI->isTruncateFree(OpTy, Ty))
320  return TargetTransformInfo::TCC_Free;
321  break;
322  case Instruction::ZExt:
323  if (TLI->isZExtFree(OpTy, Ty))
324  return TargetTransformInfo::TCC_Free;
325  break;
326  }
327 
328  return BaseT::getOperationCost(Opcode, Ty, OpTy);
329  }
330 
331  unsigned getInliningThresholdMultiplier() { return 1; }
332 
333  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
334  TTI::UnrollingPreferences &UP) {
335  // This unrolling functionality is target independent, but to provide some
336  // motivation for its intended use, for x86:
337 
338  // According to the Intel 64 and IA-32 Architectures Optimization Reference
339  // Manual, Intel Core models and later have a loop stream detector (and
340  // associated uop queue) that can benefit from partial unrolling.
341  // The relevant requirements are:
342  // - The loop must have no more than 4 (8 for Nehalem and later) branches
343  // taken, and none of them may be calls.
344  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
345 
346  // According to the Software Optimization Guide for AMD Family 15h
347  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
348  // and loop buffer which can benefit from partial unrolling.
349  // The relevant requirements are:
350  // - The loop must have fewer than 16 branches
351  // - The loop must have less than 40 uops in all executed loop branches
352 
353  // The number of taken branches in a loop is hard to estimate here, and
354  // benchmarking has revealed that it is better not to be conservative when
355  // estimating the branch count. As a result, we'll ignore the branch limits
356  // until someone finds a case where it matters in practice.
357 
358  unsigned MaxOps;
359  const TargetSubtargetInfo *ST = getST();
360  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
361  MaxOps = PartialUnrollingThreshold;
362  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
363  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
364  else
365  return;
366 
367  // Scan the loop: don't unroll loops with calls.
368  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
369  ++I) {
370  BasicBlock *BB = *I;
371 
372  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
373  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
374  ImmutableCallSite CS(&*J);
375  if (const Function *F = CS.getCalledFunction()) {
376  if (!static_cast<T *>(this)->isLoweredToCall(F))
377  continue;
378  }
379 
380  return;
381  }
382  }
383 
384  // Enable runtime and partial unrolling up to the specified size.
385  // Enable using trip count upper bound to unroll loops.
386  UP.Partial = UP.Runtime = UP.UpperBound = true;
387  UP.PartialThreshold = MaxOps;
388 
389  // Avoid unrolling when optimizing for size.
390  UP.OptSizeThreshold = 0;
391  UP.PartialOptSizeThreshold = 0;
392 
393  // Set number of instructions optimized when "back edge"
394  // becomes "fall through" to default value of 2.
395  UP.BEInsns = 2;
396  }
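 // For instance (illustrative): on a subtarget whose scheduling model reports
 // LoopMicroOpBufferSize = 28 and with no PartialUnrollingThreshold override,
 // a call-free loop ends up with UP.PartialThreshold = 28, partial, runtime
 // and upper-bound unrolling enabled, and UP.BEInsns = 2.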
397 
398  int getInstructionLatency(const Instruction *I) {
399  if (isa<LoadInst>(I))
400  return getST()->getSchedModel().DefaultLoadLatency;
401 
402  return BaseT::getInstructionLatency(I);
403  }
404 
405  /// @}
406 
407  /// \name Vector TTI Implementations
408  /// @{
409 
410  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
411 
412  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
413 
414  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
415  /// are set if the result needs to be inserted and/or extracted from vectors.
416  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
417  assert(Ty->isVectorTy() && "Can only scalarize vectors");
418  unsigned Cost = 0;
419 
420  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
421  if (Insert)
422  Cost += static_cast<T *>(this)
423  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
424  if (Extract)
425  Cost += static_cast<T *>(this)
426  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
427  }
428 
429  return Cost;
430  }
431 
432  /// Estimate the overhead of scalarizing an instruction's unique
433  /// non-constant operands. The types of the arguments are ordinarily
434  /// scalar, in which case the costs are multiplied by VF.
435  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
436  unsigned VF) {
437  unsigned Cost = 0;
438  SmallPtrSet<const Value*, 4> UniqueOperands;
439  for (const Value *A : Args) {
440  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
441  Type *VecTy = nullptr;
442  if (A->getType()->isVectorTy()) {
443  VecTy = A->getType();
444  // If A is a vector operand, VF should be 1 or correspond to A.
445  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
446  "Vector argument does not match VF");
447  }
448  else
449  VecTy = VectorType::get(A->getType(), VF);
450 
451  Cost += getScalarizationOverhead(VecTy, false, true);
452  }
453  }
454 
455  return Cost;
456  }
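 // Worked example (illustrative): for a call such as foo(%x, %x, 7.0) being
 // vectorized with VF = 4, only the unique non-constant operand %x is counted;
 // it is costed as extracting four lanes from a hypothetical 4-element vector
 // of its type, so with the base per-element cost of 1 the overhead is 4.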
457 
458  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
459  assert(VecTy->isVectorTy());
460 
461  unsigned Cost = 0;
462 
463  Cost += getScalarizationOverhead(VecTy, true, false);
464  if (!Args.empty())
465  Cost += getOperandsScalarizationOverhead(Args,
466  VecTy->getVectorNumElements());
467  else
468  // When no information on arguments is provided, we add the cost
469  // associated with one argument as a heuristic.
470  Cost += getScalarizationOverhead(VecTy, false, true);
471 
472  return Cost;
473  }
474 
475  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
476 
477  unsigned getArithmeticInstrCost(
478  unsigned Opcode, Type *Ty,
479  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
480  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
481  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
482  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
483  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
484  // Check if any of the operands are vector operands.
485  const TargetLoweringBase *TLI = getTLI();
486  int ISD = TLI->InstructionOpcodeToISD(Opcode);
487  assert(ISD && "Invalid opcode");
488 
489  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
490 
491  bool IsFloat = Ty->isFPOrFPVectorTy();
492  // Assume that floating point arithmetic operations cost twice as much as
493  // integer operations.
494  unsigned OpCost = (IsFloat ? 2 : 1);
495 
496  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
497  // The operation is legal. Assume it costs 1.
498  // TODO: Once we have extract/insert subvector cost we need to use them.
499  return LT.first * OpCost;
500  }
501 
502  if (!TLI->isOperationExpand(ISD, LT.second)) {
503  // If the operation is custom lowered, then assume that the code is twice
504  // as expensive.
505  return LT.first * 2 * OpCost;
506  }
507 
508  // Else, assume that we need to scalarize this op.
509  // TODO: If one of the types get legalized by splitting, handle this
510  // similarly to what getCastInstrCost() does.
511  if (Ty->isVectorTy()) {
512  unsigned Num = Ty->getVectorNumElements();
513  unsigned Cost = static_cast<T *>(this)
514  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
515  // Return the cost of multiple scalar invocations plus the cost of
516  // inserting and extracting the values.
517  return getScalarizationOverhead(Ty, Args) + Num * Cost;
518  }
519 
520  // We don't know anything about this scalar instruction.
521  return OpCost;
522  }
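 // Worked example (illustrative): an fadd on <8 x float> on a target whose
 // widest legal vector type is v4f32 legalizes with LT.first = 2; the
 // operation is legal and floating point (OpCost = 2), so the estimate above
 // is 2 * 2 = 4. If the operation instead had to be expanded, the vector case
 // would be costed as eight scalar fadds plus the insert/extract overhead.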
523 
524  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
525  Type *SubTp) {
526  if (Kind == TTI::SK_Alternate || Kind == TTI::SK_PermuteTwoSrc ||
527  Kind == TTI::SK_PermuteSingleSrc) {
528  return getPermuteShuffleOverhead(Tp);
529  }
530  return 1;
531  }
532 
533  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
534  const Instruction *I = nullptr) {
535  const TargetLoweringBase *TLI = getTLI();
536  int ISD = TLI->InstructionOpcodeToISD(Opcode);
537  assert(ISD && "Invalid opcode");
538  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
539  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
540 
541  // Check for NOOP conversions.
542  if (SrcLT.first == DstLT.first &&
543  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
544 
545  // Bitcasts between types that are legalized to the same type are free.
546  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
547  return 0;
548  }
549 
550  if (Opcode == Instruction::Trunc &&
551  TLI->isTruncateFree(SrcLT.second, DstLT.second))
552  return 0;
553 
554  if (Opcode == Instruction::ZExt &&
555  TLI->isZExtFree(SrcLT.second, DstLT.second))
556  return 0;
557 
558  if (Opcode == Instruction::AddrSpaceCast &&
559  TLI->isNoopAddrSpaceCast(Src->getPointerAddressSpace(),
560  Dst->getPointerAddressSpace()))
561  return 0;
562 
563  // If this is a zext/sext of a load, return 0 if the corresponding
564  // extending load exists on target.
565  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
566  I && isa<LoadInst>(I->getOperand(0))) {
567  EVT ExtVT = EVT::getEVT(Dst);
568  EVT LoadVT = EVT::getEVT(Src);
569  unsigned LType =
570  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
571  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
572  return 0;
573  }
574 
575  // If the cast is marked as legal (or promote) then assume low cost.
576  if (SrcLT.first == DstLT.first &&
577  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
578  return 1;
579 
580  // Handle scalar conversions.
581  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
582  // Scalar bitcasts are usually free.
583  if (Opcode == Instruction::BitCast)
584  return 0;
585 
586  // Just check the op cost. If the operation is legal then assume it costs
587  // 1.
588  if (!TLI->isOperationExpand(ISD, DstLT.second))
589  return 1;
590 
591  // Assume that illegal scalar instructions are expensive.
592  return 4;
593  }
594 
595  // Check vector-to-vector casts.
596  if (Dst->isVectorTy() && Src->isVectorTy()) {
597  // If the cast is between same-sized registers, then the check is simple.
598  if (SrcLT.first == DstLT.first &&
599  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
600 
601  // Assume that Zext is done using AND.
602  if (Opcode == Instruction::ZExt)
603  return 1;
604 
605  // Assume that sext is done using SHL and SRA.
606  if (Opcode == Instruction::SExt)
607  return 2;
608 
609  // Just check the op cost. If the operation is legal then assume it
610  // costs 1 and multiply by the type-legalization overhead.
612  if (!TLI->isOperationExpand(ISD, DstLT.second))
613  return SrcLT.first * 1;
614  }
615 
616  // If we are legalizing by splitting, query the concrete TTI for the cost
617  // of casting the original vector twice. We also need to factor in the
618  // cost of the split itself. Count that as 1, to be consistent with
619  // TLI->getTypeLegalizationCost().
620  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
621  TargetLowering::TypeSplitVector) ||
622  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
623  TargetLowering::TypeSplitVector)) {
624  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
625  Dst->getVectorNumElements() / 2);
626  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
627  Src->getVectorNumElements() / 2);
628  T *TTI = static_cast<T *>(this);
629  return TTI->getVectorSplitCost() +
630  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
631  }
632 
633  // In other cases where the source or destination are illegal, assume
634  // the operation will get scalarized.
635  unsigned Num = Dst->getVectorNumElements();
636  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
637  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
638 
639  // Return the cost of multiple scalar invocations plus the cost of
640  // inserting and extracting the values.
641  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
642  }
643 
644  // We already handled vector-to-vector and scalar-to-scalar conversions.
645  // This is where we handle bitcast between vectors and scalars. We need to
646  // assume that the conversion is scalarized in one way or another.
648  if (Opcode == Instruction::BitCast)
649  // Illegal bitcasts are done by storing and loading from a stack slot.
650  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
651  : 0) +
652  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
653  : 0);
654 
655  llvm_unreachable("Unhandled cast");
656  }
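 // Worked example (illustrative): a cast whose source or destination is
 // legalized by splitting, say <16 x i32> to <16 x float> on a target with
 // 128-bit vector registers, is charged getVectorSplitCost() plus twice the
 // cost of the same cast on the <8 x i32>/<8 x float> halves; a zext between
 // same-sized legal registers is assumed to be a single AND (cost 1) and a
 // sext two shifts (cost 2), as handled above.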
657 
658  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
659  VectorType *VecTy, unsigned Index) {
660  return static_cast<T *>(this)->getVectorInstrCost(
661  Instruction::ExtractElement, VecTy, Index) +
662  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
663  VecTy->getElementType());
664  }
665 
666  unsigned getCFInstrCost(unsigned Opcode) {
667  // Branches are assumed to be predicted.
668  return 0;
669  }
670 
671  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
672  const Instruction *I) {
673  const TargetLoweringBase *TLI = getTLI();
674  int ISD = TLI->InstructionOpcodeToISD(Opcode);
675  assert(ISD && "Invalid opcode");
676 
677  // Selects on vectors are actually vector selects.
678  if (ISD == ISD::SELECT) {
679  assert(CondTy && "CondTy must exist");
680  if (CondTy->isVectorTy())
681  ISD = ISD::VSELECT;
682  }
683  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
684 
685  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
686  !TLI->isOperationExpand(ISD, LT.second)) {
687  // The operation is legal. Assume it costs 1. Multiply
688  // by the type-legalization overhead.
689  return LT.first * 1;
690  }
691 
692  // Otherwise, assume that the operation is scalarized.
693  // TODO: If one of the types get legalized by splitting, handle this
694  // similarly to what getCastInstrCost() does.
695  if (ValTy->isVectorTy()) {
696  unsigned Num = ValTy->getVectorNumElements();
697  if (CondTy)
698  CondTy = CondTy->getScalarType();
699  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
700  Opcode, ValTy->getScalarType(), CondTy, I);
701 
702  // Return the cost of multiple scalar invocations plus the cost of
703  // inserting and extracting the values.
704  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
705  }
706 
707  // Unknown scalar opcode.
708  return 1;
709  }
710 
711  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
712  std::pair<unsigned, MVT> LT =
713  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
714 
715  return LT.first;
716  }
717 
718  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
719  unsigned AddressSpace, const Instruction *I = nullptr) {
720  assert(!Src->isVoidTy() && "Invalid type");
721  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
722 
723  // Assuming that all loads of legal types cost 1.
724  unsigned Cost = LT.first;
725 
726  if (Src->isVectorTy() &&
727  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
728  // This is a vector load that legalizes to a larger type than the vector
729  // itself. Unless the corresponding extending load or truncating store is
730  // legal, this will scalarize.
731  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
732  EVT MemVT = getTLI()->getValueType(DL, Src);
733  if (Opcode == Instruction::Store)
734  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
735  else
736  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
737 
738  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
739  // This is a vector load/store for some illegal type that is scalarized.
740  // We must account for the cost of building or decomposing the vector.
741  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
742  Opcode == Instruction::Store);
743  }
744  }
745 
746  return Cost;
747  }
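 // Worked example (illustrative): storing a <4 x i16> on a target where that
 // type is legalized to a wider v4i32 register costs LT.first for the store
 // itself and, if the corresponding truncating store is neither Legal nor
 // Custom, additionally pays for extracting all four elements so the value
 // can be stored as scalars.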
748 
749  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
750  unsigned Factor,
751  ArrayRef<unsigned> Indices,
752  unsigned Alignment,
753  unsigned AddressSpace) {
754  VectorType *VT = dyn_cast<VectorType>(VecTy);
755  assert(VT && "Expect a vector type for interleaved memory op");
756 
757  unsigned NumElts = VT->getNumElements();
758  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
759 
760  unsigned NumSubElts = NumElts / Factor;
761  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
762 
763  // First, the cost of the load/store operation.
764  unsigned Cost = static_cast<T *>(this)->getMemoryOpCost(
765  Opcode, VecTy, Alignment, AddressSpace);
766 
767  // Legalize the vector type, and get the legalized and unlegalized type
768  // sizes.
769  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
770  unsigned VecTySize =
771  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
772  unsigned VecTyLTSize = VecTyLT.getStoreSize();
773 
774  // Return the ceiling of dividing A by B.
775  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
776 
777  // Scale the cost of the memory operation by the fraction of legalized
778  // instructions that will actually be used. We shouldn't account for the
779  // cost of dead instructions since they will be removed.
780  //
781  // E.g., An interleaved load of factor 8:
782  // %vec = load <16 x i64>, <16 x i64>* %ptr
783  // %v0 = shufflevector %vec, undef, <0, 8>
784  //
785  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
786  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
787  // type). The other loads are unused.
788  //
789  // We only scale the cost of loads since interleaved store groups aren't
790  // allowed to have gaps.
791  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
792  // The number of loads of a legal type it will take to represent a load
793  // of the unlegalized vector type.
794  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
795 
796  // The number of elements of the unlegalized type that correspond to a
797  // single legal instruction.
798  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
799 
800  // Determine which legal instructions will be used.
801  BitVector UsedInsts(NumLegalInsts, false);
802  for (unsigned Index : Indices)
803  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
804  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
805 
806  // Scale the cost of the load by the fraction of legal instructions that
807  // will be used.
808  Cost *= UsedInsts.count() / NumLegalInsts;
809  }
810 
811  // Then add the cost of the interleave operation.
812  if (Opcode == Instruction::Load) {
813  // The interleave cost is similar to extract sub vectors' elements
814  // from the wide vector, and insert them into sub vectors.
815  //
816  // E.g. An interleaved load of factor 2 (with one member of index 0):
817  // %vec = load <8 x i32>, <8 x i32>* %ptr
818  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
819  // The cost is estimated as extract elements at 0, 2, 4, 6 from the
820  // <8 x i32> vector and insert them into a <4 x i32> vector.
821 
822  assert(Indices.size() <= Factor &&
823  "Interleaved memory op has too many members");
824 
825  for (unsigned Index : Indices) {
826  assert(Index < Factor && "Invalid index for interleaved memory op");
827 
828  // Extract elements from loaded vector for each sub vector.
829  for (unsigned i = 0; i < NumSubElts; i++)
830  Cost += static_cast<T *>(this)->getVectorInstrCost(
831  Instruction::ExtractElement, VT, Index + i * Factor);
832  }
833 
834  unsigned InsSubCost = 0;
835  for (unsigned i = 0; i < NumSubElts; i++)
836  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
837  Instruction::InsertElement, SubVT, i);
838 
839  Cost += Indices.size() * InsSubCost;
840  } else {
841  // The interleave cost is to extract all elements from the sub vectors,
842  // and insert them into the wide vector.
843  //
844  // E.g. An interleaved store of factor 2:
845  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
846  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
847  // The cost is estimated as extract all elements from both <4 x i32>
848  // vectors and insert into the <8 x i32> vector.
849 
850  unsigned ExtSubCost = 0;
851  for (unsigned i = 0; i < NumSubElts; i++)
852  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
853  Instruction::ExtractElement, SubVT, i);
854  Cost += ExtSubCost * Factor;
855 
856  for (unsigned i = 0; i < NumElts; i++)
857  Cost += static_cast<T *>(this)
858  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
859  }
860 
861  return Cost;
862  }
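 // Worked example (illustrative): for the factor-8 load of <16 x i64>
 // described above, legalized into eight v2i64 loads with a single live
 // member (Indices = {0}), only two of the eight legal loads are marked used,
 // so the memory cost is scaled by that 2/8 fraction before the per-element
 // extract and insert costs of forming the <2 x i64> sub-vector are added.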
863 
864  /// Get intrinsic cost based on arguments.
865  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
866  ArrayRef<Value *> Args, FastMathFlags FMF,
867  unsigned VF = 1) {
868  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
869  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
870 
871  switch (IID) {
872  default: {
873  // Assume that we need to scalarize this intrinsic.
874  SmallVector<Type *, 4> Types;
875  for (Value *Op : Args) {
876  Type *OpTy = Op->getType();
877  assert(VF == 1 || !OpTy->isVectorTy());
878  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
879  }
880 
881  if (VF > 1 && !RetTy->isVoidTy())
882  RetTy = VectorType::get(RetTy, VF);
883 
884  // Compute the scalarization overhead based on Args for a vector
885  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
886  // CostModel will pass a vector RetTy and VF is 1.
887  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
888  if (RetVF > 1 || VF > 1) {
889  ScalarizationCost = 0;
890  if (!RetTy->isVoidTy())
891  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
892  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
893  }
894 
895  return static_cast<T *>(this)->
896  getIntrinsicInstrCost(IID, RetTy, Types, FMF, ScalarizationCost);
897  }
898  case Intrinsic::masked_scatter: {
899  assert(VF == 1 && "Can't vectorize types here.");
900  Value *Mask = Args[3];
901  bool VarMask = !isa<Constant>(Mask);
902  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
903  return
904  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Store,
905  Args[0]->getType(),
906  Args[1], VarMask,
907  Alignment);
908  }
909  case Intrinsic::masked_gather: {
910  assert(VF == 1 && "Can't vectorize types here.");
911  Value *Mask = Args[2];
912  bool VarMask = !isa<Constant>(Mask);
913  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
914  return
915  static_cast<T *>(this)->getGatherScatterOpCost(Instruction::Load,
916  RetTy, Args[0], VarMask,
917  Alignment);
918  }
919  }
920  }
921 
922  /// Get intrinsic cost based on argument types.
923  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
924  /// cost of scalarizing the arguments and the return value will be computed
925  /// based on types.
926  unsigned getIntrinsicInstrCost(
927  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
928  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
929  SmallVector<unsigned, 2> ISDs;
930  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
931  switch (IID) {
932  default: {
933  // Assume that we need to scalarize this intrinsic.
934  unsigned ScalarizationCost = ScalarizationCostPassed;
935  unsigned ScalarCalls = 1;
936  Type *ScalarRetTy = RetTy;
937  if (RetTy->isVectorTy()) {
938  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
939  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
940  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
941  ScalarRetTy = RetTy->getScalarType();
942  }
943  SmallVector<Type *, 4> ScalarTys;
944  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
945  Type *Ty = Tys[i];
946  if (Ty->isVectorTy()) {
947  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
948  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
949  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
950  Ty = Ty->getScalarType();
951  }
952  ScalarTys.push_back(Ty);
953  }
954  if (ScalarCalls == 1)
955  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
956 
957  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
958  IID, ScalarRetTy, ScalarTys, FMF);
959 
960  return ScalarCalls * ScalarCost + ScalarizationCost;
961  }
962  // Look for intrinsics that can be lowered directly or turned into a scalar
963  // intrinsic call.
964  case Intrinsic::sqrt:
965  ISDs.push_back(ISD::FSQRT);
966  break;
967  case Intrinsic::sin:
968  ISDs.push_back(ISD::FSIN);
969  break;
970  case Intrinsic::cos:
971  ISDs.push_back(ISD::FCOS);
972  break;
973  case Intrinsic::exp:
974  ISDs.push_back(ISD::FEXP);
975  break;
976  case Intrinsic::exp2:
977  ISDs.push_back(ISD::FEXP2);
978  break;
979  case Intrinsic::log:
980  ISDs.push_back(ISD::FLOG);
981  break;
982  case Intrinsic::log10:
983  ISDs.push_back(ISD::FLOG10);
984  break;
985  case Intrinsic::log2:
986  ISDs.push_back(ISD::FLOG2);
987  break;
988  case Intrinsic::fabs:
989  ISDs.push_back(ISD::FABS);
990  break;
991  case Intrinsic::minnum:
992  ISDs.push_back(ISD::FMINNUM);
993  if (FMF.noNaNs())
994  ISDs.push_back(ISD::FMINNAN);
995  break;
996  case Intrinsic::maxnum:
997  ISDs.push_back(ISD::FMAXNUM);
998  if (FMF.noNaNs())
999  ISDs.push_back(ISD::FMAXNAN);
1000  break;
1001  case Intrinsic::copysign:
1002  ISDs.push_back(ISD::FCOPYSIGN);
1003  break;
1004  case Intrinsic::floor:
1005  ISDs.push_back(ISD::FFLOOR);
1006  break;
1007  case Intrinsic::ceil:
1008  ISDs.push_back(ISD::FCEIL);
1009  break;
1010  case Intrinsic::trunc:
1011  ISDs.push_back(ISD::FTRUNC);
1012  break;
1013  case Intrinsic::nearbyint:
1014  ISDs.push_back(ISD::FNEARBYINT);
1015  break;
1016  case Intrinsic::rint:
1017  ISDs.push_back(ISD::FRINT);
1018  break;
1019  case Intrinsic::round:
1020  ISDs.push_back(ISD::FROUND);
1021  break;
1022  case Intrinsic::pow:
1023  ISDs.push_back(ISD::FPOW);
1024  break;
1025  case Intrinsic::fma:
1026  ISDs.push_back(ISD::FMA);
1027  break;
1028  case Intrinsic::fmuladd:
1029  ISDs.push_back(ISD::FMA);
1030  break;
1031  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1032  case Intrinsic::lifetime_start:
1033  case Intrinsic::lifetime_end:
1034  case Intrinsic::sideeffect:
1035  return 0;
1036  case Intrinsic::masked_store:
1037  return static_cast<T *>(this)
1038  ->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0, 0);
1039  case Intrinsic::masked_load:
1040  return static_cast<T *>(this)
1041  ->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1042  case Intrinsic::ctpop:
1043  ISDs.push_back(ISD::CTPOP);
1044  // In case of legalization use TCC_Expensive. This is cheaper than a
1045  // library call but still not a cheap instruction.
1046  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1047  break;
1048  // FIXME: ctlz, cttz, ...
1049  }
1050 
1051  const TargetLoweringBase *TLI = getTLI();
1052  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1053 
1054  SmallVector<unsigned, 2> LegalCost;
1055  SmallVector<unsigned, 2> CustomCost;
1056  for (unsigned ISD : ISDs) {
1057  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1058  if (IID == Intrinsic::fabs && TLI->isFAbsFree(LT.second)) {
1059  return 0;
1060  }
1061 
1062  // The operation is legal. Assume it costs 1.
1063  // If the type is split to multiple registers, assume that there is some
1064  // overhead to this.
1065  // TODO: Once we have extract/insert subvector cost we need to use them.
1066  if (LT.first > 1)
1067  LegalCost.push_back(LT.first * 2);
1068  else
1069  LegalCost.push_back(LT.first * 1);
1070  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1071  // If the operation is custom lowered then assume
1072  // that the code is twice as expensive.
1073  CustomCost.push_back(LT.first * 2);
1074  }
1075  }
1076 
1077  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1078  if (MinLegalCostI != LegalCost.end())
1079  return *MinLegalCostI;
1080 
1081  auto MinCustomCostI = std::min_element(CustomCost.begin(), CustomCost.end());
1082  if (MinCustomCostI != CustomCost.end())
1083  return *MinCustomCostI;
1084 
1085  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1086  // point mul followed by an add.
1087  if (IID == Intrinsic::fmuladd)
1088  return static_cast<T *>(this)
1089  ->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1090  static_cast<T *>(this)
1091  ->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1092 
1093  // Else, assume that we need to scalarize this intrinsic. For math builtins
1094  // this will emit a costly libcall, adding call overhead and spills. Make it
1095  // very expensive.
1096  if (RetTy->isVectorTy()) {
1097  unsigned ScalarizationCost =
1098  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1099  ? ScalarizationCostPassed
1100  : getScalarizationOverhead(RetTy, true, false));
1101  unsigned ScalarCalls = RetTy->getVectorNumElements();
1102  SmallVector<Type *, 4> ScalarTys;
1103  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1104  Type *Ty = Tys[i];
1105  if (Ty->isVectorTy())
1106  Ty = Ty->getScalarType();
1107  ScalarTys.push_back(Ty);
1108  }
1109  unsigned ScalarCost = static_cast<T *>(this)->getIntrinsicInstrCost(
1110  IID, RetTy->getScalarType(), ScalarTys, FMF);
1111  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1112  if (Tys[i]->isVectorTy()) {
1113  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1114  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1115  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1116  }
1117  }
1118 
1119  return ScalarCalls * ScalarCost + ScalarizationCost;
1120  }
1121 
1122  // This is going to be turned into a library call, make it expensive.
1123  return SingleCallCost;
1124  }
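 // Worked example (illustrative): llvm.sqrt on <4 x float> maps to ISD::FSQRT
 // above; if that node is legal for the legalized type the cost is LT.first
 // (doubled when the type is split across registers), if it is custom lowered
 // the cost is 2 * LT.first, and if it must be expanded the call is scalarized
 // into four scalar sqrt calls plus the scalarization overhead.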
1125 
1126  /// \brief Compute a cost of the given call instruction.
1127  ///
1128  /// Compute the cost of calling function F with return type RetTy and
1129  /// argument types Tys. F might be nullptr, in this case the cost of an
1130  /// arbitrary call with the specified signature will be returned.
1131  /// This is used, for instance, when we estimate call of a vector
1132  /// counterpart of the given function.
1133  /// \param F Called function, might be nullptr.
1134  /// \param RetTy Return value types.
1135  /// \param Tys Argument types.
1136  /// \returns The cost of Call instruction.
1137  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1138  return 10;
1139  }
1140 
1141  unsigned getNumberOfParts(Type *Tp) {
1142  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1143  return LT.first;
1144  }
1145 
1146  unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1147  const SCEV *) {
1148  return 0;
1149  }
1150 
1151  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1152  /// We're assuming that reduction operations are performed in the following way:
1153  /// 1. Non-pairwise reduction
1154  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1155  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1156  /// \----------------v-------------/ \----------v------------/
1157  /// n/2 elements n/2 elements
1158  /// %red1 = op <n x t> %val, <n x t> val1
1159  /// After this operation we have a vector %red1 where only the first n/2
1160  /// elements are meaningful, the second n/2 elements are undefined and can be
1161  /// dropped. All other operations are actually working with the vector of
1162  /// length n/2, not n, though the real vector length is still n.
1163  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1164  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1165  /// \----------------v-------------/ \----------v------------/
1166  /// n/4 elements 3*n/4 elements
1167  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1168  /// length n/2, the resulting vector has length n/4 etc.
1169  /// 2. Pairwise reduction:
1170  /// Everything is the same except for an additional shuffle operation which
1171  /// is used to produce operands for pairwise kind of reductions.
1172  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1173  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1174  /// \-------------v----------/ \----------v------------/
1175  /// n/2 elements n/2 elements
1176  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1177  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1178  /// \-------------v----------/ \----------v------------/
1179  /// n/2 elements n/2 elements
1180  /// %red1 = op <n x t> %val1, <n x t> val2
1181  /// Again, the operation is performed on <n x t> vector, but the resulting
1182  /// vector %red1 is <n/2 x t> vector.
1183  ///
1184  /// The cost model should take into account that the actual length of the
1185  /// vector is reduced on each iteration.
1186  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1187  bool IsPairwise) {
1188  assert(Ty->isVectorTy() && "Expect a vector type");
1189  Type *ScalarTy = Ty->getVectorElementType();
1190  unsigned NumVecElts = Ty->getVectorNumElements();
1191  unsigned NumReduxLevels = Log2_32(NumVecElts);
1192  unsigned ArithCost = 0;
1193  unsigned ShuffleCost = 0;
1194  auto *ConcreteTTI = static_cast<T *>(this);
1195  std::pair<unsigned, MVT> LT =
1196  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1197  unsigned LongVectorCount = 0;
1198  unsigned MVTLen =
1199  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1200  while (NumVecElts > MVTLen) {
1201  NumVecElts /= 2;
1202  // Assume the pairwise shuffles add a cost.
1203  ShuffleCost += (IsPairwise + 1) *
1204  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1205  NumVecElts, Ty);
1206  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1207  Ty = VectorType::get(ScalarTy, NumVecElts);
1208  ++LongVectorCount;
1209  }
1210  // The minimal length of the vector is limited by the real length of vector
1211  // operations performed on the current platform. That's why several final
1212  // reduction operations are performed on the vectors with the same
1213  // architecture-dependent length.
1214  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1215  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1216  NumVecElts, Ty);
1217  ArithCost += (NumReduxLevels - LongVectorCount) *
1218  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1219  return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true);
1220  }
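 // Worked example (illustrative): a non-pairwise fadd reduction of
 // <8 x float> on a target whose widest legal vector type is v4f32 has
 // Log2(8) = 3 reduction levels: one halving step is costed on the illegal
 // <8 x float> type (one shuffle plus one fadd), the remaining two levels on
 // the legal <4 x float> type, and the extract overhead of the final vector
 // is added on top.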
1221 
1222  /// Try to calculate op costs for min/max reduction operations.
1223  /// \param CondTy Conditional type for the Select instruction.
1224  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1225  bool) {
1226  assert(Ty->isVectorTy() && "Expect a vector type");
1227  Type *ScalarTy = Ty->getVectorElementType();
1228  Type *ScalarCondTy = CondTy->getVectorElementType();
1229  unsigned NumVecElts = Ty->getVectorNumElements();
1230  unsigned NumReduxLevels = Log2_32(NumVecElts);
1231  unsigned CmpOpcode;
1232  if (Ty->isFPOrFPVectorTy()) {
1233  CmpOpcode = Instruction::FCmp;
1234  } else {
1235  assert(Ty->isIntOrIntVectorTy() &&
1236  "expecting floating point or integer type for min/max reduction");
1237  CmpOpcode = Instruction::ICmp;
1238  }
1239  unsigned MinMaxCost = 0;
1240  unsigned ShuffleCost = 0;
1241  auto *ConcreteTTI = static_cast<T *>(this);
1242  std::pair<unsigned, MVT> LT =
1243  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1244  unsigned LongVectorCount = 0;
1245  unsigned MVTLen =
1246  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1247  while (NumVecElts > MVTLen) {
1248  NumVecElts /= 2;
1249  // Assume the pairwise shuffles add a cost.
1250  ShuffleCost += (IsPairwise + 1) *
1251  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1252  NumVecElts, Ty);
1253  MinMaxCost +=
1254  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1255  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1256  nullptr);
1257  Ty = VectorType::get(ScalarTy, NumVecElts);
1258  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1259  ++LongVectorCount;
1260  }
1261  // The minimal length of the vector is limited by the real length of vector
1262  // operations performed on the current platform. That's why several final
1263  // reduction operations are performed on the vectors with the same
1264  // architecture-dependent length.
1265  ShuffleCost += (NumReduxLevels - LongVectorCount) * (IsPairwise + 1) *
1266  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1267  NumVecElts, Ty);
1268  MinMaxCost +=
1269  (NumReduxLevels - LongVectorCount) *
1270  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1271  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1272  nullptr));
1273  // Need 3 extractelement instructions for scalarization + an additional
1274  // scalar select instruction.
1275  return ShuffleCost + MinMaxCost +
1276  3 * getScalarizationOverhead(Ty, /*Insert=*/false,
1277  /*Extract=*/true) +
1278  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, ScalarTy,
1279  ScalarCondTy, nullptr);
1280  }
1281 
1282  unsigned getVectorSplitCost() { return 1; }
1283 
1284  /// @}
1285 };
1286 
1287 /// \brief Concrete BasicTTIImpl that can be used if no further customization
1288 /// is needed.
1289 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1290  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1291 
1292  friend class BasicTTIImplBase<BasicTTIImpl>;
1293 
1294  const TargetSubtargetInfo *ST;
1295  const TargetLoweringBase *TLI;
1296 
1297  const TargetSubtargetInfo *getST() const { return ST; }
1298  const TargetLoweringBase *getTLI() const { return TLI; }
1299 
1300 public:
1301  explicit BasicTTIImpl(const TargetMachine *ST, const Function &F);
1302 };
1303 
1304 } // end namespace llvm
1305 
1306 #endif // LLVM_CODEGEN_BASICTTIIMPL_H
LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return how this load with extension should be treated: either it is legal, needs to be promoted to a ...
Type * getVectorElementType() const
Definition: Type.h:368
unsigned getNumCases() const
Return the number of &#39;cases&#39; in this switch instruction, excluding the default case.
Base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
bool Partial
Allow partial unrolling (unrolling of loops to expand the size of the loop body, not only to eliminat...
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:570
BitVector & set()
Definition: BitVector.h:398
unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, TTI::OperandValueKind Opd1Info=TTI::OK_AnyValue, TTI::OperandValueKind Opd2Info=TTI::OK_AnyValue, TTI::OperandValueProperties Opd1PropInfo=TTI::OP_None, TTI::OperandValueProperties Opd2PropInfo=TTI::OP_None, ArrayRef< const Value *> Args=ArrayRef< const Value *>())
Definition: BasicTTIImpl.h:477
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
unsigned getIndexSizeInBits(unsigned AS) const
Size in bits of index used for address calculation in getelementptr.
Definition: DataLayout.h:359
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
iterator_range< CaseIt > cases()
Iteration adapter for range-for loops.
LLVMContext & Context
bool noNaNs() const
Definition: Operator.h:200
unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, bool VariableMask, unsigned Alignment)
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract)
Estimate the overhead of scalarizing an instruction.
Definition: BasicTTIImpl.h:416
AMDGPU Rewrite Out Arguments
The main scalar evolution driver.
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1183
unsigned PartialOptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size, like OptSizeThreshold, but used for partial/runtime unrolling (set to UINT_MAX to disable).
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
Definition: BasicTTIImpl.h:187
static uint64_t round(uint64_t Acc, uint64_t Input)
Definition: xxhash.cpp:57
unsigned PartialThreshold
The cost threshold for the unrolled loop, like Threshold, but used for partial/runtime unrolling (set...
CaseIt case_begin()
Returns a read/write iterator that points to the first case in the SwitchInst.
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps, const APInt &Low, const APInt &High, const DataLayout &DL) const
Return true if lowering to a bit test is suitable for a set of case clusters which contains NumDests ...
bool sgt(const APInt &RHS) const
Signed greather than comparison.
Definition: APInt.h:1253
BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
Definition: BasicTTIImpl.h:115
int getExtCost(const Instruction *I, const Value *Src)
Definition: BasicTTIImpl.h:192
F(f)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:503
An instruction for reading from memory.
Definition: Instructions.h:164
bool isProfitableToHoist(Instruction *I)
Definition: BasicTTIImpl.h:178
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:227
Base class which can be used to help build a TTI implementation.
Definition: BasicTTIImpl.h:78
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
bool isOperationLegalOrCustom(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned getMaxInterleaveFactor(unsigned VF)
Definition: BasicTTIImpl.h:475
unsigned getJumpBufAlignment() const
Returns the target&#39;s jmp_buf alignment in bytes (if never set, the default is 0)
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:130
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:252
This file provides helpers for the implementation of a TargetTransformInfo-conforming class...
CRTP base class for use as a mix-in that aids implementing a TargetTransformInfo-compatible class...
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Value *> Args, FastMathFlags FMF, unsigned VF=1)
Get intrinsic cost based on arguments.
Definition: BasicTTIImpl.h:865
unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *, const SCEV *)
unsigned getOperandsScalarizationOverhead(ArrayRef< const Value *> Args, unsigned VF)
Estimate the overhead of scalarizing an instructions unique non-constant operands.
Definition: BasicTTIImpl.h:435
unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy, unsigned Index)
Definition: BasicTTIImpl.h:658
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace)
Definition: BasicTTIImpl.h:164
unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise)
Try to calculate arithmetic and shuffle op costs for reduction operations.
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
uint64_t getNumElements() const
Definition: DerivedTypes.h:359
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace, unsigned Alignment, bool *Fast) const
Definition: BasicTTIImpl.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const
Return true if it&#39;s free to truncate a value of type FromTy to type ToTy.
unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef< unsigned > Indices, unsigned Alignment, unsigned AddressSpace)
Definition: BasicTTIImpl.h:749
unsigned getRegisterBitWidth(bool Vector) const
Definition: BasicTTIImpl.h:412
unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, const Instruction *I)
Definition: BasicTTIImpl.h:671
Choose alternate elements from vector.
unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:533
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:399
bool isFCmpOrdCheaperThanFCmpZero(Type *Ty)
Definition: BasicTTIImpl.h:300
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:33
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:203
virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AddrSpace, Instruction *I=nullptr) const
Return true if the addressing mode represented by AM is legal for this target, for a load/store of th...
unsigned getFPOpCost(Type *Ty)
Definition: BasicTTIImpl.h:304
bool isTruncateFree(Type *Ty1, Type *Ty2)
Definition: BasicTTIImpl.h:174
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return &#39;this&#39;.
Definition: Type.h:301
ExtractSubvector Index indicates start offset.
bool isVoidTy() const
Return true if this is &#39;void&#39;.
Definition: Type.h:141
void getUnrollingPreferences(Loop *L, ScalarEvolution &SE, TTI::UnrollingPreferences &UP)
Definition: BasicTTIImpl.h:333
virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases, uint64_t Range) const
Return true if lowering to a jump table is suitable for a set of case clusters which may contain NumC...
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Machine Value Type.
Concrete BasicTTIImpl that can be used if no further customization is needed.
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
The instances of the Type class are immutable: once they are created, they are never changed...
Definition: Type.h:46
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Simple binary floating point operators.
Definition: ISDOpcodes.h:260
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:149
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:116
This file contains the declarations for the subclasses of Constant, which represent the different fla...
unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise, bool)
Try to calculate op costs for min/max reduction operations.
unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> Tys, FastMathFlags FMF, unsigned ScalarizationCostPassed=std::numeric_limits< unsigned >::max())
Get intrinsic cost based on argument types.
Definition: BasicTTIImpl.h:926
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:371
Expected to fold away in lowering.
virtual bool areJTsAllowed(const Function *Fn) const
Return true if lowering to a jump table is allowed.
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< Type *> ParamTys)
Definition: BasicTTIImpl.h:209
bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
Merge elements from two source vectors into one with any shuffle mask.
unsigned getNumberOfParts(Type *Tp)
static double log2(double V)
virtual bool isProfitableToHoist(Instruction *I) const
Extended Value Type.
Definition: ValueTypes.h:34
static wasm::ValType getType(const TargetRegisterClass *RC)
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY APFloat maxnum(const APFloat &A, const APFloat &B)
Implements IEEE maxNum semantics.
Definition: APFloat.h:1238
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool isLegalAddImmediate(int64_t imm)
Definition: BasicTTIImpl.h:141
This base class for TargetLowering contains the SelectionDAG-independent parts that can be used from ...
OperandValueProperties
Additional properties of an operand's values.
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
unsigned getCFInstrCost(unsigned Opcode)
Definition: BasicTTIImpl.h:666
size_type size() const
Definition: SmallPtrSet.h:93
unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, Type *SubTp)
Definition: BasicTTIImpl.h:524
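A minimal sketch of querying a two-source permute cost through TargetTransformInfo; the helper and the <8 x i16> type are assumed for illustration.
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"

  using namespace llvm;

  // Hypothetical helper: cost of an arbitrary two-source shuffle of <8 x i16>.
  static int queryShuffleCost(const TargetTransformInfo &TTI, LLVMContext &Ctx) {
    Type *VecTy = VectorType::get(Type::getInt16Ty(Ctx), 8);
    return TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy,
                              /*Index=*/0, /*SubTp=*/nullptr);
  }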
unsigned getNumberOfRegisters(bool Vector)
Definition: BasicTTIImpl.h:410
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements...
Definition: SmallPtrSet.h:418
size_type count() const
count - Returns the number of bits which are set.
Definition: BitVector.h:173
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
int getInstructionLatency(const Instruction *I)
Definition: BasicTTIImpl.h:398
virtual bool allowsMisalignedMemoryAccesses(EVT, unsigned AddrSpace=0, unsigned Align=1, bool *=nullptr) const
Determine if the target supports unaligned memory accesses.
iterator end()
Definition: BasicBlock.h:254
unsigned getJumpBufSize() const
Returns the target's jmp_buf size in bytes (if never set, the default is 200)
unsigned getVectorSplitCost()
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:862
AddressSpace
Definition: NVPTXBaseInfo.h:22
cl::opt< unsigned > PartialUnrollingThreshold
static const unsigned DefaultLoadLatency
Definition: MCSchedule.h:174
bool Runtime
Allow runtime unrolling (unrolling of loops to expand the size of the loop body even when the number ...
LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const
Return how this store with truncation should be treated: either it is legal, needs to be promoted to ...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:531
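Two worked values to make the floor semantics concrete (a sketch; the assertions assume NDEBUG is not defined).
  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  using namespace llvm;

  static void log2Examples() {
    assert(Log2_32(32) == 5); // exact power of two
    assert(Log2_32(33) == 5); // non-power of two rounds down (floor)
  }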
unsigned getVectorNumElements() const
Definition: DerivedTypes.h:462
Class to represent vector types.
Definition: DerivedTypes.h:393
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:390
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:91
unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace, const Instruction *I=nullptr)
Definition: BasicTTIImpl.h:718
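A hedged sketch of a load-cost query via TargetTransformInfo; the helper name, alignment, and address space are illustrative assumptions.
  #include "llvm/Analysis/TargetTransformInfo.h"
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Instruction.h"
  #include "llvm/IR/LLVMContext.h"

  using namespace llvm;

  // Hypothetical helper: cost of a 16-byte-aligned <4 x float> load from
  // address space 0.
  static int queryVectorLoadCost(const TargetTransformInfo &TTI,
                                 LLVMContext &Ctx) {
    Type *VecTy = VectorType::get(Type::getFloatTy(Ctx), 4);
    return TTI.getMemoryOpCost(Instruction::Load, VecTy, /*Alignment=*/16,
                               /*AddressSpace=*/0);
  }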
unsigned LoopMicroOpBufferSize
Definition: MCSchedule.h:169
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:288
bool isAlwaysUniform(const Value *V)
Definition: BasicTTIImpl.h:134
int InstructionOpcodeToISD(unsigned Opcode) const
Get the ISD node that corresponds to the Instruction class opcode.
FMINNAN/FMAXNAN - Behave identically to FMINNUM/FMAXNUM, except that when a single input is NaN...
Definition: ISDOpcodes.h:573
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
unsigned getFlatAddressSpace()
Definition: BasicTTIImpl.h:136
TargetSubtargetInfo - Generic base class for all target subtargets.
bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2)
virtual bool isLegalICmpImmediate(int64_t) const
Return true if the specified immediate is a legal icmp immediate, that is, the target has icmp instructi...
unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace)
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:606
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, ArrayRef< const Value *> Arguments)
Definition: BasicTTIImpl.h:204
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:120
unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy)
Definition: BasicTTIImpl.h:314
This class represents an analyzed expression in the program.
Represents a single loop in the control flow graph.
Definition: LoopInfo.h:439
Parameters that control the generic loop unrolling transformation.
unsigned getJumpBufAlignment()
Definition: BasicTTIImpl.h:283
unsigned OptSizeThreshold
The cost threshold for the unrolled loop when optimizing for size (set to UINT_MAX to disable)...
Establish a view to a call site for examination.
Definition: CallSite.h:713
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:108
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
int getGEPCost(Type *PointeeType, const Value *Ptr, ArrayRef< const Value *> Operands)
unsigned getInliningThresholdMultiplier()
Definition: BasicTTIImpl.h:331
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index)
Definition: BasicTTIImpl.h:711
block_iterator block_end() const
Definition: LoopInfo.h:155
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
Definition: ValueTypes.cpp:312
bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2)
Definition: BasicTTIImpl.h:160
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JumpTableSize)
Definition: BasicTTIImpl.h:226
FunTy * getCalledFunction() const
Return the function being called if this is a direct call, otherwise return null (if it's an indirect...
Definition: CallSite.h:107
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition: Type.h:185
const unsigned Kind
Multiway switch.
unsigned getScalarizationOverhead(Type *VecTy, ArrayRef< const Value *> Args)
Definition: BasicTTIImpl.h:458
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
The cost of a typical 'add' instruction.
bool isSourceOfDivergence(const Value *V)
Definition: BasicTTIImpl.h:132
unsigned getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:115
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:278
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
Definition: DataLayout.h:405
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
Definition: Type.cpp:593
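For example (a trivial sketch; the function name is illustrative).
  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/LLVMContext.h"
  #include "llvm/IR/Type.h"

  using namespace llvm;

  // Build the IR type <4 x float> in the given context.
  static VectorType *makeV4F32(LLVMContext &Ctx) {
    return VectorType::get(Type::getFloatTy(Ctx), 4);
  }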
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
bool isOperationLegalOrPromote(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target or can be made legal using promotion...
bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const
Return true if the specified load with extension is legal on this target.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
bool isTypeLegal(Type *Ty)
Definition: BasicTTIImpl.h:182
Type * getElementType() const
Definition: DerivedTypes.h:360
bool UpperBound
Allow using trip count upper bound to unroll loops.
virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS=0) const
Return the cost of the scaling factor used in the addressing mode represented by AM for this target...
const DataLayout & getDataLayout() const
Convenience struct for specifying and reasoning about fast-math flags.
Definition: Operator.h:160
unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef< Type *> Tys)
Compute a cost of the given call instruction.
OperandValueKind
Additional information about an operand's possible values.
bool haveFastSqrt(Type *Ty)
Definition: BasicTTIImpl.h:293
This pass exposes codegen information to IR-level passes.
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale, unsigned AddrSpace, Instruction *I=nullptr)
Definition: BasicTTIImpl.h:149
bool isLegalICmpImmediate(int64_t imm)
Definition: BasicTTIImpl.h:145
block_iterator block_begin() const
Definition: LoopInfo.h:154
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:64
virtual bool isLegalAddImmediate(int64_t) const
Return true if the specified immediate is a legal add immediate, that is, the target has add instruction...
virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const
Returns true if a cast between SrcAS and DestAS is a noop.
The cost of a 'div' instruction on x86.
const MCSchedModel & getSchedModel() const
Get the machine model for this subtarget's CPU.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isOperationExpand(unsigned Op, EVT VT) const
Return true if the specified operation is illegal on this target or unlikely to be made legal with cu...
std::pair< int, MVT > getTypeLegalizationCost(const DataLayout &DL, Type *Ty) const
Estimate the cost of type-legalization and the legalized type.
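A sketch of the usual pattern in target cost hooks, assuming access to a TargetLoweringBase and DataLayout; the baseline cost of 1 per legal operation is an assumption, not a rule.
  #include "llvm/CodeGen/TargetLowering.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Type.h"
  #include <utility>

  using namespace llvm;

  // Hypothetical helper: scale a unit per-operation cost by the number of
  // pieces the type is legalized into (LT.first); LT.second is the legal MVT.
  static int exampleLegalizedOpCost(const TargetLoweringBase &TLI,
                                    const DataLayout &DL, Type *Ty) {
    std::pair<int, MVT> LT = TLI.getTypeLegalizationCost(DL, Ty);
    return LT.first * 1; // assumed baseline cost of 1 per legal operation
  }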
LLVM_READONLY APFloat minnum(const APFloat &A, const APFloat &B)
Implements IEEE minNum semantics.
Definition: APFloat.h:1227
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:144
virtual bool isFAbsFree(EVT VT) const
Return true if an fabs operation is free to the point where it is never worthwhile to replace it with...
This file describes how to lower LLVM code to machine code.
const BasicBlock * getParent() const
Definition: Instruction.h:67
ShuffleKind
The various kinds of shuffle patterns for vector queries.
Shuffle elements of a single source vector with any shuffle mask.
BRIND - Indirect branch.
Definition: ISDOpcodes.h:602