LLVM  10.0.0svn
BasicTTIImpl.h
1 //===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file provides a helper that implements much of the TTI interface in
11 /// terms of the target-independent code generator and TargetLowering
12 /// interfaces.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #ifndef LLVM_CODEGEN_BASICTTIIMPL_H
17 #define LLVM_CODEGEN_BASICTTIIMPL_H
18 
19 #include "llvm/ADT/APInt.h"
20 #include "llvm/ADT/ArrayRef.h"
21 #include "llvm/ADT/BitVector.h"
22 #include "llvm/ADT/SmallPtrSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/LoopInfo.h"
25 #include "llvm/Analysis/TargetTransformInfo.h"
26 #include "llvm/Analysis/TargetTransformInfoImpl.h"
27 #include "llvm/CodeGen/ISDOpcodes.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetSubtargetInfo.h"
30 #include "llvm/CodeGen/ValueTypes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/Constant.h"
34 #include "llvm/IR/Constants.h"
35 #include "llvm/IR/DataLayout.h"
36 #include "llvm/IR/DerivedTypes.h"
37 #include "llvm/IR/InstrTypes.h"
38 #include "llvm/IR/Instruction.h"
39 #include "llvm/IR/Instructions.h"
40 #include "llvm/IR/Intrinsics.h"
41 #include "llvm/IR/Operator.h"
42 #include "llvm/IR/Type.h"
43 #include "llvm/IR/Value.h"
44 #include "llvm/MC/MCSchedule.h"
45 #include "llvm/Support/Casting.h"
46 #include "llvm/Support/CommandLine.h"
47 #include "llvm/Support/ErrorHandling.h"
48 #include "llvm/Support/MachineValueType.h"
49 #include "llvm/Support/MathExtras.h"
50 #include <algorithm>
51 #include <cassert>
52 #include <cstdint>
53 #include <limits>
54 #include <utility>
55 
56 namespace llvm {
57 
58 class Function;
59 class GlobalValue;
60 class LLVMContext;
61 class ScalarEvolution;
62 class SCEV;
63 class TargetMachine;
64 
65 extern cl::opt<unsigned> PartialUnrollingThreshold;
66 
67 /// Base class which can be used to help build a TTI implementation.
68 ///
69 /// This class provides as much implementation of the TTI interface as is
70 /// possible using the target independent parts of the code generator.
71 ///
72 /// In order to subclass it, your class must implement a getST() method to
73 /// return the subtarget, and a getTLI() method to return the target lowering.
74 /// We need these methods implemented in the derived class so that this class
75 /// doesn't have to duplicate storage for them.
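///
/// A minimal sketch (not part of the original header) of how a target's
/// implementation might satisfy that contract; the MyTarget* names below are
/// hypothetical placeholders:
///
/// \code
///   class MyTargetTTIImpl : public BasicTTIImplBase<MyTargetTTIImpl> {
///     const MyTargetSubtargetInfo *ST;
///     const MyTargetLowering *TLI;
///
///   public:
///     // BasicTTIImplBase queries the subtarget and the lowering through
///     // these hooks instead of storing its own copies.
///     const MyTargetSubtargetInfo *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };
/// \endcode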
76 template <typename T>
77 class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
78 private:
79  using BaseT = TargetTransformInfoImplCRTPBase<T>;
80  using TTI = TargetTransformInfo;
81 
82  /// Estimate the cost of a broadcast shuffle as an extract plus a sequence
83  /// of insert operations.
84  unsigned getBroadcastShuffleOverhead(Type *Ty) {
85  assert(Ty->isVectorTy() && "Can only shuffle vectors");
86  unsigned Cost = 0;
87  // Broadcast cost is equal to the cost of extracting the zero'th element
88  // plus the cost of inserting it into every element of the result vector.
89  Cost += static_cast<T *>(this)->getVectorInstrCost(
90  Instruction::ExtractElement, Ty, 0);
91 
92  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
93  Cost += static_cast<T *>(this)->getVectorInstrCost(
94  Instruction::InsertElement, Ty, i);
95  }
96  return Cost;
97  }
98 
99  /// Estimate a cost of shuffle as a sequence of extract and insert
100  /// operations.
101  unsigned getPermuteShuffleOverhead(Type *Ty) {
102  assert(Ty->isVectorTy() && "Can only shuffle vectors");
103  unsigned Cost = 0;
104  // Shuffle cost is equal to the cost of extracting elements from its
105  // arguments plus the cost of inserting them into the result vector.
106 
107  // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
108  // index 0 of the first vector, index 1 of the second vector, index 2 of
109  // the first vector and index 3 of the second vector, and insert them at
110  // indices <0,1,2,3> of the result vector.
111  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
112  Cost += static_cast<T *>(this)
113  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
114  Cost += static_cast<T *>(this)
115  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
116  }
117  return Cost;
118  }
119 
120  /// Estimate a cost of subvector extraction as a sequence of extract and
121  /// insert operations.
122  unsigned getExtractSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
123  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
124  "Can only extract subvectors from vectors");
125  int NumSubElts = SubTy->getVectorNumElements();
126  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
127  "SK_ExtractSubvector index out of range");
128 
129  unsigned Cost = 0;
130  // Subvector extraction cost is equal to the cost of extracting elements
131  // from the source type plus the cost of inserting them into the result
132  // vector type.
133  for (int i = 0; i != NumSubElts; ++i) {
134  Cost += static_cast<T *>(this)->getVectorInstrCost(
135  Instruction::ExtractElement, Ty, i + Index);
136  Cost += static_cast<T *>(this)->getVectorInstrCost(
137  Instruction::InsertElement, SubTy, i);
138  }
139  return Cost;
140  }
141 
142  /// Estimate a cost of subvector insertion as a sequence of extract and
143  /// insert operations.
144  unsigned getInsertSubvectorOverhead(Type *Ty, int Index, Type *SubTy) {
145  assert(Ty && Ty->isVectorTy() && SubTy && SubTy->isVectorTy() &&
146  "Can only insert subvectors into vectors");
147  int NumSubElts = SubTy->getVectorNumElements();
148  assert((Index + NumSubElts) <= (int)Ty->getVectorNumElements() &&
149  "SK_InsertSubvector index out of range");
150 
151  unsigned Cost = 0;
152  // Subvector insertion cost is equal to the cost of extracting elements
153  // from the subvector type plus the cost of inserting them into the result
154  // vector type.
155  for (int i = 0; i != NumSubElts; ++i) {
156  Cost += static_cast<T *>(this)->getVectorInstrCost(
157  Instruction::ExtractElement, SubTy, i);
158  Cost += static_cast<T *>(this)->getVectorInstrCost(
159  Instruction::InsertElement, Ty, i + Index);
160  }
161  return Cost;
162  }
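  // Illustrative example (not from the original source): inserting a
  // <2 x float> subvector at index 2 of a <4 x float> vector is modeled as
  // extracting elements 0 and 1 of the subvector and inserting them at
  // indices 2 and 3 of the wide vector, i.e. roughly (V4F32Ty and V2F32Ty
  // standing for the corresponding VectorType instances):
  //
  //   unsigned C = getInsertSubvectorOverhead(V4F32Ty, /*Index=*/2, V2F32Ty);
  //   // C == 2 * cost(extractelement) + 2 * cost(insertelement)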
163 
164  /// Local query method delegates up to T which *must* implement this!
165  const TargetSubtargetInfo *getST() const {
166  return static_cast<const T *>(this)->getST();
167  }
168 
169  /// Local query method delegates up to T which *must* implement this!
170  const TargetLoweringBase *getTLI() const {
171  return static_cast<const T *>(this)->getTLI();
172  }
173 
174  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
175  switch (M) {
176  case TTI::MIM_Unindexed:
177  return ISD::UNINDEXED;
178  case TTI::MIM_PreInc:
179  return ISD::PRE_INC;
180  case TTI::MIM_PreDec:
181  return ISD::PRE_DEC;
182  case TTI::MIM_PostInc:
183  return ISD::POST_INC;
184  case TTI::MIM_PostDec:
185  return ISD::POST_DEC;
186  }
187  llvm_unreachable("Unexpected MemIndexedMode");
188  }
189 
190 protected:
191  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
192  : BaseT(DL) {}
193 
194  using TargetTransformInfoImplBase::DL;
195 
196 public:
197  /// \name Scalar TTI Implementations
198  /// @{
199  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
200  unsigned AddressSpace, unsigned Alignment,
201  bool *Fast) const {
202  EVT E = EVT::getIntegerVT(Context, BitWidth);
203  return getTLI()->allowsMisalignedMemoryAccesses(
204  E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
205  }
206 
207  bool hasBranchDivergence() { return false; }
208 
209  bool isSourceOfDivergence(const Value *V) { return false; }
210 
211  bool isAlwaysUniform(const Value *V) { return false; }
212 
213  unsigned getFlatAddressSpace() {
214  // Return an invalid address space.
215  return -1;
216  }
217 
218  bool isLegalAddImmediate(int64_t imm) {
219  return getTLI()->isLegalAddImmediate(imm);
220  }
221 
222  bool isLegalICmpImmediate(int64_t imm) {
223  return getTLI()->isLegalICmpImmediate(imm);
224  }
225 
226  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
227  bool HasBaseReg, int64_t Scale,
228  unsigned AddrSpace, Instruction *I = nullptr) {
229  TargetLoweringBase::AddrMode AM;
230  AM.BaseGV = BaseGV;
231  AM.BaseOffs = BaseOffset;
232  AM.HasBaseReg = HasBaseReg;
233  AM.Scale = Scale;
234  return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
235  }
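  // Illustrative example (not from the original source): to ask whether an
  // address of the form "GV + 16 + BaseReg + 4 * IndexReg" is legal in
  // address space 0 for i32 accesses, a caller would invoke this roughly as
  // follows (Int32Ty and GV are placeholders):
  //
  //   bool Legal = isLegalAddressingMode(Int32Ty, GV, /*BaseOffset=*/16,
  //                                      /*HasBaseReg=*/true, /*Scale=*/4,
  //                                      /*AddrSpace=*/0);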
236 
237  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
238  const DataLayout &DL) const {
239  EVT VT = getTLI()->getValueType(DL, Ty);
240  return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
241  }
242 
243  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
244  const DataLayout &DL) const {
245  EVT VT = getTLI()->getValueType(DL, Ty);
246  return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
247  }
248 
249  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
250  return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
251  }
252 
253  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
254  bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
255  TargetLoweringBase::AddrMode AM;
256  AM.BaseGV = BaseGV;
257  AM.BaseOffs = BaseOffset;
258  AM.HasBaseReg = HasBaseReg;
259  AM.Scale = Scale;
260  return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
261  }
262 
263  bool isTruncateFree(Type *Ty1, Type *Ty2) {
264  return getTLI()->isTruncateFree(Ty1, Ty2);
265  }
266 
267  bool isProfitableToHoist(Instruction *I) {
268  return getTLI()->isProfitableToHoist(I);
269  }
270 
271  bool useAA() const { return getST()->useAA(); }
272 
273  bool isTypeLegal(Type *Ty) {
274  EVT VT = getTLI()->getValueType(DL, Ty);
275  return getTLI()->isTypeLegal(VT);
276  }
277 
278  int getGEPCost(Type *PointeeType, const Value *Ptr,
279  ArrayRef<const Value *> Operands) {
280  return BaseT::getGEPCost(PointeeType, Ptr, Operands);
281  }
282 
283  int getExtCost(const Instruction *I, const Value *Src) {
284  if (getTLI()->isExtFree(I))
285  return TargetTransformInfo::TCC_Free;
286 
287  if (isa<ZExtInst>(I) || isa<SExtInst>(I))
288  if (const LoadInst *LI = dyn_cast<LoadInst>(Src))
289  if (getTLI()->isExtLoad(LI, I, DL))
290  return TargetTransformInfo::TCC_Free;
291 
292  return TargetTransformInfo::TCC_Basic;
293  }
294 
295  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
296  ArrayRef<const Value *> Arguments, const User *U) {
297  return BaseT::getIntrinsicCost(IID, RetTy, Arguments, U);
298  }
299 
300  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
301  ArrayRef<Type *> ParamTys, const User *U) {
302  if (IID == Intrinsic::cttz) {
303  if (getTLI()->isCheapToSpeculateCttz())
304  return TargetTransformInfo::TCC_Basic;
305  return TargetTransformInfo::TCC_Expensive;
306  }
307 
308  if (IID == Intrinsic::ctlz) {
309  if (getTLI()->isCheapToSpeculateCtlz())
310  return TargetTransformInfo::TCC_Basic;
311  return TargetTransformInfo::TCC_Expensive;
312  }
313 
314  return BaseT::getIntrinsicCost(IID, RetTy, ParamTys, U);
315  }
316 
317  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
318  unsigned &JumpTableSize) {
319  /// Try to find the estimated number of clusters. Note that the number of
320  /// clusters identified in this function could be different from the actual
321  /// numbers found in lowering. This function ignores switches that are
322  /// lowered with a mix of jump table / bit test / BTree. It was initially
323  /// intended to be used when estimating the cost of a switch in the inline
324  /// cost heuristic, but it's a generic cost model to be used in other
325  /// places (e.g., in loop unrolling).
326  unsigned N = SI.getNumCases();
327  const TargetLoweringBase *TLI = getTLI();
328  const DataLayout &DL = this->getDataLayout();
329 
330  JumpTableSize = 0;
331  bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());
332 
333  // Early exit if neither a jump table nor a bit test is allowed.
334  if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
335  return N;
336 
337  APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
338  APInt MinCaseVal = MaxCaseVal;
339  for (auto CI : SI.cases()) {
340  const APInt &CaseVal = CI.getCaseValue()->getValue();
341  if (CaseVal.sgt(MaxCaseVal))
342  MaxCaseVal = CaseVal;
343  if (CaseVal.slt(MinCaseVal))
344  MinCaseVal = CaseVal;
345  }
346 
347  // Check if suitable for a bit test
348  if (N <= DL.getIndexSizeInBits(0u)) {
349  SmallPtrSet<const BasicBlock *, 4> Dests;
350  for (auto I : SI.cases())
351  Dests.insert(I.getCaseSuccessor());
352 
353  if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
354  DL))
355  return 1;
356  }
357 
358  // Check if suitable for a jump table.
359  if (IsJTAllowed) {
360  if (N < 2 || N < TLI->getMinimumJumpTableEntries())
361  return N;
362  uint64_t Range =
363  (MaxCaseVal - MinCaseVal)
364  .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
365  // Check whether a range of clusters is dense enough for a jump table
366  if (TLI->isSuitableForJumpTable(&SI, N, Range)) {
367  JumpTableSize = Range;
368  return 1;
369  }
370  }
371  return N;
372  }
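  // Illustrative example (not from the original source): a switch with the
  // ten dense cases 0..9 on a jump-table-capable target yields Range == 10,
  // so if TLI->isSuitableForJumpTable(&SI, 10, 10) holds this returns a
  // single cluster with JumpTableSize == 10; two cases {0, 1 << 20} are too
  // sparse and simply return N == 2 with JumpTableSize == 0. A caller uses it
  // roughly as:
  //
  //   unsigned JTSize = 0;
  //   unsigned NumClusters = getEstimatedNumberOfCaseClusters(SI, JTSize);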
373 
374  unsigned getJumpBufAlignment() { return getTLI()->getJumpBufAlignment(); }
375 
376  unsigned getJumpBufSize() { return getTLI()->getJumpBufSize(); }
377 
378  bool shouldBuildLookupTables() {
379  const TargetLoweringBase *TLI = getTLI();
380  return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
381  TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
382  }
383 
384  bool haveFastSqrt(Type *Ty) {
385  const TargetLoweringBase *TLI = getTLI();
386  EVT VT = TLI->getValueType(DL, Ty);
387  return TLI->isTypeLegal(VT) &&
388  TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
389  }
390 
391  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
392  return true;
393  }
394 
395  unsigned getFPOpCost(Type *Ty) {
396  // Check whether FADD is available, as a proxy for floating-point in
397  // general.
398  const TargetLoweringBase *TLI = getTLI();
399  EVT VT = TLI->getValueType(DL, Ty);
400  if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
401  return TargetTransformInfo::TCC_Basic;
402  return TargetTransformInfo::TCC_Expensive;
403  }
404 
405  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
406  const TargetLoweringBase *TLI = getTLI();
407  switch (Opcode) {
408  default: break;
409  case Instruction::Trunc:
410  if (TLI->isTruncateFree(OpTy, Ty))
411  return TargetTransformInfo::TCC_Free;
412  return TargetTransformInfo::TCC_Basic;
413  case Instruction::ZExt:
414  if (TLI->isZExtFree(OpTy, Ty))
415  return TargetTransformInfo::TCC_Free;
416  break;
417 
418  case Instruction::AddrSpaceCast:
419  if (TLI->isFreeAddrSpaceCast(OpTy->getPointerAddressSpace(),
420  Ty->getPointerAddressSpace()))
421  return TargetTransformInfo::TCC_Free;
422  return TargetTransformInfo::TCC_Basic;
423  }
424 
425  return BaseT::getOperationCost(Opcode, Ty, OpTy);
426  }
427 
428  unsigned getInliningThresholdMultiplier() { return 1; }
429 
430  int getInlinerVectorBonusPercent() { return 150; }
431 
432  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
433  TTI::UnrollingPreferences &UP) {
434  // This unrolling functionality is target independent, but to provide some
435  // motivation for its intended use, for x86:
436 
437  // According to the Intel 64 and IA-32 Architectures Optimization Reference
438  // Manual, Intel Core models and later have a loop stream detector (and
439  // associated uop queue) that can benefit from partial unrolling.
440  // The relevant requirements are:
441  // - The loop must have no more than 4 (8 for Nehalem and later) branches
442  // taken, and none of them may be calls.
443  // - The loop can have no more than 18 (28 for Nehalem and later) uops.
444 
445  // According to the Software Optimization Guide for AMD Family 15h
446  // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
447  // and loop buffer which can benefit from partial unrolling.
448  // The relevant requirements are:
449  // - The loop must have fewer than 16 branches
450  // - The loop must have less than 40 uops in all executed loop branches
451 
452  // The number of taken branches in a loop is hard to estimate here, and
453  // benchmarking has revealed that it is better not to be conservative when
454  // estimating the branch count. As a result, we'll ignore the branch limits
455  // until someone finds a case where it matters in practice.
456 
457  unsigned MaxOps;
458  const TargetSubtargetInfo *ST = getST();
459  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
460  MaxOps = PartialUnrollingThreshold;
461  else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
462  MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
463  else
464  return;
465 
466  // Scan the loop: don't unroll loops with calls.
467  for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E;
468  ++I) {
469  BasicBlock *BB = *I;
470 
471  for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
472  if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
473  ImmutableCallSite CS(&*J);
474  if (const Function *F = CS.getCalledFunction()) {
475  if (!static_cast<T *>(this)->isLoweredToCall(F))
476  continue;
477  }
478 
479  return;
480  }
481  }
482 
483  // Enable runtime and partial unrolling up to the specified size.
484  // Enable using trip count upper bound to unroll loops.
485  UP.Partial = UP.Runtime = UP.UpperBound = true;
486  UP.PartialThreshold = MaxOps;
487 
488  // Avoid unrolling when optimizing for size.
489  UP.OptSizeThreshold = 0;
490  UP.PartialOptSizeThreshold = 0;
491 
492  // Set number of instructions optimized when "back edge"
493  // becomes "fall through" to default value of 2.
494  UP.BEInsns = 2;
495  }
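  // Illustrative example (not from the original source): on a subtarget whose
  // scheduling model reports LoopMicroOpBufferSize == 28, a call-free loop
  // ends up with roughly:
  //
  //   UP.Partial = UP.Runtime = UP.UpperBound = true;
  //   UP.PartialThreshold = 28;
  //   UP.OptSizeThreshold = 0;
  //   UP.PartialOptSizeThreshold = 0;
  //   UP.BEInsns = 2;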
496 
497  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
498  AssumptionCache &AC,
499  TargetLibraryInfo *LibInfo,
500  HardwareLoopInfo &HWLoopInfo) {
501  return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
502  }
503 
504  int getInstructionLatency(const Instruction *I) {
505  if (isa<LoadInst>(I))
506  return getST()->getSchedModel().DefaultLoadLatency;
507 
508  return BaseT::getInstructionLatency(I);
509  }
510 
511  /// @}
512 
513  /// \name Vector TTI Implementations
514  /// @{
515 
516  unsigned getNumberOfRegisters(bool Vector) { return Vector ? 0 : 1; }
517 
518  unsigned getRegisterBitWidth(bool Vector) const { return 32; }
519 
520  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
521  /// are set if the result needs to be inserted and/or extracted from vectors.
522  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
523  assert(Ty->isVectorTy() && "Can only scalarize vectors");
524  unsigned Cost = 0;
525 
526  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
527  if (Insert)
528  Cost += static_cast<T *>(this)
529  ->getVectorInstrCost(Instruction::InsertElement, Ty, i);
530  if (Extract)
531  Cost += static_cast<T *>(this)
532  ->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
533  }
534 
535  return Cost;
536  }
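  // Illustrative example (not from the original source): for Ty == <4 x i32>
  // with both Insert and Extract set, the estimate is the sum of four
  // insertelement and four extractelement costs (V4I32Ty is a placeholder):
  //
  //   unsigned C = getScalarizationOverhead(V4I32Ty, /*Insert=*/true,
  //                                         /*Extract=*/true);
  //   // C == sum over i = 0..3 of cost(insert i) + cost(extract i)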
537 
538  /// Estimate the overhead of scalarizing an instruction's unique
539  /// non-constant operands. The types of the arguments are ordinarily
540  /// scalar, in which case the costs are multiplied by VF.
541  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
542  unsigned VF) {
543  unsigned Cost = 0;
544  SmallPtrSet<const Value*, 4> UniqueOperands;
545  for (const Value *A : Args) {
546  if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
547  Type *VecTy = nullptr;
548  if (A->getType()->isVectorTy()) {
549  VecTy = A->getType();
550  // If A is a vector operand, VF should be 1 or correspond to A.
551  assert((VF == 1 || VF == VecTy->getVectorNumElements()) &&
552  "Vector argument does not match VF");
553  }
554  else
555  VecTy = VectorType::get(A->getType(), VF);
556 
557  Cost += getScalarizationOverhead(VecTy, false, true);
558  }
559  }
560 
561  return Cost;
562  }
563 
564  unsigned getScalarizationOverhead(Type *VecTy, ArrayRef<const Value *> Args) {
565  assert(VecTy->isVectorTy());
566 
567  unsigned Cost = 0;
568 
569  Cost += getScalarizationOverhead(VecTy, true, false);
570  if (!Args.empty())
571  Cost += getOperandsScalarizationOverhead(Args,
572  VecTy->getVectorNumElements());
573  else
574  // When no information on arguments is provided, we add the cost
575  // associated with one argument as a heuristic.
576  Cost += getScalarizationOverhead(VecTy, false, true);
577 
578  return Cost;
579  }
580 
581  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
582 
583  unsigned getArithmeticInstrCost(
584  unsigned Opcode, Type *Ty,
585  TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
586  TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
587  TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
588  TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
589  ArrayRef<const Value *> Args = ArrayRef<const Value *>()) {
590  // Check if any of the operands are vector operands.
591  const TargetLoweringBase *TLI = getTLI();
592  int ISD = TLI->InstructionOpcodeToISD(Opcode);
593  assert(ISD && "Invalid opcode");
594 
595  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
596 
597  bool IsFloat = Ty->isFPOrFPVectorTy();
598  // Assume that floating point arithmetic operations cost twice as much as
599  // integer operations.
600  unsigned OpCost = (IsFloat ? 2 : 1);
601 
602  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
603  // The operation is legal. Assume it costs 1.
604  // TODO: Once we have extract/insert subvector cost we need to use them.
605  return LT.first * OpCost;
606  }
607 
608  if (!TLI->isOperationExpand(ISD, LT.second)) {
609  // If the operation is custom lowered, then assume that the code is twice
610  // as expensive.
611  return LT.first * 2 * OpCost;
612  }
613 
614  // Else, assume that we need to scalarize this op.
615  // TODO: If one of the types get legalized by splitting, handle this
616  // similarly to what getCastInstrCost() does.
617  if (Ty->isVectorTy()) {
618  unsigned Num = Ty->getVectorNumElements();
619  unsigned Cost = static_cast<T *>(this)
620  ->getArithmeticInstrCost(Opcode, Ty->getScalarType());
621  // Return the cost of multiple scalar invocations plus the cost of
622  // inserting and extracting the values.
623  return getScalarizationOverhead(Ty, Args) + Num * Cost;
624  }
625 
626  // We don't know anything about this scalar instruction.
627  return OpCost;
628  }
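  // Illustrative summary (not from the original source) of the cost tiers
  // above, where LT is the legalized type and OpCost is 1 (integer) or 2
  // (floating point):
  //
  //   legal or promoted            -> LT.first * OpCost
  //   custom lowered (not expand)  -> LT.first * 2 * OpCost
  //   expanded vector op           -> scalarization overhead
  //                                   + NumElts * scalar op cost
  //
  // For example, an fadd on a vector type that splits into two legal
  // registers costs roughly 2 * 2 == 4 under the first tier.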
629 
630  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
631  Type *SubTp) {
632  switch (Kind) {
633  case TTI::SK_Broadcast:
634  return getBroadcastShuffleOverhead(Tp);
635  case TTI::SK_Select:
636  case TTI::SK_Reverse:
637  case TTI::SK_Transpose:
638  case TTI::SK_PermuteSingleSrc:
639  case TTI::SK_PermuteTwoSrc:
640  return getPermuteShuffleOverhead(Tp);
641  case TTI::SK_ExtractSubvector:
642  return getExtractSubvectorOverhead(Tp, Index, SubTp);
643  case TTI::SK_InsertSubvector:
644  return getInsertSubvectorOverhead(Tp, Index, SubTp);
645  }
646  llvm_unreachable("Unknown TTI::ShuffleKind");
647  }
648 
649  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
650  const Instruction *I = nullptr) {
651  const TargetLoweringBase *TLI = getTLI();
652  int ISD = TLI->InstructionOpcodeToISD(Opcode);
653  assert(ISD && "Invalid opcode");
654  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
655  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);
656 
657  // Check for NOOP conversions.
658  if (SrcLT.first == DstLT.first &&
659  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
660 
661  // Bitcast between types that are legalized to the same type are free.
662  if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
663  return 0;
664  }
665 
666  if (Opcode == Instruction::Trunc &&
667  TLI->isTruncateFree(SrcLT.second, DstLT.second))
668  return 0;
669 
670  if (Opcode == Instruction::ZExt &&
671  TLI->isZExtFree(SrcLT.second, DstLT.second))
672  return 0;
673 
674  if (Opcode == Instruction::AddrSpaceCast &&
675  TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
676  Dst->getPointerAddressSpace()))
677  return 0;
678 
679  // If this is a zext/sext of a load, return 0 if the corresponding
680  // extending load exists on target.
681  if ((Opcode == Instruction::ZExt || Opcode == Instruction::SExt) &&
682  I && isa<LoadInst>(I->getOperand(0))) {
683  EVT ExtVT = EVT::getEVT(Dst);
684  EVT LoadVT = EVT::getEVT(Src);
685  unsigned LType =
686  ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
687  if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
688  return 0;
689  }
690 
691  // If the cast is marked as legal (or promote) then assume low cost.
692  if (SrcLT.first == DstLT.first &&
693  TLI->isOperationLegalOrPromote(ISD, DstLT.second))
694  return 1;
695 
696  // Handle scalar conversions.
697  if (!Src->isVectorTy() && !Dst->isVectorTy()) {
698  // Scalar bitcasts are usually free.
699  if (Opcode == Instruction::BitCast)
700  return 0;
701 
702  // Just check the op cost. If the operation is legal then assume it costs
703  // 1.
704  if (!TLI->isOperationExpand(ISD, DstLT.second))
705  return 1;
706 
707  // Assume that illegal scalar instruction are expensive.
708  return 4;
709  }
710 
711  // Check vector-to-vector casts.
712  if (Dst->isVectorTy() && Src->isVectorTy()) {
713  // If the cast is between same-sized registers, then the check is simple.
714  if (SrcLT.first == DstLT.first &&
715  SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {
716 
717  // Assume that Zext is done using AND.
718  if (Opcode == Instruction::ZExt)
719  return 1;
720 
721  // Assume that sext is done using SHL and SRA.
722  if (Opcode == Instruction::SExt)
723  return 2;
724 
725  // Just check the op cost. If the operation is legal then assume it costs 1
726  // and multiply by the type-legalization overhead.
728  if (!TLI->isOperationExpand(ISD, DstLT.second))
729  return SrcLT.first * 1;
730  }
731 
732  // If we are legalizing by splitting, query the concrete TTI for the cost
733  // of casting the original vector twice. We also need to factor in the
734  // cost of the split itself. Count that as 1, to be consistent with
735  // TLI->getTypeLegalizationCost().
736  if ((TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
737  TargetLowering::TypeSplitVector) ||
738  (TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
739  TargetLowering::TypeSplitVector)) {
740  Type *SplitDst = VectorType::get(Dst->getVectorElementType(),
741  Dst->getVectorNumElements() / 2);
742  Type *SplitSrc = VectorType::get(Src->getVectorElementType(),
743  Src->getVectorNumElements() / 2);
744  T *TTI = static_cast<T *>(this);
745  return TTI->getVectorSplitCost() +
746  (2 * TTI->getCastInstrCost(Opcode, SplitDst, SplitSrc, I));
747  }
748 
749  // In other cases where the source or destination are illegal, assume
750  // the operation will get scalarized.
751  unsigned Num = Dst->getVectorNumElements();
752  unsigned Cost = static_cast<T *>(this)->getCastInstrCost(
753  Opcode, Dst->getScalarType(), Src->getScalarType(), I);
754 
755  // Return the cost of multiple scalar invocations plus the cost of
756  // inserting and extracting the values.
757  return getScalarizationOverhead(Dst, true, true) + Num * Cost;
758  }
759 
760  // We already handled vector-to-vector and scalar-to-scalar conversions.
761  // This is where we handle bitcasts between vectors and scalars. We need
762  // to assume that the conversion is scalarized in one way or another.
764  if (Opcode == Instruction::BitCast)
765  // Illegal bitcasts are done by storing and loading from a stack slot.
766  return (Src->isVectorTy() ? getScalarizationOverhead(Src, false, true)
767  : 0) +
768  (Dst->isVectorTy() ? getScalarizationOverhead(Dst, true, false)
769  : 0);
770 
771  llvm_unreachable("Unhandled cast");
772  }
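  // Illustrative example (not from the original source): if a
  // sext <16 x i8> to <16 x i16> is legalized by splitting both types, the
  // cost is modeled recursively as one split plus two casts of the halves:
  //
  //   cost == getVectorSplitCost()
  //           + 2 * getCastInstrCost(SExt, <8 x i16>, <8 x i8>)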
773 
774  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
775  VectorType *VecTy, unsigned Index) {
776  return static_cast<T *>(this)->getVectorInstrCost(
777  Instruction::ExtractElement, VecTy, Index) +
778  static_cast<T *>(this)->getCastInstrCost(Opcode, Dst,
779  VecTy->getElementType());
780  }
781 
782  unsigned getCFInstrCost(unsigned Opcode) {
783  // Branches are assumed to be predicted.
784  return 0;
785  }
786 
787  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
788  const Instruction *I) {
789  const TargetLoweringBase *TLI = getTLI();
790  int ISD = TLI->InstructionOpcodeToISD(Opcode);
791  assert(ISD && "Invalid opcode");
792 
793  // Selects on vectors are actually vector selects.
794  if (ISD == ISD::SELECT) {
795  assert(CondTy && "CondTy must exist");
796  if (CondTy->isVectorTy())
797  ISD = ISD::VSELECT;
798  }
799  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
800 
801  if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
802  !TLI->isOperationExpand(ISD, LT.second)) {
803  // The operation is legal. Assume it costs 1. Multiply
804  // by the type-legalization overhead.
805  return LT.first * 1;
806  }
807 
808  // Otherwise, assume that the cast is scalarized.
809  // TODO: If one of the types get legalized by splitting, handle this
810  // similarly to what getCastInstrCost() does.
811  if (ValTy->isVectorTy()) {
812  unsigned Num = ValTy->getVectorNumElements();
813  if (CondTy)
814  CondTy = CondTy->getScalarType();
815  unsigned Cost = static_cast<T *>(this)->getCmpSelInstrCost(
816  Opcode, ValTy->getScalarType(), CondTy, I);
817 
818  // Return the cost of multiple scalar invocations plus the cost of
819  // inserting and extracting the values.
820  return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
821  }
822 
823  // Unknown scalar opcode.
824  return 1;
825  }
826 
827  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
828  std::pair<unsigned, MVT> LT =
829  getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());
830 
831  return LT.first;
832  }
833 
834  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
835  unsigned AddressSpace, const Instruction *I = nullptr) {
836  assert(!Src->isVoidTy() && "Invalid type");
837  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);
838 
839  // Assuming that all loads of legal types cost 1.
840  unsigned Cost = LT.first;
841 
842  if (Src->isVectorTy() &&
843  Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
844  // This is a vector load that legalizes to a larger type than the vector
845  // itself. Unless the corresponding extending load or truncating store is
846  // legal, this will scalarize.
847  TargetLowering::LegalizeAction LA = TargetLowering::Expand;
848  EVT MemVT = getTLI()->getValueType(DL, Src);
849  if (Opcode == Instruction::Store)
850  LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
851  else
852  LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);
853 
854  if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
855  // This is a vector load/store for some illegal type that is scalarized.
856  // We must account for the cost of building or decomposing the vector.
857  Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
858  Opcode == Instruction::Store);
859  }
860  }
861 
862  return Cost;
863  }
864 
865  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
866  unsigned Factor,
867  ArrayRef<unsigned> Indices,
868  unsigned Alignment, unsigned AddressSpace,
869  bool UseMaskForCond = false,
870  bool UseMaskForGaps = false) {
871  VectorType *VT = dyn_cast<VectorType>(VecTy);
872  assert(VT && "Expect a vector type for interleaved memory op");
873 
874  unsigned NumElts = VT->getNumElements();
875  assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");
876 
877  unsigned NumSubElts = NumElts / Factor;
878  VectorType *SubVT = VectorType::get(VT->getElementType(), NumSubElts);
879 
880  // First, the cost of the load/store operation.
881  unsigned Cost;
882  if (UseMaskForCond || UseMaskForGaps)
883  Cost = static_cast<T *>(this)->getMaskedMemoryOpCost(
884  Opcode, VecTy, Alignment, AddressSpace);
885  else
886  Cost = static_cast<T *>(this)->getMemoryOpCost(Opcode, VecTy, Alignment,
887  AddressSpace);
888 
889  // Legalize the vector type, and get the legalized and unlegalized type
890  // sizes.
891  MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
892  unsigned VecTySize =
893  static_cast<T *>(this)->getDataLayout().getTypeStoreSize(VecTy);
894  unsigned VecTyLTSize = VecTyLT.getStoreSize();
895 
896  // Return the ceiling of dividing A by B.
897  auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };
898 
899  // Scale the cost of the memory operation by the fraction of legalized
900  // instructions that will actually be used. We shouldn't account for the
901  // cost of dead instructions since they will be removed.
902  //
903  // E.g., An interleaved load of factor 8:
904  // %vec = load <16 x i64>, <16 x i64>* %ptr
905  // %v0 = shufflevector %vec, undef, <0, 8>
906  //
907  // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
908  // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
909  // type). The other loads are unused.
910  //
911  // We only scale the cost of loads since interleaved store groups aren't
912  // allowed to have gaps.
913  if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
914  // The number of loads of a legal type it will take to represent a load
915  // of the unlegalized vector type.
916  unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);
917 
918  // The number of elements of the unlegalized type that correspond to a
919  // single legal instruction.
920  unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);
921 
922  // Determine which legal instructions will be used.
923  BitVector UsedInsts(NumLegalInsts, false);
924  for (unsigned Index : Indices)
925  for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
926  UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);
927 
928  // Scale the cost of the load by the fraction of legal instructions that
929  // will be used.
930  Cost *= UsedInsts.count() / NumLegalInsts;
931  }
932 
933  // Then add the cost of the interleave operation.
934  if (Opcode == Instruction::Load) {
935  // The interleave cost is similar to extracting the sub-vectors' elements
936  // from the wide vector and inserting them into the sub-vectors.
937  //
938  // E.g. An interleaved load of factor 2 (with one member of index 0):
939  // %vec = load <8 x i32>, <8 x i32>* %ptr
940  // %v0 = shuffle %vec, undef, <0, 2, 4, 6> ; Index 0
941  // The cost is estimated as extracting elements 0, 2, 4, 6 from the
942  // <8 x i32> vector and inserting them into a <4 x i32> vector.
943 
944  assert(Indices.size() <= Factor &&
945  "Interleaved memory op has too many members");
946 
947  for (unsigned Index : Indices) {
948  assert(Index < Factor && "Invalid index for interleaved memory op");
949 
950  // Extract elements from loaded vector for each sub vector.
951  for (unsigned i = 0; i < NumSubElts; i++)
952  Cost += static_cast<T *>(this)->getVectorInstrCost(
953  Instruction::ExtractElement, VT, Index + i * Factor);
954  }
955 
956  unsigned InsSubCost = 0;
957  for (unsigned i = 0; i < NumSubElts; i++)
958  InsSubCost += static_cast<T *>(this)->getVectorInstrCost(
959  Instruction::InsertElement, SubVT, i);
960 
961  Cost += Indices.size() * InsSubCost;
962  } else {
963  // The interleave cost is that of extracting all elements from the
964  // sub-vectors and inserting them into the wide vector.
965  //
966  // E.g. An interleaved store of factor 2:
967  // %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
968  // store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
969  // The cost is estimated as extracting all elements from both <4 x i32>
970  // vectors and inserting them into the <8 x i32> vector.
971 
972  unsigned ExtSubCost = 0;
973  for (unsigned i = 0; i < NumSubElts; i++)
974  ExtSubCost += static_cast<T *>(this)->getVectorInstrCost(
975  Instruction::ExtractElement, SubVT, i);
976  Cost += ExtSubCost * Factor;
977 
978  for (unsigned i = 0; i < NumElts; i++)
979  Cost += static_cast<T *>(this)
980  ->getVectorInstrCost(Instruction::InsertElement, VT, i);
981  }
982 
983  if (!UseMaskForCond)
984  return Cost;
985 
986  Type *I8Type = Type::getInt8Ty(VT->getContext());
987  VectorType *MaskVT = VectorType::get(I8Type, NumElts);
988  SubVT = VectorType::get(I8Type, NumSubElts);
989 
990  // The mask shuffling cost is that of extracting all the elements of the
991  // mask and inserting each of them Factor times into the wide vector:
992  //
993  // E.g. an interleaved group with factor 3:
994  // %mask = icmp ult <8 x i32> %vec1, %vec2
995  // %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
996  // <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
997  // The cost is estimated as extracting all mask elements from the <8 x i1>
998  // mask vector and inserting them Factor times into the <24 x i1> shuffled
999  // mask vector.
1000  for (unsigned i = 0; i < NumSubElts; i++)
1001  Cost += static_cast<T *>(this)->getVectorInstrCost(
1002  Instruction::ExtractElement, SubVT, i);
1003 
1004  for (unsigned i = 0; i < NumElts; i++)
1005  Cost += static_cast<T *>(this)->getVectorInstrCost(
1006  Instruction::InsertElement, MaskVT, i);
1007 
1008  // The Gaps mask is invariant and created outside the loop, therefore the
1009  // cost of creating it is not accounted for here. However if we have both
1010  // a MaskForGaps and some other mask that guards the execution of the
1011  // memory access, we need to account for the cost of And-ing the two masks
1012  // inside the loop.
1013  if (UseMaskForGaps)
1014  Cost += static_cast<T *>(this)->getArithmeticInstrCost(
1015  BinaryOperator::And, MaskVT);
1016 
1017  return Cost;
1018  }
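  // Illustrative example (not from the original source): an unmasked
  // interleaved load of factor 2 from <8 x i32> with both members used is
  // modeled as the cost of the wide <8 x i32> load, plus 2 * 4 extracts from
  // the wide vector (the Indices loop above), plus 2 * 4 inserts into the
  // <4 x i32> sub-vectors.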
1019 
1020  /// Get intrinsic cost based on arguments.
1021  unsigned getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
1022  ArrayRef<Value *> Args, FastMathFlags FMF,
1023  unsigned VF = 1) {
1024  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1025  assert((RetVF == 1 || VF == 1) && "VF > 1 and RetVF is a vector type");
1026  auto *ConcreteTTI = static_cast<T *>(this);
1027 
1028  switch (IID) {
1029  default: {
1030  // Assume that we need to scalarize this intrinsic.
1031  SmallVector<Type *, 4> Types;
1032  for (Value *Op : Args) {
1033  Type *OpTy = Op->getType();
1034  assert(VF == 1 || !OpTy->isVectorTy());
1035  Types.push_back(VF == 1 ? OpTy : VectorType::get(OpTy, VF));
1036  }
1037 
1038  if (VF > 1 && !RetTy->isVoidTy())
1039  RetTy = VectorType::get(RetTy, VF);
1040 
1041  // Compute the scalarization overhead based on Args for a vector
1042  // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1043  // CostModel will pass a vector RetTy and VF is 1.
1044  unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1045  if (RetVF > 1 || VF > 1) {
1046  ScalarizationCost = 0;
1047  if (!RetTy->isVoidTy())
1048  ScalarizationCost += getScalarizationOverhead(RetTy, true, false);
1049  ScalarizationCost += getOperandsScalarizationOverhead(Args, VF);
1050  }
1051 
1052  return ConcreteTTI->getIntrinsicInstrCost(IID, RetTy, Types, FMF,
1053  ScalarizationCost);
1054  }
1055  case Intrinsic::masked_scatter: {
1056  assert(VF == 1 && "Can't vectorize types here.");
1057  Value *Mask = Args[3];
1058  bool VarMask = !isa<Constant>(Mask);
1059  unsigned Alignment = cast<ConstantInt>(Args[2])->getZExtValue();
1060  return ConcreteTTI->getGatherScatterOpCost(
1061  Instruction::Store, Args[0]->getType(), Args[1], VarMask, Alignment);
1062  }
1063  case Intrinsic::masked_gather: {
1064  assert(VF == 1 && "Can't vectorize types here.");
1065  Value *Mask = Args[2];
1066  bool VarMask = !isa<Constant>(Mask);
1067  unsigned Alignment = cast<ConstantInt>(Args[1])->getZExtValue();
1068  return ConcreteTTI->getGatherScatterOpCost(Instruction::Load, RetTy,
1069  Args[0], VarMask, Alignment);
1070  }
1071  case Intrinsic::experimental_vector_reduce_add:
1072  case Intrinsic::experimental_vector_reduce_mul:
1073  case Intrinsic::experimental_vector_reduce_and:
1074  case Intrinsic::experimental_vector_reduce_or:
1075  case Intrinsic::experimental_vector_reduce_xor:
1076  case Intrinsic::experimental_vector_reduce_v2_fadd:
1077  case Intrinsic::experimental_vector_reduce_v2_fmul:
1078  case Intrinsic::experimental_vector_reduce_smax:
1079  case Intrinsic::experimental_vector_reduce_smin:
1080  case Intrinsic::experimental_vector_reduce_fmax:
1081  case Intrinsic::experimental_vector_reduce_fmin:
1082  case Intrinsic::experimental_vector_reduce_umax:
1083  case Intrinsic::experimental_vector_reduce_umin:
1084  return getIntrinsicInstrCost(IID, RetTy, Args[0]->getType(), FMF);
1085  case Intrinsic::fshl:
1086  case Intrinsic::fshr: {
1087  Value *X = Args[0];
1088  Value *Y = Args[1];
1089  Value *Z = Args[2];
1090  TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
1091  TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
1092  TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
1093  TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
1094  TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
1095  OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
1096  : TTI::OP_None;
1097  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
1098  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
1099  unsigned Cost = 0;
1100  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Or, RetTy);
1101  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Sub, RetTy);
1102  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::Shl, RetTy,
1103  OpKindX, OpKindZ, OpPropsX);
1104  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::LShr, RetTy,
1105  OpKindY, OpKindZ, OpPropsY);
1106  // Non-constant shift amounts require a modulo.
1107  if (OpKindZ != TTI::OK_UniformConstantValue &&
1108  OpKindZ != TTI::OK_NonUniformConstantValue)
1109  Cost += ConcreteTTI->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1110  OpKindZ, OpKindBW, OpPropsZ,
1111  OpPropsBW);
1112  // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1113  if (X != Y) {
1114  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1115  if (RetVF > 1)
1116  CondTy = VectorType::get(CondTy, RetVF);
1117  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1118  CondTy, nullptr);
1119  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1120  CondTy, nullptr);
1121  }
1122  return Cost;
1123  }
1124  }
1125  }
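  // Illustrative example (not from the original source): a call to
  // llvm.masked.gather(ptrs, align, mask, passthru) is routed by the switch
  // above to the gather/scatter hook, roughly:
  //
  //   getGatherScatterOpCost(Instruction::Load, RetTy, /*Ptrs=*/Args[0],
  //                          /*VariableMask=*/!isa<Constant>(Args[2]),
  //                          /*Alignment=*/
  //                          cast<ConstantInt>(Args[1])->getZExtValue());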
1126 
1127  /// Get intrinsic cost based on argument types.
1128  /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1129  /// cost of scalarizing the arguments and the return value will be computed
1130  /// based on types.
1131  unsigned getIntrinsicInstrCost(
1132  Intrinsic::ID IID, Type *RetTy, ArrayRef<Type *> Tys, FastMathFlags FMF,
1133  unsigned ScalarizationCostPassed = std::numeric_limits<unsigned>::max()) {
1134  unsigned RetVF = (RetTy->isVectorTy() ? RetTy->getVectorNumElements() : 1);
1135  auto *ConcreteTTI = static_cast<T *>(this);
1136 
1137  SmallVector<unsigned, 2> ISDs;
1138  unsigned SingleCallCost = 10; // Library call cost. Make it expensive.
1139  switch (IID) {
1140  default: {
1141  // Assume that we need to scalarize this intrinsic.
1142  unsigned ScalarizationCost = ScalarizationCostPassed;
1143  unsigned ScalarCalls = 1;
1144  Type *ScalarRetTy = RetTy;
1145  if (RetTy->isVectorTy()) {
1146  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1147  ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
1148  ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
1149  ScalarRetTy = RetTy->getScalarType();
1150  }
1151  SmallVector<Type *, 4> ScalarTys;
1152  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1153  Type *Ty = Tys[i];
1154  if (Ty->isVectorTy()) {
1155  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1156  ScalarizationCost += getScalarizationOverhead(Ty, false, true);
1157  ScalarCalls = std::max(ScalarCalls, Ty->getVectorNumElements());
1158  Ty = Ty->getScalarType();
1159  }
1160  ScalarTys.push_back(Ty);
1161  }
1162  if (ScalarCalls == 1)
1163  return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1164 
1165  unsigned ScalarCost =
1166  ConcreteTTI->getIntrinsicInstrCost(IID, ScalarRetTy, ScalarTys, FMF);
1167 
1168  return ScalarCalls * ScalarCost + ScalarizationCost;
1169  }
1170  // Look for intrinsics that can be lowered directly or turned into a scalar
1171  // intrinsic call.
1172  case Intrinsic::sqrt:
1173  ISDs.push_back(ISD::FSQRT);
1174  break;
1175  case Intrinsic::sin:
1176  ISDs.push_back(ISD::FSIN);
1177  break;
1178  case Intrinsic::cos:
1179  ISDs.push_back(ISD::FCOS);
1180  break;
1181  case Intrinsic::exp:
1182  ISDs.push_back(ISD::FEXP);
1183  break;
1184  case Intrinsic::exp2:
1185  ISDs.push_back(ISD::FEXP2);
1186  break;
1187  case Intrinsic::log:
1188  ISDs.push_back(ISD::FLOG);
1189  break;
1190  case Intrinsic::log10:
1191  ISDs.push_back(ISD::FLOG10);
1192  break;
1193  case Intrinsic::log2:
1194  ISDs.push_back(ISD::FLOG2);
1195  break;
1196  case Intrinsic::fabs:
1197  ISDs.push_back(ISD::FABS);
1198  break;
1199  case Intrinsic::canonicalize:
1200  ISDs.push_back(ISD::FCANONICALIZE);
1201  break;
1202  case Intrinsic::minnum:
1203  ISDs.push_back(ISD::FMINNUM);
1204  if (FMF.noNaNs())
1205  ISDs.push_back(ISD::FMINIMUM);
1206  break;
1207  case Intrinsic::maxnum:
1208  ISDs.push_back(ISD::FMAXNUM);
1209  if (FMF.noNaNs())
1210  ISDs.push_back(ISD::FMAXIMUM);
1211  break;
1212  case Intrinsic::copysign:
1213  ISDs.push_back(ISD::FCOPYSIGN);
1214  break;
1215  case Intrinsic::floor:
1216  ISDs.push_back(ISD::FFLOOR);
1217  break;
1218  case Intrinsic::ceil:
1219  ISDs.push_back(ISD::FCEIL);
1220  break;
1221  case Intrinsic::trunc:
1222  ISDs.push_back(ISD::FTRUNC);
1223  break;
1224  case Intrinsic::nearbyint:
1225  ISDs.push_back(ISD::FNEARBYINT);
1226  break;
1227  case Intrinsic::rint:
1228  ISDs.push_back(ISD::FRINT);
1229  break;
1230  case Intrinsic::round:
1231  ISDs.push_back(ISD::FROUND);
1232  break;
1233  case Intrinsic::pow:
1234  ISDs.push_back(ISD::FPOW);
1235  break;
1236  case Intrinsic::fma:
1237  ISDs.push_back(ISD::FMA);
1238  break;
1239  case Intrinsic::fmuladd:
1240  ISDs.push_back(ISD::FMA);
1241  break;
1242  // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1243  case Intrinsic::lifetime_start:
1244  case Intrinsic::lifetime_end:
1245  case Intrinsic::sideeffect:
1246  return 0;
1247  case Intrinsic::masked_store:
1248  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Store, Tys[0], 0,
1249  0);
1250  case Intrinsic::masked_load:
1251  return ConcreteTTI->getMaskedMemoryOpCost(Instruction::Load, RetTy, 0, 0);
1252  case Intrinsic::experimental_vector_reduce_add:
1253  return ConcreteTTI->getArithmeticReductionCost(Instruction::Add, Tys[0],
1254  /*IsPairwiseForm=*/false);
1255  case Intrinsic::experimental_vector_reduce_mul:
1256  return ConcreteTTI->getArithmeticReductionCost(Instruction::Mul, Tys[0],
1257  /*IsPairwiseForm=*/false);
1258  case Intrinsic::experimental_vector_reduce_and:
1259  return ConcreteTTI->getArithmeticReductionCost(Instruction::And, Tys[0],
1260  /*IsPairwiseForm=*/false);
1261  case Intrinsic::experimental_vector_reduce_or:
1262  return ConcreteTTI->getArithmeticReductionCost(Instruction::Or, Tys[0],
1263  /*IsPairwiseForm=*/false);
1264  case Intrinsic::experimental_vector_reduce_xor:
1265  return ConcreteTTI->getArithmeticReductionCost(Instruction::Xor, Tys[0],
1266  /*IsPairwiseForm=*/false);
1267  case Intrinsic::experimental_vector_reduce_v2_fadd:
1268  return ConcreteTTI->getArithmeticReductionCost(
1269  Instruction::FAdd, Tys[0],
1270  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1271  // reductions.
1272  case Intrinsic::experimental_vector_reduce_v2_fmul:
1273  return ConcreteTTI->getArithmeticReductionCost(
1274  Instruction::FMul, Tys[0],
1275  /*IsPairwiseForm=*/false); // FIXME: Add new flag for cost of strict
1276  // reductions.
1277  case Intrinsic::experimental_vector_reduce_smax:
1278  case Intrinsic::experimental_vector_reduce_smin:
1279  case Intrinsic::experimental_vector_reduce_fmax:
1280  case Intrinsic::experimental_vector_reduce_fmin:
1281  return ConcreteTTI->getMinMaxReductionCost(
1282  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1283  /*IsUnsigned=*/true);
1284  case Intrinsic::experimental_vector_reduce_umax:
1285  case Intrinsic::experimental_vector_reduce_umin:
1286  return ConcreteTTI->getMinMaxReductionCost(
1287  Tys[0], CmpInst::makeCmpResultType(Tys[0]), /*IsPairwiseForm=*/false,
1288  /*IsUnsigned=*/false);
1289  case Intrinsic::sadd_sat:
1290  case Intrinsic::ssub_sat: {
1291  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1292  if (RetVF > 1)
1293  CondTy = VectorType::get(CondTy, RetVF);
1294 
1295  Type *OpTy = StructType::create({RetTy, CondTy});
1296  Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1297  ? Intrinsic::sadd_with_overflow
1298  : Intrinsic::ssub_with_overflow;
1299 
1300  // SatMax -> Overflow && SumDiff < 0
1301  // SatMin -> Overflow && SumDiff >= 0
1302  unsigned Cost = 0;
1303  Cost += ConcreteTTI->getIntrinsicInstrCost(
1304  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1305  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy,
1306  CondTy, nullptr);
1307  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1308  CondTy, nullptr);
1309  return Cost;
1310  }
1311  case Intrinsic::uadd_sat:
1312  case Intrinsic::usub_sat: {
1313  Type *CondTy = Type::getInt1Ty(RetTy->getContext());
1314  if (RetVF > 1)
1315  CondTy = VectorType::get(CondTy, RetVF);
1316 
1317  Type *OpTy = StructType::create({RetTy, CondTy});
1318  Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1319  ? Intrinsic::uadd_with_overflow
1320  : Intrinsic::usub_with_overflow;
1321 
1322  unsigned Cost = 0;
1323  Cost += ConcreteTTI->getIntrinsicInstrCost(
1324  OverflowOp, OpTy, {RetTy, RetTy}, FMF, ScalarizationCostPassed);
1325  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::Select, RetTy,
1326  CondTy, nullptr);
1327  return Cost;
1328  }
1329  case Intrinsic::smul_fix:
1330  case Intrinsic::umul_fix: {
1331  unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
1332  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1333  if (RetVF > 1)
1334  ExtTy = VectorType::get(ExtTy, RetVF);
1335 
1336  unsigned ExtOp =
1337  IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
1338 
1339  unsigned Cost = 0;
1340  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, RetTy);
1341  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1342  Cost +=
1343  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy);
1344  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, RetTy,
1345  TTI::OK_AnyValue,
1346  TTI::OK_UniformConstantValue);
1347  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Shl, RetTy,
1348  TTI::OK_AnyValue,
1349  TTI::OK_UniformConstantValue);
1350  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Or, RetTy);
1351  return Cost;
1352  }
1353  case Intrinsic::sadd_with_overflow:
1354  case Intrinsic::ssub_with_overflow: {
1355  Type *SumTy = RetTy->getContainedType(0);
1356  Type *OverflowTy = RetTy->getContainedType(1);
1357  unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1358  ? BinaryOperator::Add
1359  : BinaryOperator::Sub;
1360 
1361  // LHSSign -> LHS >= 0
1362  // RHSSign -> RHS >= 0
1363  // SumSign -> Sum >= 0
1364  //
1365  // Add:
1366  // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1367  // Sub:
1368  // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1369  unsigned Cost = 0;
1370  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1371  Cost += 3 * ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1372  OverflowTy, nullptr);
1373  Cost += 2 * ConcreteTTI->getCmpSelInstrCost(
1374  BinaryOperator::ICmp, OverflowTy, OverflowTy, nullptr);
1375  Cost +=
1376  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::And, OverflowTy);
1377  return Cost;
1378  }
1379  case Intrinsic::uadd_with_overflow:
1380  case Intrinsic::usub_with_overflow: {
1381  Type *SumTy = RetTy->getContainedType(0);
1382  Type *OverflowTy = RetTy->getContainedType(1);
1383  unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1384  ? BinaryOperator::Add
1385  : BinaryOperator::Sub;
1386 
1387  unsigned Cost = 0;
1388  Cost += ConcreteTTI->getArithmeticInstrCost(Opcode, SumTy);
1389  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy,
1390  OverflowTy, nullptr);
1391  return Cost;
1392  }
1393  case Intrinsic::smul_with_overflow:
1394  case Intrinsic::umul_with_overflow: {
1395  Type *MulTy = RetTy->getContainedType(0);
1396  Type *OverflowTy = RetTy->getContainedType(1);
1397  unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
1398  Type *ExtTy = Type::getIntNTy(RetTy->getContext(), ExtSize);
1399  if (MulTy->isVectorTy())
1400  ExtTy = VectorType::get(ExtTy, MulTy->getVectorNumElements());
1401 
1402  unsigned ExtOp =
1403  IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
1404 
1405  unsigned Cost = 0;
1406  Cost += 2 * ConcreteTTI->getCastInstrCost(ExtOp, ExtTy, MulTy);
1407  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::Mul, ExtTy);
1408  Cost +=
1409  2 * ConcreteTTI->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy);
1410  Cost += ConcreteTTI->getArithmeticInstrCost(Instruction::LShr, MulTy,
1411  TTI::OK_AnyValue,
1412  TTI::OK_UniformConstantValue);
1413 
1414  if (IID == Intrinsic::smul_with_overflow)
1415  Cost += ConcreteTTI->getArithmeticInstrCost(
1416  Instruction::AShr, MulTy, TTI::OK_AnyValue,
1417  TTI::OK_UniformConstantValue);
1418 
1419  Cost += ConcreteTTI->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy,
1420  OverflowTy, nullptr);
1421  return Cost;
1422  }
1423  case Intrinsic::ctpop:
1424  ISDs.push_back(ISD::CTPOP);
1425  // In case of legalization use TCC_Expensive. This is cheaper than a
1426  // library call but still not a cheap instruction.
1427  SingleCallCost = TargetTransformInfo::TCC_Expensive;
1428  break;
1429  // FIXME: ctlz, cttz, ...
1430  }
1431 
1432  const TargetLoweringBase *TLI = getTLI();
1433  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1434 
1435  SmallVector<unsigned, 2> LegalCost;
1436  SmallVector<unsigned, 2> CustomCost;
1437  for (unsigned ISD : ISDs) {
1438  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
1439  if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1440  TLI->isFAbsFree(LT.second)) {
1441  return 0;
1442  }
1443 
1444  // The operation is legal. Assume it costs 1.
1445  // If the type is split to multiple registers, assume that there is some
1446  // overhead to this.
1447  // TODO: Once we have extract/insert subvector cost we need to use them.
1448  if (LT.first > 1)
1449  LegalCost.push_back(LT.first * 2);
1450  else
1451  LegalCost.push_back(LT.first * 1);
1452  } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1453  // If the operation is custom lowered then assume
1454  // that the code is twice as expensive.
1455  CustomCost.push_back(LT.first * 2);
1456  }
1457  }
1458 
1459  auto MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
1460  if (MinLegalCostI != LegalCost.end())
1461  return *MinLegalCostI;
1462 
1463  auto MinCustomCostI =
1464  std::min_element(CustomCost.begin(), CustomCost.end());
1465  if (MinCustomCostI != CustomCost.end())
1466  return *MinCustomCostI;
1467 
1468  // If we can't lower fmuladd into an FMA estimate the cost as a floating
1469  // point mul followed by an add.
1470  if (IID == Intrinsic::fmuladd)
1471  return ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) +
1472  ConcreteTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy);
1473 
1474  // Else, assume that we need to scalarize this intrinsic. For math builtins
1475  // this will emit a costly libcall, adding call overhead and spills. Make it
1476  // very expensive.
1477  if (RetTy->isVectorTy()) {
1478  unsigned ScalarizationCost =
1479  ((ScalarizationCostPassed != std::numeric_limits<unsigned>::max())
1480  ? ScalarizationCostPassed
1481  : getScalarizationOverhead(RetTy, true, false));
1482  unsigned ScalarCalls = RetTy->getVectorNumElements();
1483  SmallVector<Type *, 4> ScalarTys;
1484  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1485  Type *Ty = Tys[i];
1486  if (Ty->isVectorTy())
1487  Ty = Ty->getScalarType();
1488  ScalarTys.push_back(Ty);
1489  }
1490  unsigned ScalarCost = ConcreteTTI->getIntrinsicInstrCost(
1491  IID, RetTy->getScalarType(), ScalarTys, FMF);
1492  for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1493  if (Tys[i]->isVectorTy()) {
1494  if (ScalarizationCostPassed == std::numeric_limits<unsigned>::max())
1495  ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
1496  ScalarCalls = std::max(ScalarCalls, Tys[i]->getVectorNumElements());
1497  }
1498  }
1499 
1500  return ScalarCalls * ScalarCost + ScalarizationCost;
1501  }
1502 
1503  // This is going to be turned into a library call, make it expensive.
1504  return SingleCallCost;
1505  }
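  // Illustrative example (not from the original source): llvm.sqrt.v4f32 on a
  // target where ISD::FSQRT is legal for v4f32 costs LT.first (1 here); if
  // the node is only custom lowered, the cost doubles; if it must be
  // expanded, the estimate becomes 4 * scalar sqrt cost + scalarization
  // overhead, and a scalar sqrt that is neither legal nor custom falls back
  // to SingleCallCost, i.e. a library call.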
1506 
1507  /// Compute a cost of the given call instruction.
1508  ///
1509  /// Compute the cost of calling function F with return type RetTy and
1510  /// argument types Tys. F might be nullptr, in this case the cost of an
1511  /// arbitrary call with the specified signature will be returned.
1512  /// This is used, for instance, when we estimate call of a vector
1513  /// counterpart of the given function.
1514  /// \param F Called function, might be nullptr.
1515  /// \param RetTy Return value types.
1516  /// \param Tys Argument types.
1517  /// \returns The cost of the Call instruction.
1518  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
1519  return 10;
1520  }
1521 
1522  unsigned getNumberOfParts(Type *Tp) {
1523  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1524  return LT.first;
1525  }
1526 
1527  unsigned getAddressComputationCost(const SCEV *, ScalarEvolution *,
1528  const SCEV *) {
1529  return 0;
1530  }
1531 
1532  /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1533  /// We assume that reduction operations are performed in the following way:
1534  /// 1. Non-pairwise reduction
1535  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1536  /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1537  /// \----------------v-------------/ \----------v------------/
1538  /// n/2 elements n/2 elements
1539  /// %red1 = op <n x t> %val, <n x t> val1
1540  /// After this operation we have a vector %red1 where only the first n/2
1541  /// elements are meaningful, the second n/2 elements are undefined and can be
1542  /// dropped. All other operations are actually working with the vector of
1543  /// length n/2, not n, though the real vector length is still n.
1544  /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1545  /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1546  /// \----------------v-------------/ \----------v------------/
1547  /// n/4 elements 3*n/4 elements
1548  /// %red2 = op <n x t> %red1, <n x t> val2 - working with the vector of
1549  /// length n/2, the resulting vector has length n/4 etc.
1550  /// 2. Pairwise reduction:
1551  /// Everything is the same except for an additional shuffle operation which
1552  /// is used to produce operands for pairwise kind of reductions.
1553  /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1554  /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1555  /// \-------------v----------/ \----------v------------/
1556  /// n/2 elements n/2 elements
1557  /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1558  /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1559  /// \-------------v----------/ \----------v------------/
1560  /// n/2 elements n/2 elements
1561  /// %red1 = op <n x t> %val1, <n x t> %val2
1562  /// Again, the operation is performed on an <n x t> vector, but the resulting
1563  /// vector %red1 is effectively an <n/2 x t> vector.
1564  ///
1565  /// The cost model should take into account that the actual length of the
1566  /// vector is reduced on each iteration.
1567  unsigned getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1568  bool IsPairwise) {
1569  assert(Ty->isVectorTy() && "Expect a vector type");
1570  Type *ScalarTy = Ty->getVectorElementType();
1571  unsigned NumVecElts = Ty->getVectorNumElements();
1572  unsigned NumReduxLevels = Log2_32(NumVecElts);
1573  unsigned ArithCost = 0;
1574  unsigned ShuffleCost = 0;
1575  auto *ConcreteTTI = static_cast<T *>(this);
1576  std::pair<unsigned, MVT> LT =
1577  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1578  unsigned LongVectorCount = 0;
1579  unsigned MVTLen =
1580  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1581  while (NumVecElts > MVTLen) {
1582  NumVecElts /= 2;
1583  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1584  // Assume the pairwise shuffles add a cost.
1585  ShuffleCost += (IsPairwise + 1) *
1586  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1587  NumVecElts, SubTy);
1588  ArithCost += ConcreteTTI->getArithmeticInstrCost(Opcode, SubTy);
1589  Ty = SubTy;
1590  ++LongVectorCount;
1591  }
1592 
1593  NumReduxLevels -= LongVectorCount;
1594 
1595  // The minimal length of the vector is limited by the width of the vector
1596  // operations available on the current platform. That's why the final
1597  // reduction operations are all performed on vectors of the same
1598  // architecture-dependent length.
1599 
1600  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1601  // reductions need two shuffles on every level except the last one, where
1602  // one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1603  unsigned NumShuffles = NumReduxLevels;
1604  if (IsPairwise && NumReduxLevels >= 1)
1605  NumShuffles += NumReduxLevels - 1;
1606  ShuffleCost += NumShuffles *
1607  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1608  0, Ty);
1609  ArithCost += NumReduxLevels *
1610  ConcreteTTI->getArithmeticInstrCost(Opcode, Ty);
1611  return ShuffleCost + ArithCost +
1612  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1613  }
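  // --- Editor's illustration (not part of the original header) ---
  // Worked example of the non-pairwise cost above for an add reduction of
  // <8 x i32> on a hypothetical target whose widest legal vector is <4 x i32>,
  // assuming unit shuffle, add and extractelement costs:
  //   splitting loop: <8 x i32> -> <4 x i32>    1 subvector shuffle + 1 add
  //   NumReduxLevels  = log2(8) - 1 split level = 2 remaining levels
  //   remaining work: 2 shuffles + 2 adds       (on the legal <4 x i32> type)
  //   final step:     1 extractelement of lane 0
  //   total           = (1 + 2) + (1 + 2) + 1 = 7
  // A pairwise reduction would add one more shuffle per level except the last.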
1614 
1615  /// Try to calculate op costs for min/max reduction operations.
1616  /// \param CondTy Conditional type for the Select instruction.
1617  unsigned getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwise,
1618  bool) {
1619  assert(Ty->isVectorTy() && "Expect a vector type");
1620  Type *ScalarTy = Ty->getVectorElementType();
1621  Type *ScalarCondTy = CondTy->getVectorElementType();
1622  unsigned NumVecElts = Ty->getVectorNumElements();
1623  unsigned NumReduxLevels = Log2_32(NumVecElts);
1624  unsigned CmpOpcode;
1625  if (Ty->isFPOrFPVectorTy()) {
1626  CmpOpcode = Instruction::FCmp;
1627  } else {
1628  assert(Ty->isIntOrIntVectorTy() &&
1629  "expecting floating point or integer type for min/max reduction");
1630  CmpOpcode = Instruction::ICmp;
1631  }
1632  unsigned MinMaxCost = 0;
1633  unsigned ShuffleCost = 0;
1634  auto *ConcreteTTI = static_cast<T *>(this);
1635  std::pair<unsigned, MVT> LT =
1636  ConcreteTTI->getTLI()->getTypeLegalizationCost(DL, Ty);
1637  unsigned LongVectorCount = 0;
1638  unsigned MVTLen =
1639  LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
1640  while (NumVecElts > MVTLen) {
1641  NumVecElts /= 2;
1642  Type *SubTy = VectorType::get(ScalarTy, NumVecElts);
1643  CondTy = VectorType::get(ScalarCondTy, NumVecElts);
1644 
1645  // Assume the pairwise shuffles add a cost.
1646  ShuffleCost += (IsPairwise + 1) *
1647  ConcreteTTI->getShuffleCost(TTI::SK_ExtractSubvector, Ty,
1648  NumVecElts, SubTy);
1649  MinMaxCost +=
1650  ConcreteTTI->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy, nullptr) +
1651  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1652  nullptr);
1653  Ty = SubTy;
1654  ++LongVectorCount;
1655  }
1656 
1657  NumReduxLevels -= LongVectorCount;
1658 
1659  // The minimal length of the vector is limited by the width of the vector
1660  // operations available on the current platform. That's why the final
1661  // reduction operations are all performed on vectors of the same
1662  // architecture-dependent length.
1663 
1664  // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1665  // reductions need two shuffles on every level except the last one, where
1666  // one of the shuffles is <0, u, u, ...>, which is an identity shuffle.
1667  unsigned NumShuffles = NumReduxLevels;
1668  if (IsPairwise && NumReduxLevels >= 1)
1669  NumShuffles += NumReduxLevels - 1;
1670  ShuffleCost += NumShuffles *
1671  ConcreteTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty,
1672  0, Ty);
1673  MinMaxCost +=
1674  NumReduxLevels *
1675  (ConcreteTTI->getCmpSelInstrCost(CmpOpcode, Ty, CondTy, nullptr) +
1676  ConcreteTTI->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
1677  nullptr));
1678  // The last min/max should be in vector registers and we counted it above.
1679  // So we just need a single extractelement.
1680  return ShuffleCost + MinMaxCost +
1681  ConcreteTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
1682  }
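  // --- Editor's illustration (not part of the original header) ---
  // The min/max variant walks the same shuffle tree, but each reduction level
  // is costed as one compare plus one select rather than one arithmetic op.
  // E.g. a non-pairwise fmax reduction of an already-legal <4 x float>:
  //   NumReduxLevels = log2(4) = 2, and the splitting loop never runs, so
  //   cost = 2 shuffles + 2 * (fcmp + select) + 1 extractelement,
  // with the individual terms supplied by getShuffleCost, getCmpSelInstrCost
  // and getVectorInstrCost of the concrete target.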
1683 
1684  unsigned getVectorSplitCost() { return 1; }
1685 
1686  /// @}
1687 };
1688 
1689 /// Concrete BasicTTIImpl that can be used if no further customization
1690 /// is needed.
1691 class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
1692  using BaseT = BasicTTIImplBase<BasicTTIImpl>;
1693 
1694  friend class BasicTTIImplBase<BasicTTIImpl>;
1695 
1696  const TargetSubtargetInfo *ST;
1697  const TargetLoweringBase *TLI;
1698 
1699  const TargetSubtargetInfo *getST() const { return ST; }
1700  const TargetLoweringBase *getTLI() const { return TLI; }
1701 
1702 public:
1703  explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
1704 };
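// --- Editor's illustration (not part of the original header) ---
// Sketch of how a target machine typically hands this concrete implementation
// to the TTI wrapper; MyTargetMachine is a placeholder name and the real
// definitions live in the target's .cpp files, roughly along these lines:
//   TargetTransformInfo
//   MyTargetMachine::getTargetTransformInfo(const Function &F) {
//     return TargetTransformInfo(BasicTTIImpl(this, F));
//   }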
1705 
1706 } // end namespace llvm
1707 
1708 #endif // LLVM_CODEGEN_BASICTTIIMPL_H