LLVM 7.0.0svn
ARMTargetTransformInfo.cpp
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the whitelist must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
                    (CalleeBits & ~InlineFeatureWhitelist);
  // For features in the whitelist, the callee's features must be a subset of
  // the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
                     (CalleeBits & InlineFeatureWhitelist);
  return MatchExact && MatchSubset;
}
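
// Illustrative example (added to this listing, not part of the original
// source): with "neon" on the whitelist, a callee compiled with "+neon" can
// be inlined into a caller compiled with "+neon,+vfp4", because the callee's
// whitelisted features are a subset of the caller's; a mismatch in any
// non-whitelisted feature forces an exact-match failure and blocks inlining.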

int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1.
  if (SImmVal >= 0 && SImmVal < 256)
    return 1;
  if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constantpool.
  return 3;
}
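
// Illustrative examples (added to this listing, not part of the original
// source): in ARM mode, 0x00AB0000 is a rotated 8-bit immediate
// (getSOImmVal succeeds), so it costs 1; 0x12345678 is not, but with v6t2
// it can be built with a MOVW/MOVT pair, so it costs 2; without v6t2 it is
// assumed to come from a constant-pool load, costing 3.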

// Constants smaller than 256 fit in the immediate field of
// Thumb1 instructions so we return a zero cost and 1 otherwise.
int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty) {
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    return 0;

  return 1;
}

int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                              Type *Ty) {
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  // FIXME: this is probably unneeded with GlobalISel.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;
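
  // For example, "udiv i32 %x, 10" is typically lowered to a multiply by a
  // "magic" constant plus shifts, so the divisor is never materialized as an
  // immediate at all. (Illustrative note, not part of the original source.)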

  if (Opcode == Instruction::And)
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
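
  // For example, AND with 0xFFFFFF00 has no valid ARM-mode immediate
  // encoding, but BIC with ~0xFFFFFF00 == 0xFF does; likewise "add ..., #-8"
  // can be emitted as "sub ..., #8". (Illustrative note, not part of the
  // original source.)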

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1<<8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }

  // xor a, -1 can always be folded to MVN
  if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
    return 0;

  return getIntImmCost(Imm, Ty);
}

int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND,  MVT::v2f64, 2 },
    { ISD::FP_EXTEND, MVT::v2f32, 2 },
    { ISD::FP_EXTEND, MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return LT.first * Entry->Cost;
  }
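
  // For example, an fpext of <2 x float> to <2 x double> (FP_EXTEND on
  // v2f32) is costed at 2 by the table above, scaled by LT.first if
  // legalization has to split the type. (Illustrative note, not part of the
  // original source.)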

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
    // (entries elided in this listing)

    // The number of vmovl instructions for the extension.
    // (entries elided in this listing)

    // Operations that we legalize using splitting.
    // (entries elided in this listing)

    // Vector float <-> i32 conversions.
    // (entries elided in this listing)

    // Vector double <-> i32 conversions.
    // (entries elided in this listing)
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
    // (entries elided in this listing)
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
    // (entries elided in this listing)
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement)) {
    // Cross-class copies are expensive on many microarchitectures,
    // so assume they are expensive by default.
    if (ValTy->getVectorElementType()->isIntegerTy())
      return 3;

    // Even if it's not a cross-class copy, this likely leads to mixing
    // of NEON and VFP code and should therefore be penalized.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
  }

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
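
// For instance, extracting an i32 lane from a <4 x i32> value requires a
// "vmov rN, dM[x]" transfer from a NEON register to a core register; that
// cross-class copy is what the cost of 3 above reflects. (Illustrative note,
// not part of the original source.)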

int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
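
// For example, a select between two <4 x i32> values is not covered by the
// table above and lowers to a single vbsl, so its cost is just the
// legalization factor LT.first. (Illustrative note, not part of the original
// source.)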

int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where
  // the computation can more often be merged into the index mode. The
  // resulting extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}
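
// For example, a scalar loop can usually fold its address step into a
// post-indexed access such as "ldr r0, [r1], #4", whereas non-consecutive
// vector accesses need explicit per-element address arithmetic.
// (Illustrative note, not part of the original source.)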

int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  // We only handle costs of reverse and select shuffles for now.
  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Select)
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry NEONShuffleTbl[] = {
      // Reverse shuffle cost one instruction if we are shuffling within a
      // double word (vrev) or two if we shuffle a quad word (vrev, vext).
      { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },

      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Select) {
    static const CostTblEntry NEONSelShuffleTbl[] = {
      // Select shuffle cost table for ARM. Cost is the number of instructions
      // required to create the shuffled vector.
      { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },

      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4i16, 2 },

      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 16 },

      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 32 }
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

int ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry CostTbl[] = {
    // Division.
    // These costs are somewhat random. Choose a cost of 20 to indicate that
    // vectorizing division (added function call) is going to be very
    // expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };
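
  // Illustrative note (not part of the original source): NEON has no vector
  // integer divide, so e.g. an SDIV on v4i32 is scalarized into four calls to
  // a library helper such as __aeabi_idiv, hence 4 * FunctionCallDivCost.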

  if (ST->hasNEON())
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem that we are facing is that SROA
  // creates a sequence of shift, and, or instructions to construct values.
  // These sequences are recognized by the ISel and have zero-cost. Not so for
  // the vectorized code. Because we have support for v2i64 but not i64 those
  // sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make them
  // seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}

int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                unsigned AddressSpace, const Instruction *I) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}

int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           unsigned Factor,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  // vldN/vstN don't support vector types with i64/f64 elements.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}
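
// For example, a de-interleaving load of <16 x i16> with Factor == 2 has
// SubVecTy == <8 x i16>, which is 128 bits and a legal vld2 type, so the
// returned cost is 2 * getNumInterleavedAccesses(<8 x i16>) == 2.
// (Illustrative note, not part of the original source.)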

void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  // Only currently enable these preferences for M-Class cores.
  if (!ST->isMClass())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->optForSize())
    return;

  // Only enable on Thumb-2 targets.
  if (!ST->isThumb2())
    return;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow another exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  unsigned Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(&I);
        if (const Function *F = CS.getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }
      SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                            I.value_op_end());
      Cost += getUserCost(&I, Operands);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.DefaultUnrollRuntimeCount = 4;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Force-unrolling small loops can be very useful because of the
  // branch-taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}