ARMTargetTransformInfo.cpp (LLVM 6.0.0svn)
//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the whitelist must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
                    (CalleeBits & ~InlineFeatureWhitelist);
  // For features in the whitelist, the callee's features must be a subset of
  // the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
                     (CalleeBits & InlineFeatureWhitelist);
  return MatchExact && MatchSubset;
}
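
// Illustration of the two checks above: with a whitelist containing only
// feature W, a caller with features {A, W} can inline a callee with {A}
// (everything outside the whitelist matches exactly, and the callee's
// whitelisted features form a subset of the caller's), but not a callee
// with {A, B}, because B lies outside the whitelist and must match exactly.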

int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1.
  if (SImmVal >= 0 && SImmVal < 256)
    return 1;
  if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constantpool.
  return 3;
}
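
// For example, in ARM mode 0xFF00 is cheap to materialize (cost 1, e.g. a
// single movw on v6T2+), 0x00AB0000 is a valid rotated 8-bit modified
// immediate (cost 1), while an arbitrary constant such as 0x12345678 needs
// a movw/movt pair on v6T2 and later (cost 2) or a constant-pool load on
// older cores (cost 3).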

// Constants smaller than 256 fit in the immediate field of
// Thumb1 instructions, so we return a cost of zero for them and 1 otherwise.
int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty) {
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    return 0;

  return 1;
}

int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                              Type *Ty) {
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  // FIXME: this is probably unneeded with GlobalISel.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  if (Opcode == Instruction::And)
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1<<12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1<<8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }

  return getIntImmCost(Imm, Ty);
}
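
// For example, the And case above costs 'x & 0xFFFFFF00' as
// min(cost(0xFFFFFF00), cost(0xFF)) = 1, because the complemented immediate
// 0xFF fits a shifter operand and the AND can be emitted as BIC; likewise
// an Add of -8 is costed as a SUB of 8.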

int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND,  MVT::v2f64, 2 },
    { ISD::FP_EXTEND, MVT::v2f32, 2 },
    { ISD::FP_EXTEND, MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
    // ... entries elided in this listing ...

    // The number of vmovl instructions for the extension.
    // ... entries elided in this listing ...

    // Operations that we legalize using splitting.
    // ... entries elided in this listing ...

    // Vector float <-> i32 conversions.
    // ... entries elided in this listing ...

    // Vector double <-> i32 conversions.
    // ... entries elided in this listing ...
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
    // ... entries elided in this listing ...
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
    // ... entries elided in this listing ...
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE, MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE, MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}
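
// For example, fpext <2 x float> to <2 x double> hits NEONFltDblTbl with a
// cost of 2: there is no NEON f64 arithmetic, so each lane is widened with
// a scalar vcvt.f64.f32.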

int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement)) {
    // Cross-class copies are expensive on many microarchitectures,
    // so assume they are expensive by default.
    if (ValTy->getVectorElementType()->isIntegerTy())
      return 3;

    // Even if it's not a cross-class copy, this likely leads to mixing
    // of NEON and VFP code and should therefore be penalized.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
  }

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
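
// For example, 'insertelement <4 x i32> %v, i32 %x, i32 0' moves a value
// from a GPR into a NEON register, a cross-class copy, so integer element
// types get the flat cost of 3 above.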

int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1,  MVT::v4i64,  4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1,  MVT::v8i64,  50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}
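
// For example, a select on <4 x i32> legalizes to a single vbsl and is
// charged LT.first = 1; the table above only overrides the pathological
// i64-element cases, which legalize very poorly.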

int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where
  // the computation can more often be merged into the index mode. The
  // resulting extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}
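
// For example, a vector access whose pointer stride is unknown or larger
// than 64 bytes per iteration cannot be folded into a load/store index
// mode, so it is charged the full overhead of 10 above.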

int ARMTTIImpl::getFPOpCost(Type *Ty) {
  // Use similar logic that's in ARMISelLowering:
  // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
  // to VFP.

  if (ST->hasVFP2() && !ST->isThumb1Only()) {
    if (Ty->isFloatTy()) {
      return TargetTransformInfo::TCC_Basic;
    }

    if (Ty->isDoubleTy()) {
      return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive
                              : TargetTransformInfo::TCC_Basic;
    }
  }

  return TargetTransformInfo::TCC_Expensive;
}
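
// For example, a float operation on a VFP2-capable core is TCC_Basic, while
// a double operation on a single-precision-only FPU (isFPOnlySP, found on
// some Cortex-M cores) is TCC_Expensive because f64 must be emulated.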

int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  // We only handle costs of reverse and alternate shuffles for now.
  if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);

  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry NEONShuffleTbl[] = {
      // Reverse shuffles cost one instruction if we are shuffling within a
      // double word (vrev) or two if we shuffle a quad word (vrev, vext).
      { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },

      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 2 }
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  if (Kind == TTI::SK_Alternate) {
    static const CostTblEntry NEONAltShuffleTbl[] = {
      // Alt shuffle cost table for ARM. Cost is the number of instructions
      // required to create the shuffled vector.

      { ISD::VECTOR_SHUFFLE, MVT::v2f32, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2i64, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2f64, 1 },
      { ISD::VECTOR_SHUFFLE, MVT::v2i32, 1 },

      { ISD::VECTOR_SHUFFLE, MVT::v4i32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4f32, 2 },
      { ISD::VECTOR_SHUFFLE, MVT::v4i16, 2 },

      { ISD::VECTOR_SHUFFLE, MVT::v8i16, 16 },

      { ISD::VECTOR_SHUFFLE, MVT::v16i8, 32 }
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}
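
// For example, reversing <4 x i32> needs a vrev64.32 to swap the elements
// within each d-register plus a vext to swap the two halves, matching the
// quad-word cost of 2 in the table above.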

int ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry CostTbl[] = {
    // Division.
    // These costs are somewhat random. Choose a cost of 20 to indicate that
    // vectorizing division (added function call) is going to be very
    // expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };

  if (ST->hasNEON())
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem that we are facing is that SROA
  // creates a sequence of shift, and, or instructions to construct values.
  // These sequences are recognized by the ISel and have zero-cost. Not so for
  // the vectorized code. Because we have support for v2i64 but not i64 those
  // sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make them
  // seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}
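
// For example, sdiv <4 x i32> is costed at 4 * FunctionCallDivCost = 80,
// since without hardware vector division each lane becomes a library call;
// this strongly discourages the vectorizer from widening divisions.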

int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                unsigned AddressSpace, const Instruction *I) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}
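
// For example, storing a <2 x double> with only 8-byte alignment is charged
// 4 (it takes the multi-uop vst1 form), while a 16-byte-aligned store costs
// just 1.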

int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           unsigned Factor,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  // vldN/vstN doesn't support vector types of i64/f64 element.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 in bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
}
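
// For example, a stride-2 interleaved load of <8 x i32> (Factor == 2) has
// sub-vector type <4 x i32>, a single legal 128-bit access, so the cost is
// 2 * 1 = 2, reflecting one vld2.32.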

void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  // Only currently enable these preferences for M-Class cores.
  if (!ST->isMClass())
    return BaseT::getUnrollingPreferences(L, SE, UP);

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->optForSize())
    return;

  // Only enable on Thumb-2 targets.
  if (!ST->isThumb2())
    return;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  DEBUG(dbgs() << "Loop has:\n"
               << "Blocks: " << L->getNumBlocks() << "\n"
               << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Only allow one exit other than the latch. This acts as an early exit as
  // it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  unsigned Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(&I);
        if (const Function *F = CS.getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }
      SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                            I.value_op_end());
      Cost += getUserCost(&I, Operands);
    }
  }

  DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.DefaultUnrollRuntimeCount = 4;

  // Force-unrolling small loops can be very useful because of the branch
  // taken cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}
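
// For example, a two-block, call-free Thumb-2 loop on an M-class core whose
// body costs less than 12 is force-unrolled; larger call-free bodies still
// get partial and runtime unrolling with the default runtime count set
// above.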