//===- ARMTargetTransformInfo.cpp - ARM specific TTI ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARMTargetTransformInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/CostTable.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "armtti"

bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
                                     const Function *Callee) const {
  const TargetMachine &TM = getTLI()->getTargetMachine();
  const FeatureBitset &CallerBits =
      TM.getSubtargetImpl(*Caller)->getFeatureBits();
  const FeatureBitset &CalleeBits =
      TM.getSubtargetImpl(*Callee)->getFeatureBits();

  // To inline a callee, all features not in the whitelist must match exactly.
  bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
                    (CalleeBits & ~InlineFeatureWhitelist);
  // For features in the whitelist, the callee's features must be a subset of
  // the caller's.
  bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
                     (CalleeBits & InlineFeatureWhitelist);
  return MatchExact && MatchSubset;
}
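
// Illustrative note (not from the original source): if the whitelist covered,
// say, only the hardware-divide features, a caller compiled with hwdiv could
// inline a callee compiled without it (the subset check passes), while any
// feature mismatch outside the whitelist, such as differing FP configurations,
// would fail the exact-match check and block inlining.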

int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
  assert(Ty->isIntegerTy());

  unsigned Bits = Ty->getPrimitiveSizeInBits();
  if (Bits == 0 || Imm.getActiveBits() >= 64)
    return 4;

  int64_t SImmVal = Imm.getSExtValue();
  uint64_t ZImmVal = Imm.getZExtValue();
  if (!ST->isThumb()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  if (ST->isThumb2()) {
    if ((SImmVal >= 0 && SImmVal < 65536) ||
        (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
        (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      return 1;
    return ST->hasV6T2Ops() ? 2 : 3;
  }
  // Thumb1: any i8 immediate costs 1.
  if (Bits == 8 || (SImmVal >= 0 && SImmVal < 256))
    return 1;
  if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
    return 2;
  // Load from constantpool.
  return 3;
}
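
// Worked examples (illustrative, not from the original source): on an ARM-mode
// v6t2 core, 0x12345678 is neither a 16-bit value nor a modified immediate, so
// it costs 2 (movw + movt). On Thumb1 the same constant costs 3 (a constant
// pool load), while 0xFF00 costs 2 (an 8-bit immediate plus a shift).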

// Constants smaller than 256 fit in the immediate field of
// Thumb1 instructions, so we return a cost of zero for them and 1 otherwise.
int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                      const APInt &Imm, Type *Ty) {
  if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
    return 0;

  return 1;
}

int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                              Type *Ty) {
  // Division by a constant can be turned into multiplication, but only if we
  // know it's constant. So it's not so much that the immediate is cheap (it's
  // not), but that the alternative is worse.
  // FIXME: this is probably unneeded with GlobalISel.
  if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
       Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      Idx == 1)
    return 0;

  if (Opcode == Instruction::And) {
    // UXTB/UXTH
    if (Imm == 255 || Imm == 65535)
      return 0;
    // Conversion to BIC is free, and means we can use ~Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
  }
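
  // Illustrative: for `and i32 %x, -256` the mask 0xFFFFFF00 is not a valid
  // ARM modified immediate, but its complement 0xFF is, so the AND can be
  // emitted as `bic r0, r0, #255` at the cost of the cheaper immediate.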

  if (Opcode == Instruction::Add)
    // Conversion to SUB is free, and means we can use -Imm instead.
    return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));

  if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      Ty->getIntegerBitWidth() == 32) {
    int64_t NegImm = -Imm.getSExtValue();
    if (ST->isThumb2() && NegImm < 1 << 12)
      // icmp X, #-C -> cmn X, #C
      return 0;
    if (ST->isThumb() && NegImm < 1 << 8)
      // icmp X, #-C -> adds X, #C
      return 0;
  }
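
  // For instance (illustrative): `icmp eq i32 %x, -42` on Thumb-2 becomes
  // `cmn r0, #42`, so the negative immediate is effectively free here.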

  // xor a, -1 can always be folded to MVN.
  if (Opcode == Instruction::Xor && Imm.isAllOnesValue())
    return 0;

  return getIntImmCost(Imm, Ty);
}

int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                                 const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Single to/from double precision conversions.
  static const CostTblEntry NEONFltDblTbl[] = {
    // Vector fptrunc/fpext conversions.
    { ISD::FP_ROUND,  MVT::v2f64, 2 },
    { ISD::FP_EXTEND, MVT::v2f32, 2 },
    { ISD::FP_EXTEND, MVT::v4f32, 4 }
  };

  if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
                                             ISD == ISD::FP_EXTEND)) {
    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
    if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
      return LT.first * Entry->Cost;
  }

  EVT SrcTy = TLI->getValueType(DL, Src);
  EVT DstTy = TLI->getValueType(DL, Dst);

  if (!SrcTy.isSimple() || !DstTy.isSimple())
    return BaseT::getCastInstrCost(Opcode, Dst, Src);

  // Some arithmetic, load and store operations have specific instructions
  // to cast up/down their types automatically at no extra cost.
  // TODO: Get these tables to know at least what the related operations are.
  static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
    // ... (entries elided in this excerpt) ...

    // The number of vmovl instructions for the extension.
    // ...

    // Operations that we legalize using splitting.
    // ...

    // Vector float <-> i32 conversions.
    // ...

    // Vector double <-> i32 conversions.
    // ...
  };

  if (SrcTy.isVector() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }
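
  // Illustrative: a NEON sign extension such as
  //   %w = sext <4 x i16> %v to <4 x i32>
  // maps to a single vmovl.s16, so the table above is expected to price such
  // extensions at roughly one instruction per vmovl.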

  // Scalar float to integer conversions.
  static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
    // ... (entries elided in this excerpt) ...
  };
  if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer to float conversions.
  static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
    // ... (entries elided in this excerpt) ...
  };

  if (SrcTy.isInteger() && ST->hasNEON()) {
    if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
                                                   ISD, DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  // Scalar integer conversion costs.
  static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
    // i16 -> i64 requires two dependent operations.
    { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },

    // Truncates on i64 are assumed to be free.
    { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
    { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
  };

  if (SrcTy.isInteger()) {
    if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
                                                   DstTy.getSimpleVT(),
                                                   SrcTy.getSimpleVT()))
      return Entry->Cost;
  }

  return BaseT::getCastInstrCost(Opcode, Dst, Src);
}

int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
                                   unsigned Index) {
  // Penalize inserting into a D-subregister. We end up with a three times
  // lower estimated throughput on Swift.
  if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
      ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
    return 3;

  if ((Opcode == Instruction::InsertElement ||
       Opcode == Instruction::ExtractElement)) {
    // Cross-class copies are expensive on many microarchitectures,
    // so assume they are expensive by default.
    if (ValTy->getVectorElementType()->isIntegerTy())
      return 3;

    // Even if it's not a cross-class copy, this likely leads to mixing
    // of NEON and VFP code and should therefore be penalized.
    if (ValTy->isVectorTy() &&
        ValTy->getScalarSizeInBits() <= 32)
      return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
  }

  return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
}
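
// Illustrative: on a subtarget with slow D-subregister loads (e.g. Swift),
// `insertelement <4 x float> %v, float %f, i32 1` is priced at 3, reflecting
// the reduced throughput of writing a single lane of a NEON register.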

int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                                   const Instruction *I) {
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  // On NEON a vector select gets lowered to vbsl.
  if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
    // Lowering of some vector selects is currently far from perfect.
    static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
      { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
      { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
      { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
    };
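
    // Illustrative note: a select on <4 x i64> needs the condition widened and
    // the 256-bit value split across multiple q registers, which is presumably
    // what the hand-tuned 4*4 + 1*2 + 1 entry above accounts for.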

    EVT SelCondTy = TLI->getValueType(DL, CondTy);
    EVT SelValTy = TLI->getValueType(DL, ValTy);
    if (SelCondTy.isSimple() && SelValTy.isSimple()) {
      if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
                                                     SelCondTy.getSimpleVT(),
                                                     SelValTy.getSimpleVT()))
        return Entry->Cost;
    }

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
    return LT.first;
  }

  return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
}

int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                          const SCEV *Ptr) {
  // Address computations in vectorized code with non-consecutive addresses
  // will likely result in more instructions compared to scalar code where
  // the computation can more often be merged into the index mode. The
  // resulting extra micro-ops can significantly decrease throughput.
  unsigned NumVectorInstToHideOverhead = 10;
  int MaxMergeDistance = 64;

  if (Ty->isVectorTy() && SE &&
      !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
    return NumVectorInstToHideOverhead;

  // In many cases the address computation is not merged into the instruction
  // addressing mode.
  return 1;
}
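
// Illustrative: in a vectorized loop that gathers A[B[i]], the pointer SCEV is
// not a small constant stride, so the address computation is priced at 10
// vector instructions rather than 1.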

int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
                               Type *SubTp) {
  if (Kind == TTI::SK_Broadcast) {
    static const CostTblEntry NEONDupTbl[] = {
      // VDUP handles these cases.
      // ... (entries elided in this excerpt) ...
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONDupTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
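
  // Illustrative note on the broadcast case above: a shuffle such as
  //   shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> zeroinitializer
  // maps to a single vdup.32, which is the kind of entry NEONDupTbl models.
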
  if (Kind == TTI::SK_Reverse) {
    static const CostTblEntry NEONShuffleTbl[] = {
      // Reverse shuffle costs one instruction if we are shuffling within a
      // double word (vrev) or two if we shuffle a quad word (vrev, vext).
      // ... (entries elided in this excerpt) ...
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

    if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
                                            LT.second))
      return LT.first * Entry->Cost;

    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
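
  // Illustrative note on the reverse case above: reversing <4 x i16> within a
  // 64-bit d register is one vrev64.16, while reversing a 128-bit q register
  // needs a vrev64 followed by a vext to swap the two halves, hence cost two.
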
  if (Kind == TTI::SK_Select) {
    static const CostTblEntry NEONSelShuffleTbl[] = {
      // Select shuffle cost table for ARM. Cost is the number of instructions
      // required to create the shuffled vector.
      // ... (entries elided in this excerpt) ...
    };

    std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    if (const auto *Entry = CostTableLookup(NEONSelShuffleTbl,
                                            ISD::VECTOR_SHUFFLE, LT.second))
      return LT.first * Entry->Cost;
    return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
  }
  return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
}

int ARMTTIImpl::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
    TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
    TTI::OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) {
  int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

  const unsigned FunctionCallDivCost = 20;
  const unsigned ReciprocalDivCost = 10;
  static const CostTblEntry CostTbl[] = {
    // Division.
    // These costs are somewhat random. Choose a cost of 20 to indicate that
    // vectorizing division (an added function call) is going to be very
    // expensive.
    // Double register types.
    { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::UDIV, MVT::v4i16, ReciprocalDivCost},
    { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::UDIV, MVT::v8i8, ReciprocalDivCost},
    { ISD::SREM, MVT::v8i8, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i8, 8 * FunctionCallDivCost},
    // Quad register types.
    { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
    { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
    { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
    // Multiplication.
  };

  if (ST->hasNEON())
    if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
      return LT.first * Entry->Cost;

  int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
                                           Opd1PropInfo, Opd2PropInfo);

  // This is somewhat of a hack. The problem that we are facing is that SROA
  // creates a sequence of shift, and, or instructions to construct values.
  // These sequences are recognized by ISel and have zero cost. Not so for
  // the vectorized code. Because we have support for v2i64 but not i64, those
  // sequences look particularly beneficial to vectorize.
  // To work around this we increase the cost of v2i64 operations to make them
  // seem less beneficial.
  if (LT.second == MVT::v2i64 &&
      Op2Info == TargetTransformInfo::OK_UniformConstantValue)
    Cost += 4;

  return Cost;
}
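
// Illustrative: NEON has no integer divide instruction, so `sdiv <4 x i32>` is
// priced at 4 * FunctionCallDivCost = 80 (it scalarizes into library calls),
// while `sdiv <4 x i16>` gets the cheaper ReciprocalDivCost of 10 since it can
// be approximated with a reciprocal-based sequence.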

int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                unsigned AddressSpace, const Instruction *I) {
  std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);

  if (Src->isVectorTy() && Alignment != 16 &&
      Src->getVectorElementType()->isDoubleTy()) {
    // Unaligned loads/stores are extremely inefficient.
    // We need 4 uops for vst.1/vld.1 vs. 1 uop for vldr/vstr.
    return LT.first * 4;
  }
  return LT.first;
}
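
// Illustrative: storing a <2 x double> with 8-byte alignment hits the path
// above and costs 4 * LT.first, modelling the vst1 micro-op overhead, whereas
// a 16-byte-aligned store of the same type costs just LT.first.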

int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                           unsigned Factor,
                                           ArrayRef<unsigned> Indices,
                                           unsigned Alignment,
                                           unsigned AddressSpace,
                                           bool UseMaskForCond,
                                           bool UseMaskForGaps) {
  assert(Factor >= 2 && "Invalid interleave factor");
  assert(isa<VectorType>(VecTy) && "Expect a vector type");

  // vldN/vstN don't support vector types with i64/f64 elements.
  bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;

  if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits &&
      !UseMaskForCond && !UseMaskForGaps) {
    unsigned NumElts = VecTy->getVectorNumElements();
    auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);

    // vldN/vstN only support legal vector types of size 64 or 128 bits.
    // Accesses having vector types that are a multiple of 128 bits can be
    // matched to more than one vldN/vstN instruction.
    if (NumElts % Factor == 0 &&
        TLI->isLegalInterleavedAccessType(SubVecTy, DL))
      return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
  }

  return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace,
                                           UseMaskForCond, UseMaskForGaps);
}
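
// Illustrative: a factor-2 interleaved load of <8 x i16> (two fields of
// <4 x i16> each) is legal for vld2.16, so the cost comes out as
// Factor * NumAccesses = 2 * 1, rather than the generic shuffle-based cost.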

void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::UnrollingPreferences &UP) {
  // Only currently enable these preferences for M-Class cores.
  if (!ST->isMClass())
    return BaseT::getUnrollingPreferences(L, SE, UP);

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  // Only enable on Thumb-2 targets.
  if (!ST->isThumb2())
    return;

  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Allow at most one exit other than the latch. This acts as an early exit,
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (ST->hasBranchPredictor() && L->getNumBlocks() > 4)
    return;

  // Scan the loop: don't unroll loops with calls, as this could prevent
  // inlining.
  unsigned Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        ImmutableCallSite CS(&I);
        if (const Function *F = CS.getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }
      SmallVector<const Value*, 4> Operands(I.value_op_begin(),
                                            I.value_op_end());
      Cost += getUserCost(&I, Operands);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.DefaultUnrollRuntimeCount = 4;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Force-unrolling small loops can be very useful because of the branch-taken
  // cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}
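
// Illustrative: a small loop such as
//   for (int i = 0; i < n; ++i) sum += a[i];
// has a body cost well under 12 on these cores, so it would be runtime
// unrolled (default count 4) with UP.Force set, amortizing the backedge
// branch.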