LCOV - code coverage report
Current view: top level - lib/Target/ARM - ARMTargetTransformInfo.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 151 167 90.4 %
Date: 2017-09-14 15:23:50 Functions: 13 14 92.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- ARMTargetTransformInfo.cpp - ARM specific TTI ---------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : 
      10             : #include "ARMTargetTransformInfo.h"
      11             : #include "llvm/Support/Debug.h"
      12             : #include "llvm/Target/CostTable.h"
      13             : #include "llvm/Target/TargetLowering.h"
      14             : using namespace llvm;
      15             : 
      16             : #define DEBUG_TYPE "armtti"
      17             : 
      18          12 : bool ARMTTIImpl::areInlineCompatible(const Function *Caller,
      19             :                                      const Function *Callee) const {
      20          12 :   const TargetMachine &TM = getTLI()->getTargetMachine();
      21             :   const FeatureBitset &CallerBits =
      22          12 :       TM.getSubtargetImpl(*Caller)->getFeatureBits();
      23             :   const FeatureBitset &CalleeBits =
      24          12 :       TM.getSubtargetImpl(*Callee)->getFeatureBits();
      25             : 
      26             :   // To inline a callee, all features not in the whitelist must match exactly.
      27          36 :   bool MatchExact = (CallerBits & ~InlineFeatureWhitelist) ==
      28          60 :                     (CalleeBits & ~InlineFeatureWhitelist);
      29             :   // For features in the whitelist, the callee's features must be a subset of
      30             :   // the caller's.
      31          36 :   bool MatchSubset = ((CallerBits & CalleeBits) & InlineFeatureWhitelist) ==
      32          48 :                      (CalleeBits & InlineFeatureWhitelist);
      33          12 :   return MatchExact && MatchSubset;
      34             : }
      35             : 
      36       12511 : int ARMTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty) {
      37             :   assert(Ty->isIntegerTy());
      38             : 
      39       12511 :  unsigned Bits = Ty->getPrimitiveSizeInBits();
      40       25022 :  if (Bits == 0 || Imm.getActiveBits() >= 64)
      41             :    return 4;
      42             : 
      43       12433 :   int64_t SImmVal = Imm.getSExtValue();
      44       12433 :   uint64_t ZImmVal = Imm.getZExtValue();
      45       12433 :   if (!ST->isThumb()) {
      46        7370 :     if ((SImmVal >= 0 && SImmVal < 65536) ||
      47        7248 :         (ARM_AM::getSOImmVal(ZImmVal) != -1) ||
      48         853 :         (ARM_AM::getSOImmVal(~ZImmVal) != -1))
      49             :       return 1;
      50         287 :     return ST->hasV6T2Ops() ? 2 : 3;
      51             :   }
      52        6038 :   if (ST->isThumb2()) {
      53        5149 :     if ((SImmVal >= 0 && SImmVal < 65536) ||
      54        4665 :         (ARM_AM::getT2SOImmVal(ZImmVal) != -1) ||
      55         359 :         (ARM_AM::getT2SOImmVal(~ZImmVal) != -1))
      56             :       return 1;
      57         157 :     return ST->hasV6T2Ops() ? 2 : 3;
      58             :   }
      59             :   // Thumb1.
      60        1732 :   if (SImmVal >= 0 && SImmVal < 256)
      61             :     return 1;
      62         590 :   if ((~SImmVal < 256) || ARM_AM::isThumbImmShiftedVal(ZImmVal))
      63             :     return 2;
      64             :   // Load from constantpool.
      65             :   return 3;
      66             : }
      67             : 
      68             : 
      69             : // Non-negative constants smaller than 256 fit in the immediate field of
      70             : // Thumb1 instructions, so we return a cost of 0 for them and 1 otherwise.
      71          88 : int ARMTTIImpl::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
      72             :                                       const APInt &Imm, Type *Ty) {
      73         149 :   if (Imm.isNonNegative() && Imm.getLimitedValue() < 256)
      74             :     return 0;
      75             : 
      76             :   return 1;
      77             : }
      78             : 
      79       11418 : int ARMTTIImpl::getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
      80             :                               Type *Ty) {
      81             :   // Division by a constant can be turned into multiplication, but only if we
      82             :   // know it's constant. So it's not so much that the immediate is cheap (it's
      83             :   // not), but that the alternative is worse.
      84             :   // FIXME: this is probably unneeded with GlobalISel.
      85       22836 :   if ((Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
      86       22737 :        Opcode == Instruction::SRem || Opcode == Instruction::URem) &&
      87             :       Idx == 1)
      88             :     return 0;
      89             : 
      90       11299 :   if (Opcode == Instruction::And)
      91             :       // Conversion to BIC is free, and means we can use ~Imm instead.
      92        2955 :       return std::min(getIntImmCost(Imm, Ty), getIntImmCost(~Imm, Ty));
      93             : 
      94       10708 :   if (Opcode == Instruction::Add)
      95             :     // Conversion to SUB is free, and means we can use -Imm instead.
      96        3220 :     return std::min(getIntImmCost(Imm, Ty), getIntImmCost(-Imm, Ty));
      97             : 
      98       11408 :   if (Opcode == Instruction::ICmp && Imm.isNegative() &&
      99          85 :       Ty->getIntegerBitWidth() == 32) {
     100          78 :     int64_t NegImm = -Imm.getSExtValue();
     101         104 :     if (ST->isThumb2() && NegImm < 1<<12)
     102             :       // icmp X, #-C -> cmn X, #C
     103             :       return 0;
     104          56 :     if (ST->isThumb() && NegImm < 1<<8)
     105             :       // icmp X, #-C -> adds X, #C
     106             :       return 0;
     107             :   }
     108             : 
     109       10040 :   return getIntImmCost(Imm, Ty);
     110             : }
     111             : 
     112             : 
     113         538 : int ARMTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
     114             :                                  const Instruction *I) {
     115         538 :   int ISD = TLI->InstructionOpcodeToISD(Opcode);
     116             :   assert(ISD && "Invalid opcode");
     117             : 
     118             :   // Single to/from double precision conversions.
     119             :   static const CostTblEntry NEONFltDblTbl[] = {
     120             :     // Vector fptrunc/fpext conversions.
     121             :     { ISD::FP_ROUND,   MVT::v2f64, 2 },
     122             :     { ISD::FP_EXTEND,  MVT::v2f32, 2 },
     123             :     { ISD::FP_EXTEND,  MVT::v4f32, 4 }
     124             :   };
     125             : 
     126         911 :   if (Src->isVectorTy() && ST->hasNEON() && (ISD == ISD::FP_ROUND ||
     127         373 :                                           ISD == ISD::FP_EXTEND)) {
     128           8 :     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
     129          16 :     if (const auto *Entry = CostTableLookup(NEONFltDblTbl, ISD, LT.second))
     130           8 :       return LT.first * Entry->Cost;
     131             :   }
     132             : 
     133         530 :   EVT SrcTy = TLI->getValueType(DL, Src);
     134         530 :   EVT DstTy = TLI->getValueType(DL, Dst);
     135             : 
     136         530 :   if (!SrcTy.isSimple() || !DstTy.isSimple())
     137          20 :     return BaseT::getCastInstrCost(Opcode, Dst, Src);
     138             : 
     139             :   // Some arithmetic, load and store operations have specific instructions
     140             :   // to cast up/down their types automatically at no extra cost.
     141             :   // TODO: Get these tables to know at least what the related operations are.
     142             :   static const TypeConversionCostTblEntry NEONVectorConversionTbl[] = {
     143             :     { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
     144             :     { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 0 },
     145             :     { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
     146             :     { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i32, 1 },
     147             :     { ISD::TRUNCATE,    MVT::v4i32, MVT::v4i64, 0 },
     148             :     { ISD::TRUNCATE,    MVT::v4i16, MVT::v4i32, 1 },
     149             : 
     150             :     // The number of vmovl instructions for the extension.
     151             :     { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
     152             :     { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i16, 3 },
     153             :     { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
     154             :     { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 3 },
     155             :     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
     156             :     { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 7 },
     157             :     { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
     158             :     { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 6 },
     159             :     { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
     160             :     { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 6 },
     161             : 
     162             :     // Operations that we legalize using splitting.
     163             :     { ISD::TRUNCATE,    MVT::v16i8, MVT::v16i32, 6 },
     164             :     { ISD::TRUNCATE,    MVT::v8i8, MVT::v8i32, 3 },
     165             : 
     166             :     // Vector float <-> i32 conversions.
     167             :     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
     168             :     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i32, 1 },
     169             : 
     170             :     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
     171             :     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i8, 3 },
     172             :     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
     173             :     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i16, 2 },
     174             :     { ISD::SINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
     175             :     { ISD::UINT_TO_FP,  MVT::v2f32, MVT::v2i32, 1 },
     176             :     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
     177             :     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i1, 3 },
     178             :     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
     179             :     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i8, 3 },
     180             :     { ISD::SINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
     181             :     { ISD::UINT_TO_FP,  MVT::v4f32, MVT::v4i16, 2 },
     182             :     { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
     183             :     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i16, 4 },
     184             :     { ISD::SINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
     185             :     { ISD::UINT_TO_FP,  MVT::v8f32, MVT::v8i32, 2 },
     186             :     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
     187             :     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i16, 8 },
     188             :     { ISD::SINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
     189             :     { ISD::UINT_TO_FP,  MVT::v16f32, MVT::v16i32, 4 },
     190             : 
     191             :     { ISD::FP_TO_SINT,  MVT::v4i32, MVT::v4f32, 1 },
     192             :     { ISD::FP_TO_UINT,  MVT::v4i32, MVT::v4f32, 1 },
     193             :     { ISD::FP_TO_SINT,  MVT::v4i8, MVT::v4f32, 3 },
     194             :     { ISD::FP_TO_UINT,  MVT::v4i8, MVT::v4f32, 3 },
     195             :     { ISD::FP_TO_SINT,  MVT::v4i16, MVT::v4f32, 2 },
     196             :     { ISD::FP_TO_UINT,  MVT::v4i16, MVT::v4f32, 2 },
     197             : 
     198             :     // Vector double <-> i32 conversions.
     199             :     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     200             :     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     201             : 
     202             :     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
     203             :     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i8, 4 },
     204             :     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
     205             :     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i16, 3 },
     206             :     { ISD::SINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     207             :     { ISD::UINT_TO_FP,  MVT::v2f64, MVT::v2i32, 2 },
     208             : 
     209             :     { ISD::FP_TO_SINT,  MVT::v2i32, MVT::v2f64, 2 },
     210             :     { ISD::FP_TO_UINT,  MVT::v2i32, MVT::v2f64, 2 },
     211             :     { ISD::FP_TO_SINT,  MVT::v8i16, MVT::v8f32, 4 },
     212             :     { ISD::FP_TO_UINT,  MVT::v8i16, MVT::v8f32, 4 },
     213             :     { ISD::FP_TO_SINT,  MVT::v16i16, MVT::v16f32, 8 },
     214             :     { ISD::FP_TO_UINT,  MVT::v16i16, MVT::v16f32, 8 }
     215             :   };
     216             : 
     217         855 :   if (SrcTy.isVector() && ST->hasNEON()) {
     218         455 :     if (const auto *Entry = ConvertCostTableLookup(NEONVectorConversionTbl, ISD,
     219             :                                                    DstTy.getSimpleVT(),
     220         455 :                                                    SrcTy.getSimpleVT()))
     221         110 :       return Entry->Cost;
     222             :   }
     223             : 
     224             :   // Scalar float to integer conversions.
     225             :   static const TypeConversionCostTblEntry NEONFloatConversionTbl[] = {
     226             :     { ISD::FP_TO_SINT,  MVT::i1, MVT::f32, 2 },
     227             :     { ISD::FP_TO_UINT,  MVT::i1, MVT::f32, 2 },
     228             :     { ISD::FP_TO_SINT,  MVT::i1, MVT::f64, 2 },
     229             :     { ISD::FP_TO_UINT,  MVT::i1, MVT::f64, 2 },
     230             :     { ISD::FP_TO_SINT,  MVT::i8, MVT::f32, 2 },
     231             :     { ISD::FP_TO_UINT,  MVT::i8, MVT::f32, 2 },
     232             :     { ISD::FP_TO_SINT,  MVT::i8, MVT::f64, 2 },
     233             :     { ISD::FP_TO_UINT,  MVT::i8, MVT::f64, 2 },
     234             :     { ISD::FP_TO_SINT,  MVT::i16, MVT::f32, 2 },
     235             :     { ISD::FP_TO_UINT,  MVT::i16, MVT::f32, 2 },
     236             :     { ISD::FP_TO_SINT,  MVT::i16, MVT::f64, 2 },
     237             :     { ISD::FP_TO_UINT,  MVT::i16, MVT::f64, 2 },
     238             :     { ISD::FP_TO_SINT,  MVT::i32, MVT::f32, 2 },
     239             :     { ISD::FP_TO_UINT,  MVT::i32, MVT::f32, 2 },
     240             :     { ISD::FP_TO_SINT,  MVT::i32, MVT::f64, 2 },
     241             :     { ISD::FP_TO_UINT,  MVT::i32, MVT::f64, 2 },
     242             :     { ISD::FP_TO_SINT,  MVT::i64, MVT::f32, 10 },
     243             :     { ISD::FP_TO_UINT,  MVT::i64, MVT::f32, 10 },
     244             :     { ISD::FP_TO_SINT,  MVT::i64, MVT::f64, 10 },
     245             :     { ISD::FP_TO_UINT,  MVT::i64, MVT::f64, 10 }
     246             :   };
     247         400 :   if (SrcTy.isFloatingPoint() && ST->hasNEON()) {
     248         266 :     if (const auto *Entry = ConvertCostTableLookup(NEONFloatConversionTbl, ISD,
     249             :                                                    DstTy.getSimpleVT(),
     250         266 :                                                    SrcTy.getSimpleVT()))
     251          66 :       return Entry->Cost;
     252             :   }
     253             : 
     254             :   // Scalar integer to float conversions.
     255             :   static const TypeConversionCostTblEntry NEONIntegerConversionTbl[] = {
     256             :     { ISD::SINT_TO_FP,  MVT::f32, MVT::i1, 2 },
     257             :     { ISD::UINT_TO_FP,  MVT::f32, MVT::i1, 2 },
     258             :     { ISD::SINT_TO_FP,  MVT::f64, MVT::i1, 2 },
     259             :     { ISD::UINT_TO_FP,  MVT::f64, MVT::i1, 2 },
     260             :     { ISD::SINT_TO_FP,  MVT::f32, MVT::i8, 2 },
     261             :     { ISD::UINT_TO_FP,  MVT::f32, MVT::i8, 2 },
     262             :     { ISD::SINT_TO_FP,  MVT::f64, MVT::i8, 2 },
     263             :     { ISD::UINT_TO_FP,  MVT::f64, MVT::i8, 2 },
     264             :     { ISD::SINT_TO_FP,  MVT::f32, MVT::i16, 2 },
     265             :     { ISD::UINT_TO_FP,  MVT::f32, MVT::i16, 2 },
     266             :     { ISD::SINT_TO_FP,  MVT::f64, MVT::i16, 2 },
     267             :     { ISD::UINT_TO_FP,  MVT::f64, MVT::i16, 2 },
     268             :     { ISD::SINT_TO_FP,  MVT::f32, MVT::i32, 2 },
     269             :     { ISD::UINT_TO_FP,  MVT::f32, MVT::i32, 2 },
     270             :     { ISD::SINT_TO_FP,  MVT::f64, MVT::i32, 2 },
     271             :     { ISD::UINT_TO_FP,  MVT::f64, MVT::i32, 2 },
     272             :     { ISD::SINT_TO_FP,  MVT::f32, MVT::i64, 10 },
     273             :     { ISD::UINT_TO_FP,  MVT::f32, MVT::i64, 10 },
     274             :     { ISD::SINT_TO_FP,  MVT::f64, MVT::i64, 10 },
     275             :     { ISD::UINT_TO_FP,  MVT::f64, MVT::i64, 10 }
     276             :   };
     277             : 
     278         334 :   if (SrcTy.isInteger() && ST->hasNEON()) {
     279         246 :     if (const auto *Entry = ConvertCostTableLookup(NEONIntegerConversionTbl,
     280             :                                                    ISD, DstTy.getSimpleVT(),
     281         246 :                                                    SrcTy.getSimpleVT()))
     282          46 :       return Entry->Cost;
     283             :   }
     284             : 
     285             :   // Scalar integer conversion costs.
     286             :   static const TypeConversionCostTblEntry ARMIntegerConversionTbl[] = {
     287             :     // i16 -> i64 requires two dependent operations.
     288             :     { ISD::SIGN_EXTEND, MVT::i64, MVT::i16, 2 },
     289             : 
     290             :     // Truncates on i64 are assumed to be free.
     291             :     { ISD::TRUNCATE,    MVT::i32, MVT::i64, 0 },
     292             :     { ISD::TRUNCATE,    MVT::i16, MVT::i64, 0 },
     293             :     { ISD::TRUNCATE,    MVT::i8,  MVT::i64, 0 },
     294             :     { ISD::TRUNCATE,    MVT::i1,  MVT::i64, 0 }
     295             :   };
     296             : 
     297         288 :   if (SrcTy.isInteger()) {
     298         173 :     if (const auto *Entry = ConvertCostTableLookup(ARMIntegerConversionTbl, ISD,
     299             :                                                    DstTy.getSimpleVT(),
     300         173 :                                                    SrcTy.getSimpleVT()))
     301          19 :       return Entry->Cost;
     302             :   }
     303             : 
     304         269 :   return BaseT::getCastInstrCost(Opcode, Dst, Src);
     305             : }
     306             : 
     307         387 : int ARMTTIImpl::getVectorInstrCost(unsigned Opcode, Type *ValTy,
     308             :                                    unsigned Index) {
     309             :   // Penalize inserting into a D-subregister. We end up with a three times
     310             :   // lower estimated throughput on swift.
     311         469 :   if (ST->hasSlowLoadDSubregister() && Opcode == Instruction::InsertElement &&
     312         449 :       ValTy->isVectorTy() && ValTy->getScalarSizeInBits() <= 32)
     313             :     return 3;
     314             : 
     315         358 :   if ((Opcode == Instruction::InsertElement ||
     316             :        Opcode == Instruction::ExtractElement)) {
     317             :     // Cross-class copies are expensive on many microarchitectures,
     318             :     // so assume they are expensive by default.
     319        1074 :     if (ValTy->getVectorElementType()->isIntegerTy())
     320             :       return 3;
     321             : 
     322             :     // Even if it's not a cross class copy, this likely leads to mixing
     323             :     // of NEON and VFP code and should be therefore penalized.
     324         288 :     if (ValTy->isVectorTy() &&
     325         144 :         ValTy->getScalarSizeInBits() <= 32)
     326         228 :       return std::max(BaseT::getVectorInstrCost(Opcode, ValTy, Index), 2U);
     327             :   }
     328             : 
     329         136 :   return BaseT::getVectorInstrCost(Opcode, ValTy, Index);
     330             : }
     331             : 
     332          75 : int ARMTTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
     333             :                                    const Instruction *I) {
     334             : 
     335          75 :   int ISD = TLI->InstructionOpcodeToISD(Opcode);
     336             :   // On NEON a vector select gets lowered to vbsl.
     337         150 :   if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) {
     338             :     // Lowering of some vector selects is currently far from perfect.
     339             :     static const TypeConversionCostTblEntry NEONVectorSelectTbl[] = {
     340             :       { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 },
     341             :       { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 },
     342             :       { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 }
     343             :     };
     344             : 
     345          29 :     EVT SelCondTy = TLI->getValueType(DL, CondTy);
     346          29 :     EVT SelValTy = TLI->getValueType(DL, ValTy);
     347          56 :     if (SelCondTy.isSimple() && SelValTy.isSimple()) {
     348          33 :       if (const auto *Entry = ConvertCostTableLookup(NEONVectorSelectTbl, ISD,
     349             :                                                      SelCondTy.getSimpleVT(),
     350          33 :                                                      SelValTy.getSimpleVT()))
     351           6 :         return Entry->Cost;
     352             :     }
     353             : 
     354          23 :     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);
     355          23 :     return LT.first;
     356             :   }
     357             : 
     358          46 :   return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
     359             : }
     360             : 
     361          35 : int ARMTTIImpl::getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
     362             :                                           const SCEV *Ptr) {
     363             :   // Address computations in vectorized code with non-consecutive addresses will
     364             :   // likely result in more instructions compared to scalar code where the
     365             :   // computation can more often be merged into the index mode. The resulting
     366             :   // extra micro-ops can significantly decrease throughput.
     367          35 :   unsigned NumVectorInstToHideOverhead = 10;
     368          35 :   int MaxMergeDistance = 64;
     369             : 
     370          47 :   if (Ty->isVectorTy() && SE && 
     371          12 :       !BaseT::isConstantStridedAccessLessThan(SE, Ptr, MaxMergeDistance + 1))
     372             :     return NumVectorInstToHideOverhead;
     373             : 
     374             :   // In many cases the address computation is not merged into the instruction
     375             :   // addressing mode.
     376             :   return 1;
     377             : }
     378             : 
     379           0 : int ARMTTIImpl::getFPOpCost(Type *Ty) {
     380             :   // Use logic similar to that in ARMISelLowering:
     381             :   // Any ARM CPU with VFP2 has floating point, but Thumb1 didn't have access
     382             :   // to VFP.
     383             : 
     384           0 :   if (ST->hasVFP2() && !ST->isThumb1Only()) {
     385           0 :     if (Ty->isFloatTy()) {
     386             :       return TargetTransformInfo::TCC_Basic;
     387             :     }
     388             : 
     389           0 :     if (Ty->isDoubleTy()) {
     390           0 :       return ST->isFPOnlySP() ? TargetTransformInfo::TCC_Expensive :
     391             :         TargetTransformInfo::TCC_Basic;
     392             :     }
     393             :   }
     394             : 
     395             :   return TargetTransformInfo::TCC_Expensive;
     396             : }
     397             : 
     398          11 : int ARMTTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index,
     399             :                                Type *SubTp) {
     400             :   // We only handle costs of reverse and alternate shuffles for now.
     401          11 :   if (Kind != TTI::SK_Reverse && Kind != TTI::SK_Alternate)
     402           0 :     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
     403             : 
     404          11 :   if (Kind == TTI::SK_Reverse) {
     405             :     static const CostTblEntry NEONShuffleTbl[] = {
     406             :         // A reverse shuffle costs one instruction if we are shuffling within a
     407             :         // double word (vrev) or two if we shuffle a quad word (vrev, vext).
     408             :         {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
     409             :         {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
     410             :         {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
     411             :         {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
     412             : 
     413             :         {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
     414             :         {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
     415             :         {ISD::VECTOR_SHUFFLE, MVT::v8i16, 2},
     416             :         {ISD::VECTOR_SHUFFLE, MVT::v16i8, 2}};
     417             : 
     418          11 :     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
     419             : 
     420           8 :     if (const auto *Entry = CostTableLookup(NEONShuffleTbl, ISD::VECTOR_SHUFFLE,
     421          19 :                                             LT.second))
     422           8 :       return LT.first * Entry->Cost;
     423             : 
     424           3 :     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
     425             :   }
     426           0 :   if (Kind == TTI::SK_Alternate) {
     427             :     static const CostTblEntry NEONAltShuffleTbl[] = {
     428             :         // Alt shuffle cost table for ARM. Cost is the number of instructions
     429             :         // required to create the shuffled vector.
     430             : 
     431             :         {ISD::VECTOR_SHUFFLE, MVT::v2f32, 1},
     432             :         {ISD::VECTOR_SHUFFLE, MVT::v2i64, 1},
     433             :         {ISD::VECTOR_SHUFFLE, MVT::v2f64, 1},
     434             :         {ISD::VECTOR_SHUFFLE, MVT::v2i32, 1},
     435             : 
     436             :         {ISD::VECTOR_SHUFFLE, MVT::v4i32, 2},
     437             :         {ISD::VECTOR_SHUFFLE, MVT::v4f32, 2},
     438             :         {ISD::VECTOR_SHUFFLE, MVT::v4i16, 2},
     439             : 
     440             :         {ISD::VECTOR_SHUFFLE, MVT::v8i16, 16},
     441             : 
     442             :         {ISD::VECTOR_SHUFFLE, MVT::v16i8, 32}};
     443             : 
     444           0 :     std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
     445           0 :     if (const auto *Entry = CostTableLookup(NEONAltShuffleTbl,
     446           0 :                                             ISD::VECTOR_SHUFFLE, LT.second))
     447           0 :       return LT.first * Entry->Cost;
     448           0 :     return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
     449             :   }
     450           0 :   return BaseT::getShuffleCost(Kind, Tp, Index, SubTp);
     451             : }
     452             : 
     453         222 : int ARMTTIImpl::getArithmeticInstrCost(
     454             :     unsigned Opcode, Type *Ty, TTI::OperandValueKind Op1Info,
     455             :     TTI::OperandValueKind Op2Info, TTI::OperandValueProperties Opd1PropInfo,
     456             :     TTI::OperandValueProperties Opd2PropInfo,
     457             :     ArrayRef<const Value *> Args) {
     458             : 
     459         222 :   int ISDOpcode = TLI->InstructionOpcodeToISD(Opcode);
     460         222 :   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
     461             : 
     462         222 :   const unsigned FunctionCallDivCost = 20;
     463         222 :   const unsigned ReciprocalDivCost = 10;
     464             :   static const CostTblEntry CostTbl[] = {
     465             :     // Division.
     466             :     // These costs are somewhat random. Choose a cost of 20 to indicate that
     467             :     // vectorizing division (added function call) is going to be very expensive.
     468             :     // Double registers types.
     469             :     { ISD::SDIV, MVT::v1i64, 1 * FunctionCallDivCost},
     470             :     { ISD::UDIV, MVT::v1i64, 1 * FunctionCallDivCost},
     471             :     { ISD::SREM, MVT::v1i64, 1 * FunctionCallDivCost},
     472             :     { ISD::UREM, MVT::v1i64, 1 * FunctionCallDivCost},
     473             :     { ISD::SDIV, MVT::v2i32, 2 * FunctionCallDivCost},
     474             :     { ISD::UDIV, MVT::v2i32, 2 * FunctionCallDivCost},
     475             :     { ISD::SREM, MVT::v2i32, 2 * FunctionCallDivCost},
     476             :     { ISD::UREM, MVT::v2i32, 2 * FunctionCallDivCost},
     477             :     { ISD::SDIV, MVT::v4i16,     ReciprocalDivCost},
     478             :     { ISD::UDIV, MVT::v4i16,     ReciprocalDivCost},
     479             :     { ISD::SREM, MVT::v4i16, 4 * FunctionCallDivCost},
     480             :     { ISD::UREM, MVT::v4i16, 4 * FunctionCallDivCost},
     481             :     { ISD::SDIV, MVT::v8i8,      ReciprocalDivCost},
     482             :     { ISD::UDIV, MVT::v8i8,      ReciprocalDivCost},
     483             :     { ISD::SREM, MVT::v8i8,  8 * FunctionCallDivCost},
     484             :     { ISD::UREM, MVT::v8i8,  8 * FunctionCallDivCost},
     485             :     // Quad register types.
     486             :     { ISD::SDIV, MVT::v2i64, 2 * FunctionCallDivCost},
     487             :     { ISD::UDIV, MVT::v2i64, 2 * FunctionCallDivCost},
     488             :     { ISD::SREM, MVT::v2i64, 2 * FunctionCallDivCost},
     489             :     { ISD::UREM, MVT::v2i64, 2 * FunctionCallDivCost},
     490             :     { ISD::SDIV, MVT::v4i32, 4 * FunctionCallDivCost},
     491             :     { ISD::UDIV, MVT::v4i32, 4 * FunctionCallDivCost},
     492             :     { ISD::SREM, MVT::v4i32, 4 * FunctionCallDivCost},
     493             :     { ISD::UREM, MVT::v4i32, 4 * FunctionCallDivCost},
     494             :     { ISD::SDIV, MVT::v8i16, 8 * FunctionCallDivCost},
     495             :     { ISD::UDIV, MVT::v8i16, 8 * FunctionCallDivCost},
     496             :     { ISD::SREM, MVT::v8i16, 8 * FunctionCallDivCost},
     497             :     { ISD::UREM, MVT::v8i16, 8 * FunctionCallDivCost},
     498             :     { ISD::SDIV, MVT::v16i8, 16 * FunctionCallDivCost},
     499             :     { ISD::UDIV, MVT::v16i8, 16 * FunctionCallDivCost},
     500             :     { ISD::SREM, MVT::v16i8, 16 * FunctionCallDivCost},
     501             :     { ISD::UREM, MVT::v16i8, 16 * FunctionCallDivCost},
     502             :     // Multiplication.
     503             :   };
     504             : 
     505         222 :   if (ST->hasNEON())
     506         286 :     if (const auto *Entry = CostTableLookup(CostTbl, ISDOpcode, LT.second))
     507          64 :       return LT.first * Entry->Cost;
     508             : 
     509         474 :   int Cost = BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info,
     510         316 :                                            Opd1PropInfo, Opd2PropInfo);
     511             : 
     512             :   // This is somewhat of a hack. The problem that we are facing is that SROA
     513             :   // creates a sequence of shift, and, or instructions to construct values.
     514             :   // These sequences are recognized by the ISel and have zero-cost. Not so for
     515             :   // the vectorized code. Because we have support for v2i64 but not i64 those
     516             :   // sequences look particularly beneficial to vectorize.
     517             :   // To work around this we increase the cost of v2i64 operations to make them
     518             :   // seem less beneficial.
     519         316 :   if (LT.second == MVT::v2i64 &&
     520             :       Op2Info == TargetTransformInfo::OK_UniformConstantValue)
     521           2 :     Cost += 4;
     522             : 
     523             :   return Cost;
     524             : }
     525             : 
     526         149 : int ARMTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
     527             :                                 unsigned AddressSpace, const Instruction *I) {
                         // Cost of a load/store of type Src. The result is LT.first (the first
                         // member of the pair returned by getTypeLegalizationCost), multiplied
                         // by 4 for vectors of double whose Alignment is not 16.
                         // Opcode, AddressSpace and I are not consulted by this implementation.
     528         149 :   std::pair<int, MVT> LT = TLI->getTypeLegalizationCost(DL, Src);
     529             : 
                         // NOTE(review): the test is `Alignment != 16`, so alignments larger
                         // than 16 also take the penalised path -- confirm this is intended.
     530         250 :   if (Src->isVectorTy() && Alignment != 16 &&
     531         303 :       Src->getVectorElementType()->isDoubleTy()) {
     532             :     // Unaligned loads/stores are extremely inefficient.
     533             :     // We need 4 uops for vst.1/vld.1 vs. 1 uop for vldr/vstr.
     534           3 :     return LT.first * 4;
     535             :   }
     536             :   return LT.first;
     537             : }
     538             : 
     539           6 : int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
     540             :                                            unsigned Factor,
     541             :                                            ArrayRef<unsigned> Indices,
     542             :                                            unsigned Alignment,
     543             :                                            unsigned AddressSpace) {
                         // Cost of an interleaved access of VecTy with interleave factor Factor.
                         // When the access can be matched to vldN/vstN the cost is
                         // Factor * (number of interleaved accesses for the sub-vector type);
                         // every other case is forwarded unchanged to the BaseT implementation.
                         // Requires (via assert) Factor >= 2 and VecTy to be a VectorType.
     544             :   assert(Factor >= 2 && "Invalid interleave factor");
     545             :   assert(isa<VectorType>(VecTy) && "Expect a vector type");
     546             : 
     547             :   // vldN/vstN doesn't support vector types of i64/f64 element.
     548          12 :   bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64;
     549             : 
     550           6 :   if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) {
     551           6 :     unsigned NumElts = VecTy->getVectorNumElements();
                           // SubVecTy is the per-member vector: same scalar type, NumElts / Factor
                           // elements (truncating division).
                           // NOTE(review): it is built before the NumElts % Factor check below, so
                           // NumElts < Factor would request a zero-element vector -- confirm
                           // callers never pass such a type.
     552          12 :     auto *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor);
     553             : 
     554             :     // vldN/vstN only support legal vector types of size 64 or 128 in bits.
     555             :     // Accesses having vector types that are a multiple of 128 bits can be
     556             :     // matched to more than one vldN/vstN instruction.
     557          12 :     if (NumElts % Factor == 0 &&
     558           6 :         TLI->isLegalInterleavedAccessType(SubVecTy, DL))
     559           6 :       return Factor * TLI->getNumInterleavedAccesses(SubVecTy, DL);
     560             :   }
     561             : 
                         // Fallback: factor too large, 64-bit elements, element count not
                         // divisible by Factor, or sub-vector type not legal for vldN/vstN.
     562           0 :   return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
     563           0 :                                            Alignment, AddressSpace);
     564             : }
     565             : 
     566          40 : void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
     567             :                                          TTI::UnrollingPreferences &UP) {
                         // Fills in UP for loop L. Non-M-Class subtargets defer entirely to
                         // BasicTTIImplBase. On M-Class, only single-block loops on Thumb-2 are
                         // considered; for those, partial and runtime unrolling are enabled
                         // unless the function is optimized for size or the block contains a
                         // call that is not skipped by the isLoweredToCall() check below.
     568             :   // Currently, these preferences are only enabled for M-Class cores.
     569          40 :   if (!ST->isMClass())
     570          22 :     return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
     571             : 
     572             :   // Only enable on Thumb-2 targets for simple loops.
                         // UP is left untouched on this early return.
     573          42 :   if (!ST->isThumb2() || L->getNumBlocks() != 1)
     574             :     return;
     575             : 
     576             :   // Disable loop unrolling for Oz and Os.
                         // The two size thresholds are zeroed before the optForSize() test, so
                         // they stay at 0 on every path below, including the early returns.
     577          10 :   UP.OptSizeThreshold = 0;
     578          10 :   UP.PartialOptSizeThreshold = 0;
                         // NOTE(review): BB is dereferenced without a null check; presumably
                         // getLoopLatch() is non-null for a single-block loop -- confirm.
     579          10 :   BasicBlock *BB = L->getLoopLatch();
     580          10 :   if (BB->getParent()->optForSize())
     581             :     return;
     582             : 
     583             :   // Scan the loop: don't unroll loops with calls as this could prevent
     584             :   // inlining.
                         // Cost accumulates getUserCost() over every non-call instruction in BB.
     585          10 :   unsigned Cost = 0;
     586         136 :   for (auto &I : *BB) {
     587         210 :     if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
     588           2 :       ImmutableCallSite CS(&I);
     589           2 :       if (const Function *F = CS.getCalledFunction()) {
                               // A callee that isLoweredToCall() rejects is skipped and adds
                               // nothing to Cost.
     590           2 :         if (!isLoweredToCall(F))
     591           0 :           continue;
     592             :       }
                             // Any other call (including one with no known callee) ends the
                             // scan without enabling the preferences below.
     593           2 :       return;
     594             :     }
     595             :     SmallVector<const Value*, 4> Operands(I.value_op_begin(),
     596         520 :                                           I.value_op_end());
     597         208 :     Cost += getUserCost(&I, Operands);
     598             :   }
     599             : 
     600           8 :   UP.Partial = true;
     601           8 :   UP.Runtime = true;
     602           8 :   UP.UnrollRemainder = true;
     603           8 :   UP.DefaultUnrollRuntimeCount = 4;
     604             : 
     605             :   // Forcing the unrolling of small loops can be very useful because of the
     606             :   // taken-branch cost of the backedge.
     607           8 :   if (Cost < 12)
     608           8 :     UP.Force = true;
     609             : }

Generated by: LCOV version 1.13