LCOV - code coverage report
Current view:  top level - lib/Target/AArch64 - AArch64Subtarget.cpp (source / functions)
Test:          llvm-toolchain.info
Date:          2018-06-17 00:07:59

                   Hit     Total    Coverage
Lines:             129       129     100.0 %
Functions:          16        16     100.0 %

          Line data    Source code
       1             : //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the AArch64 specific subclass of TargetSubtarget.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64Subtarget.h"
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64PBQPRegAlloc.h"
      19             : #include "AArch64TargetMachine.h"
      20             : 
      21             : #include "AArch64CallLowering.h"
      22             : #include "AArch64LegalizerInfo.h"
      23             : #include "AArch64RegisterBankInfo.h"
      24             : #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
      25             : #include "llvm/CodeGen/MachineScheduler.h"
      26             : #include "llvm/IR/GlobalValue.h"
      27             : #include "llvm/Support/TargetParser.h"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "aarch64-subtarget"
      32             : 
      33             : #define GET_SUBTARGETINFO_CTOR
      34             : #define GET_SUBTARGETINFO_TARGET_DESC
      35             : #include "AArch64GenSubtargetInfo.inc"
      36             : 
      37             : static cl::opt<bool>
      38      202338 : EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
      39      303507 :                      "converter pass"), cl::init(true), cl::Hidden);
      40             : 
      41             : // If the OS supports TBI, use this flag to enable it.
      42             : static cl::opt<bool>
      43      202338 : UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
      44      303507 :                          "an address is ignored"), cl::init(false), cl::Hidden);
      45             : 
      46             : static cl::opt<bool>
      47      101169 :     UseNonLazyBind("aarch64-enable-nonlazybind",
      48      101169 :                    cl::desc("Call nonlazybind functions via direct GOT load"),
      49      303507 :                    cl::init(false), cl::Hidden);
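
These three cl::opt flags are hidden developer options rather than user-facing ones; any tool that parses LLVM command-line options picks them up, so they are typically set on the llc command line (for example -aarch64-early-ifcvt=false) or forwarded from clang via -mllvm. A minimal sketch of that mechanism, using a hypothetical flag name and assuming only the standard llvm/Support/CommandLine.h API:

    #include "llvm/Support/CommandLine.h"

    // Hypothetical flag, mirroring the pattern used in the file above.
    static llvm::cl::opt<bool>
        DemoFlag("demo-flag", llvm::cl::desc("Illustrative hidden option"),
                 llvm::cl::init(true), llvm::cl::Hidden);

    int main(int argc, char **argv) {
      // Tools such as llc call this; it fills in DemoFlag (and flags like the
      // ones defined in AArch64Subtarget.cpp) from the command line.
      llvm::cl::ParseCommandLineOptions(argc, argv);
      return DemoFlag ? 0 : 1;
    }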
      50             : 
      51             : AArch64Subtarget &
      52        1435 : AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
      53             :                                                   StringRef CPUString) {
      54             :   // Determine default and user-specified characteristics
      55             : 
      56        1435 :   if (CPUString.empty())
      57        1162 :     CPUString = "generic";
      58             : 
      59        1435 :   ParseSubtargetFeatures(CPUString, FS);
      60        1435 :   initializeProperties();
      61             : 
      62        1435 :   return *this;
      63             : }
      64             : 
      65        1435 : void AArch64Subtarget::initializeProperties() {
      66             :   // Initialize CPU-specific properties. We should add a tablegen feature for
      67             :   // this in the future so we can specify it together with the subtarget
      68             :   // features.
      69        1435 :   switch (ARMProcFamily) {
      70          83 :   case Cyclone:
      71          83 :     CacheLineSize = 64;
      72          83 :     PrefetchDistance = 280;
      73          83 :     MinPrefetchStride = 2048;
      74          83 :     MaxPrefetchIterationsAhead = 3;
      75          83 :     break;
      76          35 :   case CortexA57:
      77          35 :     MaxInterleaveFactor = 4;
      78          35 :     PrefFunctionAlignment = 4;
      79          35 :     break;
      80          19 :   case ExynosM1:
      81          19 :     MaxInterleaveFactor = 4;
      82          19 :     MaxJumpTableSize = 8;
      83          19 :     PrefFunctionAlignment = 4;
      84          19 :     PrefLoopAlignment = 3;
      85          19 :     break;
      86          19 :   case ExynosM3:
      87          19 :     MaxInterleaveFactor = 4;
      88          19 :     MaxJumpTableSize = 20;
      89          19 :     PrefFunctionAlignment = 5;
      90          19 :     PrefLoopAlignment = 4;
      91          19 :     break;
      92          13 :   case Falkor:
      93          13 :     MaxInterleaveFactor = 4;
      94             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
      95          13 :     MinVectorRegisterBitWidth = 128;
      96          13 :     CacheLineSize = 128;
      97          13 :     PrefetchDistance = 820;
      98          13 :     MinPrefetchStride = 2048;
      99          13 :     MaxPrefetchIterationsAhead = 8;
     100          13 :     break;
     101           4 :   case Saphira:
     102           4 :     MaxInterleaveFactor = 4;
     103             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     104           4 :     MinVectorRegisterBitWidth = 128;
     105           4 :     break;
     106          12 :   case Kryo:
     107          12 :     MaxInterleaveFactor = 4;
     108          12 :     VectorInsertExtractBaseCost = 2;
     109          12 :     CacheLineSize = 128;
     110          12 :     PrefetchDistance = 740;
     111          12 :     MinPrefetchStride = 1024;
     112          12 :     MaxPrefetchIterationsAhead = 11;
     113             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     114          12 :     MinVectorRegisterBitWidth = 128;
     115          12 :     break;
     116           5 :   case ThunderX2T99:
     117           5 :     CacheLineSize = 64;
     118           5 :     PrefFunctionAlignment = 3;
     119           5 :     PrefLoopAlignment = 2;
     120           5 :     MaxInterleaveFactor = 4;
     121           5 :     PrefetchDistance = 128;
     122           5 :     MinPrefetchStride = 1024;
     123           5 :     MaxPrefetchIterationsAhead = 4;
     124             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     125           5 :     MinVectorRegisterBitWidth = 128;
     126           5 :     break;
     127           4 :   case ThunderX:
     128             :   case ThunderXT88:
     129             :   case ThunderXT81:
     130             :   case ThunderXT83:
     131           4 :     CacheLineSize = 128;
     132           4 :     PrefFunctionAlignment = 3;
     133           4 :     PrefLoopAlignment = 2;
     134             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     135           4 :     MinVectorRegisterBitWidth = 128;
     136           4 :     break;
     137             :   case CortexA35: break;
     138          19 :   case CortexA53:
     139          19 :     PrefFunctionAlignment = 3;
     140          19 :     break;
     141             :   case CortexA55: break;
     142          12 :   case CortexA72:
     143             :   case CortexA73:
     144             :   case CortexA75:
     145          12 :     PrefFunctionAlignment = 4;
     146          12 :     break;
     147             :   case Others: break;
     148             :   }
     149        1435 : }
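
For reference, the alignment fields set above are log2 values in this version of LLVM (later releases switched to byte alignments, so treat this as an assumption tied to the 2018 code base): PrefFunctionAlignment = 4 requests 2^4 = 16-byte function alignment, and PrefLoopAlignment = 3 requests 2^3 = 8-byte loop alignment.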
     150             : 
     151        1435 : AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
     152             :                                    const std::string &FS,
     153        1435 :                                    const TargetMachine &TM, bool LittleEndian)
     154             :     : AArch64GenSubtargetInfo(TT, CPU, FS),
     155        1435 :       ReserveX18(AArch64::isX18ReservedByDefault(TT)), IsLittle(LittleEndian),
     156             :       TargetTriple(TT), FrameLowering(),
     157        2870 :       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
     158        8610 :       TLInfo(TM, *this) {
     159        1435 :   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
     160        1435 :   Legalizer.reset(new AArch64LegalizerInfo(*this));
     161             : 
     162        1435 :   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
     163             : 
     164             :   // FIXME: At this point, we can't rely on Subtarget having RBI.
     165             :   // It's awkward to mix passing RBI and the Subtarget; should we pass
     166             :   // TII/TRI as well?
     167        1435 :   InstSelector.reset(createAArch64InstructionSelector(
     168             :       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
     169             : 
     170             :   RegBankInfo.reset(RBI);
     171        1435 : }
     172             : 
     173         272 : const CallLowering *AArch64Subtarget::getCallLowering() const {
     174         272 :   return CallLoweringInfo.get();
     175             : }
     176             : 
     177         412 : const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
     178         412 :   return InstSelector.get();
     179             : }
     180             : 
     181         184 : const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
     182         184 :   return Legalizer.get();
     183             : }
     184             : 
     185         333 : const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
     186         333 :   return RegBankInfo.get();
     187             : }
     188             : 
     189             : /// Find the target operand flags that describe how a global value should be
     190             : /// referenced for the current subtarget.
     191             : unsigned char
     192        7022 : AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
     193             :                                           const TargetMachine &TM) const {
     194             :   // MachO large model always goes via a GOT, simply to get a single 8-byte
     195             :   // absolute relocation on all global addresses.
     196        7087 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
     197             :     return AArch64II::MO_GOT;
     198             : 
     199        7009 :   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
     200             :                                                   : AArch64II::MO_NO_FLAG;
     201             : 
     202        7009 :   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     203         454 :     return AArch64II::MO_GOT | Flags;
     204             : 
     205             :   // The small code model's direct accesses use ADRP, which cannot
     206             :   // necessarily produce the value 0 (if the code is above 4GB).
     207        6503 :   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
     208           8 :     return AArch64II::MO_GOT | Flags;
     209             : 
     210        6547 :   return Flags;
     211             : }
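
A minimal sketch of how a caller might consume the returned flags during lowering; Subtarget, GV, and TM are assumed to be in scope, and the real call sites live elsewhere in the backend rather than in this file:

    // Sketch under the assumptions above, not code from this file.
    unsigned char OpFlags = Subtarget->ClassifyGlobalReference(GV, TM);
    if (OpFlags & AArch64II::MO_GOT) {
      // Indirect reference: the address must be loaded from the GOT entry.
    } else {
      // Direct reference, e.g. ADRP + ADD of the symbol in the small code model.
    }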
     212             : 
     213        1287 : unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
     214             :     const GlobalValue *GV, const TargetMachine &TM) const {
     215             :   // MachO large model always goes via a GOT, because we don't have the
     216             :   // relocations available to do anything else.
     217        1288 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
     218             :       !GV->hasInternalLinkage())
     219             :     return AArch64II::MO_GOT;
     220             : 
     221             :   // NonLazyBind goes via GOT unless we know it's available locally.
     222             :   auto *F = dyn_cast<Function>(GV);
     223        1293 :   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
     224           3 :       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     225             :     return AArch64II::MO_GOT;
     226             : 
     227             :   return AArch64II::MO_NO_FLAG;
     228             : }
     229             : 
     230       20472 : void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     231             :                                            unsigned NumRegionInstrs) const {
     232             :   // An LNT run (at least on Cyclone) showed reasonably significant gains for
     233             :   // bi-directional scheduling, e.g. on 253.perlbmk.
     234       20472 :   Policy.OnlyTopDown = false;
     235       20472 :   Policy.OnlyBottomUp = false;
     236             :   // Enabling or disabling the latency heuristic is a close call: it seems to
     237             :   // help nearly no benchmark on out-of-order architectures; on the other
     238             :   // hand, it regresses register pressure on a few benchmarks.
     239       20472 :   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
     240       20472 : }
     241             : 
     242       13286 : bool AArch64Subtarget::enableEarlyIfConversion() const {
     243       13286 :   return EnableEarlyIfConvert;
     244             : }
     245             : 
     246       15206 : bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
     247       15206 :   if (!UseAddressTopByteIgnored)
     248             :     return false;
     249             : 
     250             :   if (TargetTriple.isiOS()) {
     251             :     unsigned Major, Minor, Micro;
     252           7 :     TargetTriple.getiOSVersion(Major, Minor, Micro);
     253           7 :     return Major >= 8;
     254             :   }
     255             : 
     256             :   return false;
     257             : }
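
For background, TBI ("top byte ignored") means the hardware ignores bits 63:56 of a virtual address during translation, which is what lets software store a tag there. A self-contained illustration with hypothetical values (not code from this file):

    #include <cstdint>

    int main() {
      const uint64_t Untagged = 0x0000004010000000ULL;
      const uint64_t Tagged = Untagged | (0xABULL << 56); // tag 0xAB in bits 63:56
      // With TBI enabled, a load or store through Tagged reaches the same
      // memory as one through Untagged, because the top byte is ignored
      // when the address is translated.
      return (Tagged & 0x00FFFFFFFFFFFFFFULL) == Untagged ? 0 : 1;
    }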
     258             : 
     259             : std::unique_ptr<PBQPRAConstraint>
     260           5 : AArch64Subtarget::getCustomPBQPConstraints() const {
     261          10 :   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
     262             : }
     263             : 
     264         830 : void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
     265             :   // We usually compute the max call frame size after ISel. Do the computation
     266             :   // now if the .mir file didn't specify it. Note that this will probably give
     267             :   // you bogus values after PEI has eliminated the callframe setup/destroy
     268             :   // pseudo instructions; specify it explicitly if you need it to be correct.
     269         830 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     270         830 :   if (!MFI.isMaxCallFrameSizeComputed())
     271         826 :     MFI.computeMaxCallFrameSize(MF);
     272      304337 : }

Generated by: LCOV version 1.13