LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64Subtarget.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 129 129 100.0 %
Date: 2018-02-23 15:42:53 Functions: 16 16 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the AArch64 specific subclass of TargetSubtarget.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64Subtarget.h"
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64PBQPRegAlloc.h"
      19             : #include "AArch64TargetMachine.h"
      20             : 
      21             : #include "AArch64CallLowering.h"
      22             : #include "AArch64LegalizerInfo.h"
      23             : #include "AArch64RegisterBankInfo.h"
      24             : #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
      25             : #include "llvm/CodeGen/MachineScheduler.h"
      26             : #include "llvm/IR/GlobalValue.h"
      27             : 
      28             : using namespace llvm;
      29             : 
      30             : #define DEBUG_TYPE "aarch64-subtarget"
      31             : 
      32             : #define GET_SUBTARGETINFO_CTOR
      33             : #define GET_SUBTARGETINFO_TARGET_DESC
      34             : #include "AArch64GenSubtargetInfo.inc"
      35             : 
      36             : static cl::opt<bool> // Hidden option, on by default; its value is returned by enableEarlyIfConversion().
      37      163372 : EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
      38      245058 :                      "converter pass"), cl::init(true), cl::Hidden);
      39             : 
      40             : // If the OS supports TBI (top byte of an address ignored), use this hidden flag to enable it; it is off by default.
      41             : static cl::opt<bool>
      42      163372 : UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
      43      245058 :                          "an address is ignored"), cl::init(false), cl::Hidden);
      44             : 
      45             : static cl::opt<bool> // Hidden option, off by default; consulted by classifyGlobalFunctionReference().
      46       81686 :     UseNonLazyBind("aarch64-enable-nonlazybind",
      47       81686 :                    cl::desc("Call nonlazybind functions via direct GOT load"),
      48      245058 :                    cl::init(false), cl::Hidden);
      49             : 
      50             : AArch64Subtarget & // Returns *this; the constructor passes the result straight to InstrInfo's initializer.
      51        1322 : AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
      52             :                                                   StringRef CPUString) {
      53             :   // Determine default and user-specified characteristics: pick the CPU name, parse the feature string FS, then apply per-CPU properties.
      54             : 
      55        1322 :   if (CPUString.empty())
      56        1062 :     CPUString = "generic"; // no CPU name given: use "generic"
      57             : 
      58        1322 :   ParseSubtargetFeatures(CPUString, FS); // presumably supplied by AArch64GenSubtargetInfo.inc -- TODO confirm
      59        1322 :   initializeProperties(); // switches on ARMProcFamily; presumably relies on the parse above having set it -- TODO confirm
      60             : 
      61        1322 :   return *this;
      62             : }
      63             : 
      64        1322 : void AArch64Subtarget::initializeProperties() { // Assigns per-CPU tuning members, keyed on ARMProcFamily.
      65             :   // Initialize CPU specific properties. We should add a tablegen feature for
      66             :   // this in the future so we can specify it together with the subtarget
      67             :   // features.
      68        1322 :   switch (ARMProcFamily) { // each case assigns only some members; the others are left untouched here
      69          83 :   case Cyclone:
      70          83 :     CacheLineSize = 64;
      71          83 :     PrefetchDistance = 280;
      72          83 :     MinPrefetchStride = 2048;
      73          83 :     MaxPrefetchIterationsAhead = 3;
      74          83 :     break;
      75          34 :   case CortexA57:
      76          34 :     MaxInterleaveFactor = 4;
      77          34 :     PrefFunctionAlignment = 4;
      78          34 :     break;
      79          19 :   case ExynosM1:
      80          19 :     MaxInterleaveFactor = 4;
      81          19 :     MaxJumpTableSize = 8;
      82          19 :     PrefFunctionAlignment = 4;
      83          19 :     PrefLoopAlignment = 3;
      84          19 :     break;
      85          12 :   case ExynosM3:
      86          12 :     MaxInterleaveFactor = 4;
      87          12 :     MaxJumpTableSize = 20;
      88          12 :     PrefFunctionAlignment = 5;
      89          12 :     PrefLoopAlignment = 4;
      90          12 :     break;
      91          13 :   case Falkor:
      92          13 :     MaxInterleaveFactor = 4;
      93             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
      94          13 :     MinVectorRegisterBitWidth = 128;
      95          13 :     CacheLineSize = 128;
      96          13 :     PrefetchDistance = 820;
      97          13 :     MinPrefetchStride = 2048;
      98          13 :     MaxPrefetchIterationsAhead = 8;
      99          13 :     break;
     100           4 :   case Saphira:
     101           4 :     MaxInterleaveFactor = 4;
     102             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     103           4 :     MinVectorRegisterBitWidth = 128;
     104           4 :     break;
     105          12 :   case Kryo:
     106          12 :     MaxInterleaveFactor = 4;
     107          12 :     VectorInsertExtractBaseCost = 2;
     108          12 :     CacheLineSize = 128;
     109          12 :     PrefetchDistance = 740;
     110          12 :     MinPrefetchStride = 1024;
     111          12 :     MaxPrefetchIterationsAhead = 11;
     112             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     113          12 :     MinVectorRegisterBitWidth = 128;
     114          12 :     break;
     115           5 :   case ThunderX2T99:
     116           5 :     CacheLineSize = 64;
     117           5 :     PrefFunctionAlignment = 3;
     118           5 :     PrefLoopAlignment = 2;
     119           5 :     MaxInterleaveFactor = 4;
     120           5 :     PrefetchDistance = 128;
     121           5 :     MinPrefetchStride = 1024;
     122           5 :     MaxPrefetchIterationsAhead = 4;
     123             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     124           5 :     MinVectorRegisterBitWidth = 128;
     125           5 :     break;
     126           4 :   case ThunderX: // ThunderX and the three ThunderXT8x labels below share one set of values
     127             :   case ThunderXT88:
     128             :   case ThunderXT81:
     129             :   case ThunderXT83:
     130           4 :     CacheLineSize = 128;
     131           4 :     PrefFunctionAlignment = 3;
     132           4 :     PrefLoopAlignment = 2;
     133             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     134           4 :     MinVectorRegisterBitWidth = 128;
     135           4 :     break;
     136             :   case CortexA35: break; // nothing is assigned for this family
     137          17 :   case CortexA53:
     138          17 :     PrefFunctionAlignment = 3;
     139          17 :     break;
     140             :   case CortexA55: break; // nothing is assigned for this family
     141          12 :   case CortexA72: // A72, A73 and A75 share the same function alignment
     142             :   case CortexA73:
     143             :   case CortexA75:
     144          12 :     PrefFunctionAlignment = 4;
     145          12 :     break;
     146             :   case Others: break; // nothing is assigned for any other family
     147             :   }
     148        1322 : }
     149             : 
     150        1322 : AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
     151             :                                    const std::string &FS,
     152        1322 :                                    const TargetMachine &TM, bool LittleEndian)
     153             :     : AArch64GenSubtargetInfo(TT, CPU, FS),
     154         971 :       ReserveX18(TT.isOSDarwin() || TT.isOSWindows()), IsLittle(LittleEndian), // X18 is reserved on Darwin and Windows triples
     155             :       TargetTriple(TT), FrameLowering(),
     156        2644 :       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), // feature parsing and CPU property setup run here, as InstrInfo's initializer argument
     157        7932 :       TLInfo(TM, *this) {
     158        1322 :   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering())); // GlobalISel helper objects are created in the body, after the members above
     159        1322 :   Legalizer.reset(new AArch64LegalizerInfo(*this));
     160             : 
     161        1322 :   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); // kept as a raw pointer so it can be passed to the selector before RegBankInfo is set
     162             : 
     163             :   // FIXME: At this point, we can't rely on Subtarget having RBI.
     164             :   // It's awkward to mix passing RBI and the Subtarget; should we pass
     165             :   // TII/TRI as well?
     166        1322 :   InstSelector.reset(createAArch64InstructionSelector(
     167             :       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
     168             : 
     169             :   RegBankInfo.reset(RBI); // RBI is handed to the RegBankInfo member only after the selector was created with it
     170        1322 : }
     171             : 
     172         268 : const CallLowering *AArch64Subtarget::getCallLowering() const { // Returns the AArch64CallLowering object set up in the constructor.
     173         268 :   return CallLoweringInfo.get();
     174             : }
     175             : 
     176         253 : const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { // Returns the selector made by createAArch64InstructionSelector() in the constructor.
     177         253 :   return InstSelector.get();
     178             : }
     179             : 
     180         179 : const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { // Returns the AArch64LegalizerInfo object set up in the constructor.
     181         179 :   return Legalizer.get();
     182             : }
     183             : 
     184         297 : const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { // Returns the AArch64RegisterBankInfo object set up in the constructor.
     185         297 :   return RegBankInfo.get();
     186             : }
     187             : 
     188             : /// Find the target operand flags (AArch64II::MO_* values) that describe how the
     189             : /// global value GV should be referenced for the current subtarget and TM.
     190             : unsigned char
     191        3087 : AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
     192             :                                           const TargetMachine &TM) const {
     193             :   // MachO large model always goes via a GOT, simply to get a single 8-byte
     194             :   // absolute relocation on all global addresses.
     195        3124 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
     196             :     return AArch64II::MO_GOT; // returned without the DLL-import flag, which is only computed below
     197             : 
     198        3076 :   unsigned Flags = GV->hasDLLImportStorageClass() ? AArch64II::MO_DLLIMPORT
     199             :                                                   : AArch64II::MO_NO_FLAG;
     200             : 
     201        3076 :   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     202         237 :     return AArch64II::MO_GOT | Flags; // not assumed DSO-local: reference through the GOT
     203             : 
     204             :   // The small code model's direct accesses use ADRP, which cannot
     205             :   // necessarily produce the value 0 (if the code is above 4GB).
     206        2813 :   if (useSmallAddressing() && GV->hasExternalWeakLinkage())
     207           4 :     return AArch64II::MO_GOT | Flags; // so external-weak globals also go through the GOT under small addressing
     208             : 
     209        2835 :   return Flags; // otherwise only the DLL-import flag, if any
     210             : }
     211             : 
     212        1244 : unsigned char AArch64Subtarget::classifyGlobalFunctionReference( // Returns MO_GOT or MO_NO_FLAG for a reference to the function GV.
     213             :     const GlobalValue *GV, const TargetMachine &TM) const {
     214             :   // MachO large model always goes via a GOT, because we don't have the
     215             :   // relocations available to do anything else.
     216        1245 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
     217             :       !GV->hasInternalLinkage()) // internal-linkage globals are exempt from this rule
     218             :     return AArch64II::MO_GOT;
     219             : 
     220             :   // NonLazyBind goes via GOT unless we know it's available locally.
     221             :   auto *F = dyn_cast<Function>(GV); // F is null when GV is not a Function; tested in the condition below
     222        1250 :   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && // only when -aarch64-enable-nonlazybind is set
     223           3 :       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     224             :     return AArch64II::MO_GOT;
     225             : 
     226             :   return AArch64II::MO_NO_FLAG; // otherwise no operand flags
     227             : }
     228             : 
     229       19655 : void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     230             :                                            unsigned NumRegionInstrs) const { // NumRegionInstrs is not used by this implementation
     231             :   // LNT run (at least on Cyclone) showed reasonably significant gains for
     232             :   // bi-directional scheduling. 253.perlbmk.
     233       19655 :   Policy.OnlyTopDown = false;
     234       19655 :   Policy.OnlyBottomUp = false;
     235             :   // Enabling or disabling the latency heuristic is a close call: it seems to
     236             :   // help nearly no benchmark on out-of-order architectures; on the other hand,
     237             :   // it regresses register pressure on a few benchmarks.
     238       19655 :   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
     239       19655 : }
     240             : 
     241       12658 : bool AArch64Subtarget::enableEarlyIfConversion() const { // Reports the value of the -aarch64-early-ifcvt option.
     242       12658 :   return EnableEarlyIfConvert;
     243             : }
     244             : 
     245       14308 : bool AArch64Subtarget::supportsAddressTopByteIgnored() const { // True only with -aarch64-use-tbi on an iOS triple of major version 8 or later.
     246       14308 :   if (!UseAddressTopByteIgnored)
     247             :     return false; // the -aarch64-use-tbi option is not set
     248             : 
     249             :   if (TargetTriple.isiOS()) { // iOS is the only OS checked; every other triple falls through to false
     250             :     unsigned Major, Minor, Micro;
     251           7 :     TargetTriple.getiOSVersion(Major, Minor, Micro);
     252           7 :     return Major >= 8; // Minor and Micro are not consulted
     253             :   }
     254             : 
     255             :   return false;
     256             : }
     257             : 
     258             : std::unique_ptr<PBQPRAConstraint> // A new A57ChainingConstraint when balanceFPOps() is true, otherwise nullptr.
     259           5 : AArch64Subtarget::getCustomPBQPConstraints() const {
     260          10 :   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
     261             : }
     262             : 
     263         602 : void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const { // Fills in MF's max call frame size if it has not been computed yet.
     264             :   // We usually compute max call frame size after ISel. Do the computation now
     265             :   // if the .mir file didn't specify it. Note that this will probably give you
     266             :   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
     267             :   // instructions; specify it explicitly if you need it to be correct.
     268         602 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     269         602 :   if (!MFI.isMaxCallFrameSizeComputed())
     270         598 :     MFI.computeMaxCallFrameSize(MF);
     271      245660 : }

Generated by: LCOV version 1.13