LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64Subtarget.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 123 123 100.0 %
Date: 2018-09-23 13:06:45 Functions: 14 14 100.0 %
Legend: Lines: hit / not hit

          Line data    Source code
       1             : //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the AArch64 specific subclass of TargetSubtarget.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64Subtarget.h"
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64PBQPRegAlloc.h"
      19             : #include "AArch64TargetMachine.h"
      20             : 
      21             : #include "AArch64CallLowering.h"
      22             : #include "AArch64LegalizerInfo.h"
      23             : #include "AArch64RegisterBankInfo.h"
      24             : #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
      25             : #include "llvm/CodeGen/MachineScheduler.h"
      26             : #include "llvm/IR/GlobalValue.h"
      27             : #include "llvm/Support/TargetParser.h"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "aarch64-subtarget"
      32             : 
      33             : #define GET_SUBTARGETINFO_CTOR
      34             : #define GET_SUBTARGETINFO_TARGET_DESC
      35             : #include "AArch64GenSubtargetInfo.inc"
      36             : 
      37             : static cl::opt<bool>
      38             : EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
      39             :                      "converter pass"), cl::init(true), cl::Hidden);
      40             : 
      41             : // If OS supports TBI, use this flag to enable it.
      42             : static cl::opt<bool>
      43             : UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
      44             :                          "an address is ignored"), cl::init(false), cl::Hidden);
      45             : 
      46             : static cl::opt<bool>
      47             :     UseNonLazyBind("aarch64-enable-nonlazybind",
      48             :                    cl::desc("Call nonlazybind functions via direct GOT load"),
      49             :                    cl::init(false), cl::Hidden);
      50             : 
      51             : AArch64Subtarget &
      52        1536 : AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
      53             :                                                   StringRef CPUString) {
      54             :   // Determine default and user-specified characteristics
      55             : 
      56        1536 :   if (CPUString.empty())
      57        1261 :     CPUString = "generic";
      58             : 
      59        1536 :   ParseSubtargetFeatures(CPUString, FS);
      60        1536 :   initializeProperties();
      61             : 
      62        1536 :   return *this;
      63             : }
      64             : 
      65        1536 : void AArch64Subtarget::initializeProperties() {
      66             :   // Initialize CPU specific properties. We should add a tablegen feature for
      67             :   // this in the future so we can specify it together with the subtarget
      68             :   // features.
      69        1536 :   switch (ARMProcFamily) {
      70          85 :   case Cyclone:
      71          85 :     CacheLineSize = 64;
      72          85 :     PrefetchDistance = 280;
      73          85 :     MinPrefetchStride = 2048;
      74          85 :     MaxPrefetchIterationsAhead = 3;
      75          85 :     break;
      76          35 :   case CortexA57:
      77          35 :     MaxInterleaveFactor = 4;
      78          35 :     PrefFunctionAlignment = 4;
      79          35 :     break;
      80          19 :   case ExynosM1:
      81          19 :     MaxInterleaveFactor = 4;
      82          19 :     MaxJumpTableSize = 8;
      83          19 :     PrefFunctionAlignment = 4;
      84          19 :     PrefLoopAlignment = 3;
      85          19 :     break;
      86          19 :   case ExynosM3:
      87          19 :     MaxInterleaveFactor = 4;
      88          19 :     MaxJumpTableSize = 20;
      89          19 :     PrefFunctionAlignment = 5;
      90          19 :     PrefLoopAlignment = 4;
      91          19 :     break;
      92          13 :   case Falkor:
      93          13 :     MaxInterleaveFactor = 4;
      94             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
      95          13 :     MinVectorRegisterBitWidth = 128;
      96          13 :     CacheLineSize = 128;
      97          13 :     PrefetchDistance = 820;
      98          13 :     MinPrefetchStride = 2048;
      99          13 :     MaxPrefetchIterationsAhead = 8;
     100          13 :     break;
     101           4 :   case Saphira:
     102           4 :     MaxInterleaveFactor = 4;
     103             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     104           4 :     MinVectorRegisterBitWidth = 128;
     105           4 :     break;
     106          12 :   case Kryo:
     107          12 :     MaxInterleaveFactor = 4;
     108          12 :     VectorInsertExtractBaseCost = 2;
     109          12 :     CacheLineSize = 128;
     110          12 :     PrefetchDistance = 740;
     111          12 :     MinPrefetchStride = 1024;
     112          12 :     MaxPrefetchIterationsAhead = 11;
     113             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     114          12 :     MinVectorRegisterBitWidth = 128;
     115          12 :     break;
     116           5 :   case ThunderX2T99:
     117           5 :     CacheLineSize = 64;
     118           5 :     PrefFunctionAlignment = 3;
     119           5 :     PrefLoopAlignment = 2;
     120           5 :     MaxInterleaveFactor = 4;
     121           5 :     PrefetchDistance = 128;
     122           5 :     MinPrefetchStride = 1024;
     123           5 :     MaxPrefetchIterationsAhead = 4;
     124             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     125           5 :     MinVectorRegisterBitWidth = 128;
     126           5 :     break;
     127           4 :   case ThunderX:
     128             :   case ThunderXT88:
     129             :   case ThunderXT81:
     130             :   case ThunderXT83:
     131           4 :     CacheLineSize = 128;
     132           4 :     PrefFunctionAlignment = 3;
     133           4 :     PrefLoopAlignment = 2;
     134             :     // FIXME: remove this to enable 64-bit SLP if performance looks good.
     135           4 :     MinVectorRegisterBitWidth = 128;
     136           4 :     break;
     137             :   case CortexA35: break;
     138          19 :   case CortexA53:
     139          19 :     PrefFunctionAlignment = 3;
     140          19 :     break;
     141             :   case CortexA55: break;
     142          12 :   case CortexA72:
     143             :   case CortexA73:
     144             :   case CortexA75:
     145          12 :     PrefFunctionAlignment = 4;
     146          12 :     break;
     147             :   case Others: break;
     148             :   }
     149        1536 : }
     150             : 
     151        1536 : AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
     152             :                                    const std::string &FS,
     153        1536 :                                    const TargetMachine &TM, bool LittleEndian)
     154             :     : AArch64GenSubtargetInfo(TT, CPU, FS),
     155             :       ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
     156             :       IsLittle(LittleEndian),
     157             :       TargetTriple(TT), FrameLowering(),
     158        3072 :       InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
     159        3072 :       TLInfo(TM, *this) {
     160        1536 :   if (AArch64::isX18ReservedByDefault(TT))
     161             :     ReserveXRegister.set(18);
     162             : 
     163        1536 :   CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
     164        1536 :   Legalizer.reset(new AArch64LegalizerInfo(*this));
     165             : 
     166        1536 :   auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
     167             : 
     168             :   // FIXME: At this point, we can't rely on Subtarget having RBI.
     169             :   // It's awkward to mix passing RBI and the Subtarget; should we pass
     170             :   // TII/TRI as well?
     171        1536 :   InstSelector.reset(createAArch64InstructionSelector(
     172             :       *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
     173             : 
     174             :   RegBankInfo.reset(RBI);
     175        1536 : }
     176             : 
     177         324 : const CallLowering *AArch64Subtarget::getCallLowering() const {
     178         324 :   return CallLoweringInfo.get();
     179             : }
     180             : 
     181         441 : const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
     182         441 :   return InstSelector.get();
     183             : }
     184             : 
     185         211 : const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
     186         211 :   return Legalizer.get();
     187             : }
     188             : 
     189         383 : const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
     190         383 :   return RegBankInfo.get();
     191             : }
     192             : 
     193             : /// Find the target operand flags that describe how a global value should be
     194             : /// referenced for the current subtarget.
     195             : unsigned char
     196        7361 : AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
     197             :                                           const TargetMachine &TM) const {
     198             :   // MachO large model always goes via a GOT, simply to get a single 8-byte
     199             :   // absolute relocation on all global addresses.
     200        7361 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
     201             :     return AArch64II::MO_GOT;
     202             : 
     203        7348 :   if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
     204         506 :     if (GV->hasDLLImportStorageClass())
     205             :       return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
     206         496 :     if (getTargetTriple().isOSWindows())
     207             :       return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
     208         491 :     return AArch64II::MO_GOT;
     209             :   }
     210             : 
     211             :   // The small code model's direct accesses use ADRP, which cannot
     212             :   // necessarily produce the value 0 (if the code is above 4GB).
     213             :   // Same for the tiny code model, where we have a pc relative LDR.
     214        6842 :   if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
     215             :       GV->hasExternalWeakLinkage())
     216          12 :     return AArch64II::MO_GOT;
     217             : 
     218             :   return AArch64II::MO_NO_FLAG;
     219             : }
     220             : 
     221        1324 : unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
     222             :     const GlobalValue *GV, const TargetMachine &TM) const {
     223             :   // MachO large model always goes via a GOT, because we don't have the
     224             :   // relocations available to do anything else..
     225        1324 :   if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
     226             :       !GV->hasInternalLinkage())
     227             :     return AArch64II::MO_GOT;
     228             : 
     229             :   // NonLazyBind goes via GOT unless we know it's available locally.
     230             :   auto *F = dyn_cast<Function>(GV);
     231        1330 :   if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
     232           3 :       !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
     233           2 :     return AArch64II::MO_GOT;
     234             : 
     235             :   return AArch64II::MO_NO_FLAG;
     236             : }
     237             : 
     238       21228 : void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     239             :                                            unsigned NumRegionInstrs) const {
     240             :   // LNT run (at least on Cyclone) showed reasonably significant gains for
     241             :   // bi-directional scheduling. 253.perlbmk.
     242       21228 :   Policy.OnlyTopDown = false;
     243       21228 :   Policy.OnlyBottomUp = false;
     244             :   // Enabling or Disabling the latency heuristic is a close call: It seems to
     245             :   // help nearly no benchmark on out-of-order architectures, on the other hand
     246             :   // it regresses register pressure on a few benchmarking.
     247       21228 :   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
     248       21228 : }
     249             : 
     250       13752 : bool AArch64Subtarget::enableEarlyIfConversion() const {
     251       13752 :   return EnableEarlyIfConvert;
     252             : }
     253             : 
     254       16135 : bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
     255       16135 :   if (!UseAddressTopByteIgnored)
     256             :     return false;
     257             : 
     258             :   if (TargetTriple.isiOS()) {
     259             :     unsigned Major, Minor, Micro;
     260           7 :     TargetTriple.getiOSVersion(Major, Minor, Micro);
     261           7 :     return Major >= 8;
     262             :   }
     263             : 
     264             :   return false;
     265             : }
     266             : 
     267             : std::unique_ptr<PBQPRAConstraint>
     268           5 : AArch64Subtarget::getCustomPBQPConstraints() const {
     269           5 :   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
     270             : }
     271             : 
     272         868 : void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
     273             :   // We usually compute max call frame size after ISel. Do the computation now
     274             :   // if the .mir file didn't specify it. Note that this will probably give you
     275             :   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
     276             :   // instructions, specify explicitly if you need it to be correct.
     277         868 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     278         868 :   if (!MFI.isMaxCallFrameSizeComputed())
     279         864 :     MFI.computeMaxCallFrameSize(MF);
     280         868 : }

Generated by: LCOV version 1.13