LCOV - code coverage report
Current view: top level - lib/Target/AArch64 - AArch64Subtarget.cpp (source / functions) Hit Total Coverage
Test: llvm-toolchain.info Lines: 123 123 100.0 %
Date: 2018-10-20 13:21:21 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file implements the AArch64 specific subclass of TargetSubtarget.
      11             : //
      12             : //===----------------------------------------------------------------------===//
      13             : 
      14             : #include "AArch64Subtarget.h"
      15             : 
      16             : #include "AArch64.h"
      17             : #include "AArch64InstrInfo.h"
      18             : #include "AArch64PBQPRegAlloc.h"
      19             : #include "AArch64TargetMachine.h"
      20             : 
      21             : #include "AArch64CallLowering.h"
      22             : #include "AArch64LegalizerInfo.h"
      23             : #include "AArch64RegisterBankInfo.h"
      24             : #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
      25             : #include "llvm/CodeGen/MachineScheduler.h"
      26             : #include "llvm/IR/GlobalValue.h"
      27             : #include "llvm/Support/TargetParser.h"
      28             : 
      29             : using namespace llvm;
      30             : 
      31             : #define DEBUG_TYPE "aarch64-subtarget"
      32             : 
      33             : #define GET_SUBTARGETINFO_CTOR
      34             : #define GET_SUBTARGETINFO_TARGET_DESC
      35             : #include "AArch64GenSubtargetInfo.inc"
      36             : 
// Enable the early if-conversion pass (on by default); see
// enableEarlyIfConversion() below, which simply reports this flag.
static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

// Route calls to nonlazybind functions through the GOT rather than calling
// them directly (off by default); consulted in
// classifyGlobalFunctionReference() below.
static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);
      50             : 
      51             : AArch64Subtarget &
      52        1570 : AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
      53             :                                                   StringRef CPUString) {
      54             :   // Determine default and user-specified characteristics
      55             : 
      56        1570 :   if (CPUString.empty())
      57        1295 :     CPUString = "generic";
      58             : 
      59        1570 :   ParseSubtargetFeatures(CPUString, FS);
      60        1570 :   initializeProperties();
      61             : 
      62        1570 :   return *this;
      63             : }
      64             : 
/// Set CPU-specific tuning knobs (cache line size, prefetch parameters,
/// interleave factor, preferred alignments) from the ARMProcFamily that
/// ParseSubtargetFeatures() established. Families not listed keep the
/// defaults.
/// NOTE(review): the Pref*Alignment values look like log2 of bytes (e.g. 4
/// => 16-byte alignment) — confirm against the TargetLowering setters.
void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Cyclone:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = 4;
    break;
  case ExynosM1:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 8;
    PrefFunctionAlignment = 4;
    PrefLoopAlignment = 3;
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = 5;
    PrefLoopAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = 3;
    PrefLoopAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  // The four ThunderX T8x variants share one set of tuning values.
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = 3;
    PrefLoopAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case CortexA35: break;
  case CortexA53:
    PrefFunctionAlignment = 3;
    break;
  case CortexA55: break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = 4;
    break;
  // "Others" (including the generic model) keeps all defaults.
  case Others: break;
  }
}
     150             : 
/// Construct a subtarget for the given triple/CPU/feature string.
/// Member-init-list ordering is deliberate: InstrInfo is initialized with
/// the result of initializeSubtargetDependencies(FS, CPU), which parses the
/// feature bits and CPU tuning properties first, so TLInfo (initialized
/// after InstrInfo) sees a fully configured subtarget.
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian)
    : AArch64GenSubtargetInfo(TT, CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      TargetTriple(TT), FrameLowering(),
      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
      TLInfo(TM, *this) {
  // Reserve x18 on triples whose ABI treats it as a platform register.
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  // Set up the GlobalISel pipeline objects owned by this subtarget.
  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  // RegBankInfo takes ownership of RBI last, after the selector has been
  // created from the raw pointer above.
  RegBankInfo.reset(RBI);
}
     177             : 
/// Accessor for the GlobalISel call-lowering object created in the
/// constructor; ownership stays with this subtarget.
const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}
     181             : 
/// Accessor for the GlobalISel instruction selector created in the
/// constructor; ownership stays with this subtarget.
const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}
     185             : 
/// Accessor for the GlobalISel legalizer info created in the constructor;
/// ownership stays with this subtarget.
const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}
     189             : 
/// Accessor for the register-bank info created in the constructor;
/// ownership stays with this subtarget.
const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}
     193             : 
/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget. The checks are ordered from most
/// to least specific, so their sequence is semantic.
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  // Symbols not known to resolve within this DSO need some indirection.
  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    // DLL imports additionally carry MO_DLLIMPORT so the __imp_-style
    // import symbol is referenced.
    if (GV->hasDLLImportStorageClass())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    // Other non-local references on Windows go through a COFF stub.
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  // Otherwise the global can be referenced directly.
  return AArch64II::MO_NO_FLAG;
}
     221             : 
/// Find the target operand flags for a reference to the given global used
/// as a call target (the call-specific counterpart of
/// ClassifyGlobalReference above).
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  // Gated on the -aarch64-enable-nonlazybind flag (off by default).
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  // Otherwise the function can be called directly.
  return AArch64II::MO_NO_FLAG;
}
     238             : 
     239       21669 : void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
     240             :                                            unsigned NumRegionInstrs) const {
     241             :   // LNT run (at least on Cyclone) showed reasonably significant gains for
     242             :   // bi-directional scheduling. 253.perlbmk.
     243       21669 :   Policy.OnlyTopDown = false;
     244       21669 :   Policy.OnlyBottomUp = false;
     245             :   // Enabling or Disabling the latency heuristic is a close call: It seems to
     246             :   // help nearly no benchmark on out-of-order architectures, on the other hand
     247             :   // it regresses register pressure on a few benchmarking.
     248       21669 :   Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
     249       21669 : }
     250             : 
/// Report whether the early if-conversion pass should run; controlled by
/// the -aarch64-early-ifcvt flag (default true).
bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}
     254             : 
     255       18187 : bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
     256       18187 :   if (!UseAddressTopByteIgnored)
     257             :     return false;
     258             : 
     259             :   if (TargetTriple.isiOS()) {
     260             :     unsigned Major, Minor, Micro;
     261           7 :     TargetTriple.getiOSVersion(Major, Minor, Micro);
     262           7 :     return Major >= 8;
     263             :   }
     264             : 
     265             :   return false;
     266             : }
     267             : 
     268             : std::unique_ptr<PBQPRAConstraint>
     269           5 : AArch64Subtarget::getCustomPBQPConstraints() const {
     270           5 :   return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
     271             : }
     272             : 
     273         905 : void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
     274             :   // We usually compute max call frame size after ISel. Do the computation now
     275             :   // if the .mir file didn't specify it. Note that this will probably give you
     276             :   // bogus values after PEI has eliminated the callframe setup/destroy pseudo
     277             :   // instructions, specify explicitly if you need it to be correct.
     278         905 :   MachineFrameInfo &MFI = MF.getFrameInfo();
     279         905 :   if (!MFI.isMaxCallFrameSizeComputed())
     280         901 :     MFI.computeMaxCallFrameSize(MF);
     281         905 : }

Generated by: LCOV version 1.13