LLVM  12.0.0git
AArch64Subtarget.cpp
Go to the documentation of this file.
1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64 specific subclass of TargetSubtarget.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64Subtarget.h"
14 
15 #include "AArch64.h"
16 #include "AArch64InstrInfo.h"
17 #include "AArch64PBQPRegAlloc.h"
18 #include "AArch64TargetMachine.h"
25 #include "llvm/IR/GlobalValue.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "aarch64-subtarget"
31 
32 #define GET_SUBTARGETINFO_CTOR
33 #define GET_SUBTARGETINFO_TARGET_DESC
34 #include "AArch64GenSubtargetInfo.inc"
35 
// Hidden boolean command-line option "aarch64-early-ifcvt" controlling the
// early if-converter pass. It is enabled by default (cl::init(true)).
36 static cl::opt<bool>
37 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
38  "converter pass"), cl::init(true), cl::Hidden);
39 
40 // If OS supports TBI, use this flag to enable it.
// Hidden boolean option "aarch64-use-tbi"; off by default (cl::init(false)),
// so the top byte of an address is not assumed to be ignored unless requested.
41 static cl::opt<bool>
42 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
43  "an address is ignored"), cl::init(false), cl::Hidden);
44 
// Hidden boolean option "aarch64-enable-nonlazybind"; off by default. When
// set, a function carrying the NonLazyBind attribute that is not assumed to be
// DSO-local is classified as a GOT reference (MO_GOT) further down this file.
45 static cl::opt<bool>
46  UseNonLazyBind("aarch64-enable-nonlazybind",
47  cl::desc("Call nonlazybind functions via direct GOT load"),
48  cl::init(false), cl::Hidden);
49 
// Arguments of the SVEVectorBitsMax option (its declarator line, 50, is elided
// from this listing). Hidden unsigned option "aarch64-sve-vector-bits-max";
// the default of 0 means no maximum SVE register size is assumed.
51  "aarch64-sve-vector-bits-max",
52  cl::desc("Assume SVE vector registers are at most this big, "
53  "with zero meaning no maximum size is assumed."),
54  cl::init(0), cl::Hidden);
55 
// Arguments of the SVEVectorBitsMin option (its declarator line, 56, is elided
// from this listing). Hidden unsigned option "aarch64-sve-vector-bits-min";
// the default of 0 means no minimum SVE register size is assumed.
57  "aarch64-sve-vector-bits-min",
58  cl::desc("Assume SVE vector registers are at least this big, "
59  "with zero meaning no minimum size is assumed."),
60  cl::init(0), cl::Hidden);
61 
// Parses the CPU name and feature string into this subtarget and then applies
// the CPU-specific tuning from initializeProperties(). Returns *this, which
// lets the constructor call it from its member-initializer list while
// constructing InstrInfo. (The return-type line, 62, is elided from this
// listing.)
//
// FS        - subtarget feature string handed to ParseSubtargetFeatures().
// CPUString - CPU name; an empty name is replaced by "generic".
63 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
64  StringRef CPUString) {
65  // Determine default and user-specified characteristics
66 
// An empty CPU name falls back to the "generic" CPU.
67  if (CPUString.empty())
68  CPUString = "generic";
69 
// Features must be parsed before initializeProperties() runs; the switch there
// reads ARMProcFamily, which is presumably set by the feature parsing.
70  ParseSubtargetFeatures(CPUString, FS);
71  initializeProperties();
72 
73  return *this;
74 }
75 
// Applies per-CPU tuning values by switching on ARMProcFamily. The values
// visible here are CacheLineSize, PrefetchDistance, MinPrefetchStride and
// MaxJumpTableSize. Families whose case shows only `break` add nothing in
// the lines visible here.
//
// NOTE(review): the embedded line numbers skip in several places (e.g. 89,
// 94-95, 107, 111-112), so further assignments are elided from this listing.
// The FIXME comments below refer to such elided statements.
76 void AArch64Subtarget::initializeProperties() {
77  // Initialize CPU specific properties. We should add a tablegen feature for
78  // this in the future so we can specify it together with the subtarget
79  // features.
80  switch (ARMProcFamily) {
81  case Others:
82  break;
83  case Carmel:
84  CacheLineSize = 64;
85  break;
86  case CortexA35:
87  break;
88  case CortexA53:
90  break;
91  case CortexA55:
92  break;
93  case CortexA57:
96  break;
97  case CortexA65:
99  break;
// Cortex-A72 through Cortex-X1 share one case body.
100  case CortexA72:
101  case CortexA73:
102  case CortexA75:
103  case CortexA76:
104  case CortexA77:
105  case CortexA78:
106  case CortexX1:
108  break;
109  case A64FX:
110  CacheLineSize = 256;
113  break;
// All listed Apple cores share the same cache and prefetch tuning.
114  case AppleA7:
115  case AppleA10:
116  case AppleA11:
117  case AppleA12:
118  case AppleA13:
119  CacheLineSize = 64;
120  PrefetchDistance = 280;
121  MinPrefetchStride = 2048;
123  break;
124  case ExynosM3:
126  MaxJumpTableSize = 20;
129  break;
130  case Falkor:
132  // FIXME: remove this to enable 64-bit SLP if performance looks good.
134  CacheLineSize = 128;
135  PrefetchDistance = 820;
136  MinPrefetchStride = 2048;
138  break;
139  case Kryo:
142  CacheLineSize = 128;
143  PrefetchDistance = 740;
144  MinPrefetchStride = 1024;
146  // FIXME: remove this to enable 64-bit SLP if performance looks good.
148  break;
149  case NeoverseE1:
151  break;
152  case NeoverseN1:
154  break;
155  case Saphira:
157  // FIXME: remove this to enable 64-bit SLP if performance looks good.
159  break;
160  case ThunderX2T99:
161  CacheLineSize = 64;
165  PrefetchDistance = 128;
166  MinPrefetchStride = 1024;
168  // FIXME: remove this to enable 64-bit SLP if performance looks good.
170  break;
// The ThunderX variants share one case body.
171  case ThunderX:
172  case ThunderXT88:
173  case ThunderXT81:
174  case ThunderXT83:
175  CacheLineSize = 128;
178  // FIXME: remove this to enable 64-bit SLP if performance looks good.
180  break;
181  case TSV110:
182  CacheLineSize = 64;
185  break;
186  case ThunderX3T110:
187  CacheLineSize = 64;
191  PrefetchDistance = 128;
192  MinPrefetchStride = 1024;
194  // FIXME: remove this to enable 64-bit SLP if performance looks good.
196  break;
197  }
198 }
199 
// Constructs the subtarget for the given triple, CPU name and feature string.
// It initializes the generated subtarget base class, sizes the reserved and
// custom-call-saved X-register bit vectors to the GPR64common register class,
// records endianness, and creates the GlobalISel helper objects.
//
// TT           - target triple, forwarded to the generated base class.
// CPU, FS      - CPU name and feature string; parsed through
//                initializeSubtargetDependencies().
// TM           - owning target machine; used for TLInfo and cast to
//                AArch64TargetMachine below.
// LittleEndian - stored in IsLittle.
//
// NOTE(review): lines 207, 210, 213-214 and 222 are elided from this listing.
200 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
201  const std::string &FS,
202  const TargetMachine &TM, bool LittleEndian)
203  : AArch64GenSubtargetInfo(TT, CPU, FS),
204  ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
205  CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
206  IsLittle(LittleEndian),
// initializeSubtargetDependencies() runs as part of constructing InstrInfo, so
// features and CPU properties are parsed at this point in member
// initialization.
208  InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
209  TLInfo(TM, *this) {
// NOTE(review): the line before this one (210) is elided; this X18 reservation
// is presumably guarded by a condition -- confirm against the full file.
211  ReserveXRegister.set(18);
212 
215  Legalizer.reset(new AArch64LegalizerInfo(*this));
216 
217  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
218 
219  // FIXME: At this point, we can't rely on Subtarget having RBI.
220  // It's awkward to mix passing RBI and the Subtarget; should we pass
221  // TII/TRI as well?
// Tail of a call whose first line (222) is elided; it receives the target
// machine, this subtarget and RBI (presumably the instruction selector
// creation -- TODO confirm).
223  *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
224 
// RegBankInfo takes over the RBI object allocated with `new` above.
225  RegBankInfo.reset(RBI);
226 }
227 
// Accessor body (signature line 228 is elided; presumably getCallLowering()).
// Returns the raw, non-owning pointer held by CallLoweringInfo.
229  return CallLoweringInfo.get();
230 }
231 
// Accessor body (signature line 232 is elided; presumably
// getInlineAsmLowering()). Returns the raw, non-owning pointer held by
// InlineAsmLoweringInfo.
233  return InlineAsmLoweringInfo.get();
234 }
235 
// Accessor body (signature line 236 is elided; presumably
// getInstructionSelector()). Returns the raw, non-owning pointer held by
// InstSelector.
237  return InstSelector.get();
238 }
239 
// Accessor body (signature line 240 is elided; presumably getLegalizerInfo()).
// Returns the raw, non-owning pointer held by Legalizer, which the constructor
// populates.
241  return Legalizer.get();
242 }
243 
// Accessor body (signature line 244 is elided; presumably getRegBankInfo()).
// Returns the raw, non-owning pointer held by RegBankInfo, which the
// constructor populates.
245  return RegBankInfo.get();
246 }
247 
248 /// Find the target operand flags that describe how a global value should be
249 /// referenced for the current subtarget.
///
/// The result is an AArch64II::MO_* flag value. Checks run in order and the
/// first match wins; AArch64II::MO_NO_FLAG is returned when none applies.
///
/// NOTE(review): the declarator line (251) and several conditions or returns
/// (255, 260-262, 270, 278) are elided from this listing.
250 unsigned
252  const TargetMachine &TM) const {
253  // MachO large model always goes via a GOT, simply to get a single 8-byte
254  // absolute relocation on all global addresses.
256  return AArch64II::MO_GOT;
257 
// Globals that cannot be assumed DSO-local: the dllimport case is handled by
// elided lines; the visible result for this branch is a GOT reference.
258  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
259  if (GV->hasDLLImportStorageClass())
263  return AArch64II::MO_GOT;
264  }
265 
266  // The small code model's direct accesses use ADRP, which cannot
267  // necessarily produce the value 0 (if the code is above 4GB).
268  // Same for the tiny code model, where we have a pc relative LDR.
269  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
271  return AArch64II::MO_GOT;
272 
273  // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
274  // that their nominal addresses are tagged and outside of the code model. In
275  // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
276  // tag if necessary based on MO_TAGGED.
277  if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
279 
280  return AArch64II::MO_NO_FLAG;
281 }
282 
// Classifies how a function-like global should be referenced and returns an
// AArch64II::MO_* flag value. (The first declarator line, 283, is elided from
// this listing; presumably classifyGlobalFunctionReference().)
//
// GV - the global being referenced.
// TM - target machine; supplies the code model and the DSO-local assumption.
284  const GlobalValue *GV, const TargetMachine &TM) const {
285  // MachO large model always goes via a GOT, because we don't have the
286  // relocations available to do anything else.
287  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
288  !GV->hasInternalLinkage())
289  return AArch64II::MO_GOT;
290 
291  // NonLazyBind goes via GOT unless we know it's available locally.
// F is null when GV is not a Function, which disables this check.
292  auto *F = dyn_cast<Function>(GV);
293  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
294  !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
295  return AArch64II::MO_GOT;
296 
297  // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
// NOTE(review): the condition guarding this return (line 298) is elided from
// this listing.
299  return ClassifyGlobalReference(GV, TM);
300 
301  return AArch64II::MO_NO_FLAG;
302 }
303 
// Adjusts the scheduling policy for this subtarget: both OnlyTopDown and
// OnlyBottomUp are cleared, i.e. bi-directional scheduling is selected. (The
// first declarator line, 304, is elided from this listing; presumably
// overrideSchedPolicy().) NumRegionInstrs is not read in the visible lines.
305  unsigned NumRegionInstrs) const {
306  // LNT run (at least on Cyclone) showed reasonably significant gains for
307  // bi-directional scheduling. 253.perlbmk.
308  Policy.OnlyTopDown = false;
309  Policy.OnlyBottomUp = false;
310  // Enabling or Disabling the latency heuristic is a close call: It seems to
311  // help nearly no benchmark on out-of-order architectures, on the other hand
312  // it regresses register pressure on a few benchmarks.
// NOTE(review): the statement this comment describes (line 313) is elided from
// this listing.
314 }
315 
// Body of a predicate whose signature line (316) is elided; presumably
// enableEarlyIfConversion(). It returns the current value of the
// "aarch64-early-ifcvt" command-line option.
317  return EnableEarlyIfConvert;
318 }
319 
// Body of a predicate whose signature and first condition (lines 320-321) are
// elided; presumably supportsAddressTopByteIgnored(). In the visible lines the
// only path returning true is an iOS triple with major version >= 8.
322  return false;
323 
324  if (TargetTriple.isiOS()) {
325  unsigned Major, Minor, Micro;
326  TargetTriple.getiOSVersion(Major, Minor, Micro);
// Only the major version decides; Minor and Micro are not read.
327  return Major >= 8;
328  }
329 
330  return false;
331 }
332 
// Returns a newly allocated A57ChainingConstraint when balanceFPOps() is true,
// and nullptr otherwise. (The declarator line, 334, is elided from this
// listing; presumably getCustomPBQPConstraints().)
333 std::unique_ptr<PBQPRAConstraint>
335  return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
336 }
337 
// Body of a hook whose signature line (338) is elided; presumably
// mirFileLoaded(MachineFunction &MF). It computes the maximum call frame size
// on MF's frame info only when it has not been computed already.
339  // We usually compute max call frame size after ISel. Do the computation now
340  // if the .mir file didn't specify it. Note that this will probably give you
341  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
342  // instructions, specify explicitly if you need it to be correct.
343  MachineFrameInfo &MFI = MF.getFrameInfo();
344  if (!MFI.isMaxCallFrameSizeComputed())
345  MFI.computeMaxCallFrameSize(MF);
346 }
347 
// Body of an SVE size query whose signature line (348) is elided; presumably
// getMaxSVEVectorSizeInBits(). It returns 0 when SVEVectorBitsMax is 0 (no
// maximum assumed). Otherwise it returns the larger of SVEVectorBitsMin and
// SVEVectorBitsMax, rounded down to a multiple of 128. The asserts require SVE
// support and a maximum that is a multiple of 128.
// NOTE(review): line 352, the start of the second multi-line assert, is elided
// from this listing.
349  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
350  assert(SVEVectorBitsMax % 128 == 0 &&
351  "SVE requires vector length in multiples of 128!");
353  "Minimum SVE vector size should not be larger than its maximum!");
354  if (SVEVectorBitsMax == 0)
355  return 0;
356  return (std::max(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
357 }
358 
// Body of an SVE size query whose signature line (359) is elided; presumably
// getMinSVEVectorSizeInBits(). When SVEVectorBitsMax is 0 (no maximum assumed)
// it returns SVEVectorBitsMin rounded down to a multiple of 128. Otherwise it
// returns the smaller of SVEVectorBitsMin and SVEVectorBitsMax, rounded down
// the same way. The asserts require SVE support and a minimum that is a
// multiple of 128.
// NOTE(review): line 363, the start of the second multi-line assert, is elided
// from this listing.
360  assert(HasSVE && "Tried to get SVE vector length without SVE support!");
361  assert(SVEVectorBitsMin % 128 == 0 &&
362  "SVE requires vector length in multiples of 128!");
364  "Minimum SVE vector size should not be larger than its maximum!");
365  if (SVEVectorBitsMax == 0)
366  return (SVEVectorBitsMin / 128) * 128;
367  return (std::min(SVEVectorBitsMin, SVEVectorBitsMax) / 128) * 128;
368 }
const Triple & getTargetTriple() const
void getiOSVersion(unsigned &Major, unsigned &Minor, unsigned &Micro) const
getiOSVersion - Parse the version number as with getOSVersion.
Definition: Triple.cpp:1124
BitVector & set()
Definition: BitVector.h:398
LLVM_NODISCARD std::enable_if_t< !is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type > dyn_cast(const Y &Val)
Definition: Casting.h:334
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
This class represents lattice values for constants.
Definition: AllocatorList.h:23
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
std::unique_ptr< InlineAsmLowering > InlineAsmLoweringInfo
static cl::opt< unsigned > SVEVectorBitsMin("aarch64-sve-vector-bits-min", cl::desc("Assume SVE vector registers are at least this big, " "with zero meaning no minimum size is assumed."), cl::init(0), cl::Hidden)
This class provides the information for the target register banks.
InstructionSelector * getInstructionSelector() const override
AArch64SelectionDAGInfo TSInfo
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:259
F(f)
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:446
const CallLowering * getCallLowering() const override
void mirFileLoaded(MachineFunction &MF) const override
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
This file declares the targeting of the RegisterBankInfo class for AArch64.
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
Holds all the information related to register banks.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
unsigned getMinSVEVectorSizeInBits() const
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:156
static cl::opt< bool > EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden)
std::unique_ptr< InstructionSelector > InstSelector
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
const AArch64RegisterInfo * getRegisterInfo() const override
This file declares the targeting of the Machinelegalizer class for AArch64.
bool isiOS() const
Is this an iOS triple.
Definition: Triple.h:456
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it...
bool enableEarlyIfConversion() const override
bool isOSWindows() const
Tests whether the OS is Windows.
Definition: Triple.h:532
const AArch64TargetLowering * getTargetLowering() const override
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:434
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
bool isX18ReservedByDefault(const Triple &TT)
bool useSmallAddressing() const
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:305
bool hasInternalLinkage() const
Definition: GlobalValue.h:443
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
std::unique_ptr< CallLowering > CallLoweringInfo
GlobalISel related APIs.
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:45
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:350
AArch64InstrInfo InstrInfo
void ParseSubtargetFeatures(StringRef CPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
static cl::opt< unsigned > SVEVectorBitsMax("aarch64-sve-vector-bits-max", cl::desc("Assume SVE vector registers are at most this big, " "with zero meaning no maximum size is assumed."), cl::init(0), cl::Hidden)
CodeModel::Model getCodeModel() const
Returns the code model.
std::unique_ptr< RegisterBankInfo > RegBankInfo
Provides the logic to select generic machine instructions.
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.
This class provides the information for the target register banks.
std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const override
AArch64Subtarget(const Triple &TT, const std::string &CPU, const std::string &FS, const TargetMachine &TM, bool LittleEndian)
This constructor initializes the data members to match that of the specified triple.
const LegalizerInfo * getLegalizerInfo() const override
static cl::opt< bool > UseNonLazyBind("aarch64-enable-nonlazybind", cl::desc("Call nonlazybind functions via direct GOT load"), cl::init(false), cl::Hidden)
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "...
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
AArch64FrameLowering FrameLowering
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
Type * getValueType() const
Definition: GlobalValue.h:273
This file describes how to lower LLVM calls to machine code calls.
Triple TargetTriple
TargetTriple - What processor and OS we're targeting.
ARMProcFamilyEnum ARMProcFamily
ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:572
const RegisterBankInfo * getRegBankInfo() const override
bool isMaxCallFrameSizeComputed() const
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:65
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow...
AArch64TargetLowering TLInfo
const InlineAsmLowering * getInlineAsmLowering() const override
unsigned getMaxSVEVectorSizeInBits() const
static cl::opt< bool > UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " "an address is ignored"), cl::init(false), cl::Hidden)