LLVM 18.0.0git
AArch64Subtarget.cpp
Go to the documentation of this file.
1//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the AArch64 specific subclass of TargetSubtarget.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64Subtarget.h"
14
15#include "AArch64.h"
16#include "AArch64InstrInfo.h"
17#include "AArch64PBQPRegAlloc.h"
26#include "llvm/IR/GlobalValue.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "aarch64-subtarget"
32
33#define GET_SUBTARGETINFO_CTOR
34#define GET_SUBTARGETINFO_TARGET_DESC
35#include "AArch64GenSubtargetInfo.inc"
36
// Command-line options local to this file.
// NOTE(review): the numbers fused to the start of each line are residue of a
// rendered source listing, not part of the program text.

// Hidden flag "aarch64-early-ifcvt" (initial value: true). Its description
// says it enables the early if-converter pass.
37static cl::opt<bool>
38EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
39 "converter pass"), cl::init(true), cl::Hidden);
40
41// If OS supports TBI, use this flag to enable it.
// Hidden flag "aarch64-use-tbi" (initial value: false).
42static cl::opt<bool>
43UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
44 "an address is ignored"), cl::init(false), cl::Hidden);
45
// Hidden flag "aarch64-enable-nonlazybind" (initial value: false). It is
// checked later in this file together with the NonLazyBind function attribute
// before a function reference is classified as MO_GOT.
46static cl::opt<bool>
47 UseNonLazyBind("aarch64-enable-nonlazybind",
48 cl::desc("Call nonlazybind functions via direct GOT load"),
49 cl::init(false), cl::Hidden);
50
// Flag "aarch64-use-aa" (initial value: true). Unlike the options above it is
// not marked cl::Hidden. AArch64Subtarget::useAA() returns its value.
51static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
52 cl::desc("Enable the use of AA during codegen."));
53
55 "aarch64-insert-extract-base-cost",
56 cl::desc("Base cost of vector insert/extract element"), cl::Hidden);
57
58// Reserve a list of X# registers, so they are unavailable for register
59// allocator, but can still be used as ABI requests, such as passing arguments
60// to function call.
62ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical "
63 "registers, so they can't be used by register allocator. "
64 "Should only be used for testing register allocator."),
66
68 "force-streaming-compatible-sve",
70 "Force the use of streaming-compatible SVE code for all functions"),
72
74 if (OverrideVectorInsertExtractBaseCost.getNumOccurrences() > 0)
77}
78
79AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
80 StringRef FS, StringRef CPUString, StringRef TuneCPUString) {
81 // Determine default and user-specified characteristics
82
83 if (CPUString.empty())
84 CPUString = "generic";
85
86 if (TuneCPUString.empty())
87 TuneCPUString = CPUString;
88
89 ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
90 initializeProperties();
91
92 return *this;
93}
94
/// Applies per-CPU tuning values by switching on ARMProcFamily. In this view
/// the cases assign CacheLineSize, PrefetchDistance, MinPrefetchStride,
/// MaxJumpTableSize and VScaleForTuning; the `Others` case changes nothing.
/// initializeSubtargetDependencies() calls this right after
/// ParseSubtargetFeatures().
///
/// NOTE(review): the numbers fused to the start of each line are residue of a
/// rendered source listing, and they skip (e.g. 107 -> 111, 134 -> 138), so
/// some statements of this function are not visible here. Cases that look
/// empty, and FIXME comments with no statement after them, are most likely
/// missing their assignments -- confirm against the complete file before
/// relying on this text.
95void AArch64Subtarget::initializeProperties() {
96 // Initialize CPU specific properties. We should add a tablegen feature for
97 // this in the future so we can specify it together with the subtarget
98 // features.
99 switch (ARMProcFamily) {
100 case Others:
101 break;
102 case Carmel:
103 CacheLineSize = 64;
104 break;
105 case CortexA35:
106 case CortexA53:
107 case CortexA55:
111 break;
112 case CortexA57:
117 break;
118 case CortexA65:
120 break;
121 case CortexA72:
122 case CortexA73:
123 case CortexA75:
127 break;
128 case CortexA76:
129 case CortexA77:
130 case CortexA78:
131 case CortexA78C:
132 case CortexR82:
133 case CortexX1:
134 case CortexX1C:
138 break;
139 case CortexA510:
141 VScaleForTuning = 1;
144 break;
145 case CortexA710:
146 case CortexA715:
147 case CortexX2:
148 case CortexX3:
150 VScaleForTuning = 1;
153 break;
154 case A64FX:
155 CacheLineSize = 256;
159 PrefetchDistance = 128;
160 MinPrefetchStride = 1024;
162 VScaleForTuning = 4;
163 break;
164 case AppleA7:
165 case AppleA10:
166 case AppleA11:
167 case AppleA12:
168 case AppleA13:
169 case AppleA14:
170 case AppleA15:
171 case AppleA16:
172 CacheLineSize = 64;
173 PrefetchDistance = 280;
174 MinPrefetchStride = 2048;
// Second dispatch on the same ARMProcFamily value, singling out
// AppleA14/A15/A16 from the shared Apple settings above. What is set for
// them is not visible in this listing (line 180 is missing).
176 switch (ARMProcFamily) {
177 case AppleA14:
178 case AppleA15:
179 case AppleA16:
181 break;
182 default:
183 break;
184 }
185 break;
186 case ExynosM3:
188 MaxJumpTableSize = 20;
191 break;
192 case Falkor:
194 // FIXME: remove this to enable 64-bit SLP if performance looks good.
196 CacheLineSize = 128;
197 PrefetchDistance = 820;
198 MinPrefetchStride = 2048;
200 break;
201 case Kryo:
204 CacheLineSize = 128;
205 PrefetchDistance = 740;
206 MinPrefetchStride = 1024;
208 // FIXME: remove this to enable 64-bit SLP if performance looks good.
210 break;
211 case NeoverseE1:
213 break;
214 case NeoverseN1:
218 break;
219 case NeoverseN2:
220 case NeoverseV2:
224 VScaleForTuning = 1;
225 break;
226 case NeoverseV1:
230 VScaleForTuning = 2;
232 break;
233 case Neoverse512TVB:
235 VScaleForTuning = 1;
237 break;
238 case Saphira:
240 // FIXME: remove this to enable 64-bit SLP if performance looks good.
242 break;
243 case ThunderX2T99:
244 CacheLineSize = 64;
248 PrefetchDistance = 128;
249 MinPrefetchStride = 1024;
251 // FIXME: remove this to enable 64-bit SLP if performance looks good.
253 break;
254 case ThunderX:
255 case ThunderXT88:
256 case ThunderXT81:
257 case ThunderXT83:
258 CacheLineSize = 128;
261 // FIXME: remove this to enable 64-bit SLP if performance looks good.
263 break;
264 case TSV110:
265 CacheLineSize = 64;
268 break;
269 case ThunderX3T110:
270 CacheLineSize = 64;
274 PrefetchDistance = 128;
275 MinPrefetchStride = 1024;
277 // FIXME: remove this to enable 64-bit SLP if performance looks good.
279 break;
280 case Ampere1:
281 case Ampere1A:
282 CacheLineSize = 64;
286 break;
287 }
288}
289
291 StringRef TuneCPU, StringRef FS,
292 const TargetMachine &TM, bool LittleEndian,
293 unsigned MinSVEVectorSizeInBitsOverride,
294 unsigned MaxSVEVectorSizeInBitsOverride,
295 bool StreamingSVEMode,
296 bool StreamingCompatibleSVEMode)
297 : AArch64GenSubtargetInfo(TT, CPU, TuneCPU, FS),
298 ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
299 ReserveXRegisterForRA(AArch64::GPR64commonRegClass.getNumRegs()),
300 CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
301 IsLittle(LittleEndian),
302 StreamingSVEMode(StreamingSVEMode),
303 StreamingCompatibleSVEMode(StreamingCompatibleSVEMode),
304 MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
305 MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
306 InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU)),
307 TLInfo(TM, *this) {
310
313 Legalizer.reset(new AArch64LegalizerInfo(*this));
314
315 auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());
316
317 // FIXME: At this point, we can't rely on Subtarget having RBI.
318 // It's awkward to mix passing RBI and the Subtarget; should we pass
319 // TII/TRI as well?
321 *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));
322
323 RegBankInfo.reset(RBI);
324
325 auto TRI = getRegisterInfo();
326 StringSet<> ReservedRegNames;
327 ReservedRegNames.insert(ReservedRegsForRA.begin(), ReservedRegsForRA.end());
328 for (unsigned i = 0; i < 29; ++i) {
329 if (ReservedRegNames.count(TRI->getName(AArch64::X0 + i)))
331 }
332 // X30 is named LR, so we can't use TRI->getName to check X30.
333 if (ReservedRegNames.count("X30") || ReservedRegNames.count("LR"))
335 // X29 is named FP, so we can't use TRI->getName to check X29.
336 if (ReservedRegNames.count("X29") || ReservedRegNames.count("FP"))
338}
339
341 return CallLoweringInfo.get();
342}
343
345 return InlineAsmLoweringInfo.get();
346}
347
349 return InstSelector.get();
350}
351
353 return Legalizer.get();
354}
355
357 return RegBankInfo.get();
358}
359
360/// Find the target operand flags that describe how a global value should be
361/// referenced for the current subtarget.
362unsigned
364 const TargetMachine &TM) const {
365 // MachO large model always goes via a GOT, simply to get a single 8-byte
366 // absolute relocation on all global addresses.
367 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
368 return AArch64II::MO_GOT;
369
370 // All globals dynamically protected by MTE must have their address tags
371 // synthesized. This is done by having the loader stash the tag in the GOT
372 // entry. Force all tagged globals (even ones with internal linkage) through
373 // the GOT.
374 if (GV->isTagged())
375 return AArch64II::MO_GOT;
376
377 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
378 if (GV->hasDLLImportStorageClass()) {
382 }
383 if (getTargetTriple().isOSWindows())
385 return AArch64II::MO_GOT;
386 }
387
388 // The small code model's direct accesses use ADRP, which cannot
389 // necessarily produce the value 0 (if the code is above 4GB).
390 // Same for the tiny code model, where we have a pc relative LDR.
391 if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
393 return AArch64II::MO_GOT;
394
395 // References to tagged globals are marked with MO_NC | MO_TAGGED to indicate
396 // that their nominal addresses are tagged and outside of the code model. In
397 // AArch64ExpandPseudo::expandMI we emit an additional instruction to set the
398 // tag if necessary based on MO_TAGGED.
399 if (AllowTaggedGlobals && !isa<FunctionType>(GV->getValueType()))
401
403}
404
406 const GlobalValue *GV, const TargetMachine &TM) const {
407 // MachO large model always goes via a GOT, because we don't have the
408 // relocations available to do anything else.
409 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
410 !GV->hasInternalLinkage())
411 return AArch64II::MO_GOT;
412
413 // NonLazyBind goes via GOT unless we know it's available locally.
414 auto *F = dyn_cast<Function>(GV);
415 if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
416 !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
417 return AArch64II::MO_GOT;
418
419 if (getTargetTriple().isOSWindows()) {
420 if (isWindowsArm64EC() && GV->getValueType()->isFunctionTy() &&
422 // On Arm64EC, if we're calling a function directly, use MO_DLLIMPORT,
423 // not MO_DLLIMPORTAUX.
425 }
426
427 // Use ClassifyGlobalReference for setting MO_DLLIMPORT/MO_COFFSTUB.
428 return ClassifyGlobalReference(GV, TM);
429 }
430
432}
433
435 unsigned NumRegionInstrs) const {
436 // LNT run (at least on Cyclone) showed reasonably significant gains for
437 // bi-directional scheduling. 253.perlbmk.
438 Policy.OnlyTopDown = false;
439 Policy.OnlyBottomUp = false;
440 // Enabling or Disabling the latency heuristic is a close call: It seems to
441 // help nearly no benchmark on out-of-order architectures, on the other hand
442 // it regresses register pressure on a few benchmarks.
443 Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
444}
445
448}
449
452 return false;
453
455 return true;
456 if (TargetTriple.isiOS()) {
458 }
459
460 return false;
461}
462
463std::unique_ptr<PBQPRAConstraint>
465 return balanceFPOps() ? std::make_unique<A57ChainingConstraint>() : nullptr;
466}
467
469 // We usually compute max call frame size after ISel. Do the computation now
470 // if the .mir file didn't specify it. Note that this will probably give you
471 // bogus values after PEI has eliminated the callframe setup/destroy pseudo
472 // instructions, specify explicitly if you need it to be correct.
473 MachineFrameInfo &MFI = MF.getFrameInfo();
476}
477
478bool AArch64Subtarget::useAA() const { return UseAA; }
479
482}
483
485 return hasNEON() && !isStreaming() && !isStreamingCompatible();
486}
487
489 // FIXME: Also return false if FEAT_FA64 is set, but we can't do this yet
490 // as we don't yet support the feature in LLVM.
491 return hasSVE() && !isStreaming() && !isStreamingCompatible();
492}
This file describes how to lower LLVM calls to machine code calls.
This file declares the targeting of the Machinelegalizer class for AArch64.
This file declares the targeting of the RegisterBankInfo class for AArch64.
static cl::opt< bool > UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " "an address is ignored"), cl::init(false), cl::Hidden)
static cl::opt< bool > ForceStreamingCompatibleSVE("force-streaming-compatible-sve", cl::desc("Force the use of streaming-compatible SVE code for all functions"), cl::Hidden)
static cl::opt< bool > UseNonLazyBind("aarch64-enable-nonlazybind", cl::desc("Call nonlazybind functions via direct GOT load"), cl::init(false), cl::Hidden)
static cl::opt< bool > EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " "converter pass"), cl::init(true), cl::Hidden)
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static cl::list< std::string > ReservedRegsForRA("reserve-regs-for-regalloc", cl::desc("Reserve physical " "registers, so they can't be used by register allocator. " "Should only be used for testing register allocator."), cl::CommaSeparated, cl::Hidden)
static cl::opt< unsigned > OverrideVectorInsertExtractBaseCost("aarch64-insert-extract-base-cost", cl::desc("Base cost of vector insert/extract element"), cl::Hidden)
#define F(x, y, z)
Definition: MD5.cpp:55
unsigned const TargetRegisterInfo * TRI
const char LLVMTargetMachineRef TM
return InstrInfo
This class provides the information for the target register banks.
This class provides the information for the target register banks.
const CallLowering * getCallLowering() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
const AArch64RegisterInfo * getRegisterInfo() const override
TailFoldingOpts DefaultSVETFOpts
std::unique_ptr< InstructionSelector > InstSelector
AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU, StringRef FS, const TargetMachine &TM, bool LittleEndian, unsigned MinSVEVectorSizeInBitsOverride=0, unsigned MaxSVEVectorSizeInBitsOverride=0, bool StreamingSVEMode=false, bool StreamingCompatibleSVEMode=false)
This constructor initializes the data members to match that of the specified triple.
ARMProcFamilyEnum ARMProcFamily
ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
std::unique_ptr< RegisterBankInfo > RegBankInfo
bool useSmallAddressing() const
bool isStreamingCompatible() const
Returns true if the function has a streaming-compatible body.
void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const override
bool enableEarlyIfConversion() const override
const InlineAsmLowering * getInlineAsmLowering() const override
unsigned getVectorInsertExtractBaseCost() const
std::unique_ptr< CallLowering > CallLoweringInfo
GlobalISel related APIs.
unsigned classifyGlobalFunctionReference(const GlobalValue *GV, const TargetMachine &TM) const
bool useAA() const override
const AArch64TargetLowering * getTargetLowering() const override
bool supportsAddressTopByteIgnored() const
CPU has TBI (top byte of addresses is ignored during HW address translation) and OS enables it.
const Triple & getTargetTriple() const
void mirFileLoaded(MachineFunction &MF) const override
Triple TargetTriple
TargetTriple - What processor and OS we're targeting.
InstructionSelector * getInstructionSelector() const override
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
void ParseSubtargetFeatures(StringRef CPU, StringRef TuneCPU, StringRef FS)
ParseSubtargetFeatures - Parses features string setting specified subtarget options.
bool isStreaming() const
Returns true if the function has a streaming body.
bool isSVEAvailable() const
Returns true if the target has SVE and can use the full range of SVE instructions,...
const LegalizerInfo * getLegalizerInfo() const override
std::unique_ptr< PBQPRAConstraint > getCustomPBQPConstraints() const override
const RegisterBankInfo * getRegBankInfo() const override
std::unique_ptr< InlineAsmLowering > InlineAsmLoweringInfo
BitVector & set()
Definition: BitVector.h:351
bool isTagged() const
Definition: GlobalValue.h:360
bool hasExternalWeakLinkage() const
Definition: GlobalValue.h:524
bool hasDLLImportStorageClass() const
Definition: GlobalValue.h:274
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
bool hasInternalLinkage() const
Definition: GlobalValue.h:521
Type * getValueType() const
Definition: GlobalValue.h:292
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void computeMaxCallFrameSize(const MachineFunction &MF)
Computes the maximum size of a callframe and the AdjustsStack property.
bool isMaxCallFrameSizeComputed() const
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Holds all the information related to register banks.
size_type count(StringRef Key) const
count - Return 1 if the element is in the map, 0 otherwise.
Definition: StringMap.h:257
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
StringSet - A wrapper for StringMap that provides set-like functionality.
Definition: StringSet.h:23
std::pair< typename Base::iterator, bool > insert(StringRef key)
Definition: StringSet.h:34
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isDriverKit() const
Is this an Apple DriverKit triple.
Definition: Triple.h:513
bool isiOS() const
Is this an iOS triple.
Definition: Triple.h:494
VersionTuple getiOSVersion() const
Parse the version number as with getOSVersion.
Definition: Triple.cpp:1266
bool isFunctionTy() const
True if this is an instance of FunctionType.
Definition: Type.h:246
Represents a version number in the form major[.minor[.subminor[.build]]].
Definition: VersionTuple.h:31
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_DLLIMPORTAUX
MO_DLLIMPORTAUX - Symbol refers to "auxiliary" import stub.
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
bool isX18ReservedByDefault(const Triple &TT)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ CommaSeparated
Definition: CommandLine.h:164
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
InstructionSelector * createAArch64InstructionSelector(const AArch64TargetMachine &, AArch64Subtarget &, AArch64RegisterBankInfo &)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Define a generic scheduling policy for targets that don't provide their own MachineSchedStrategy.