//===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//

#include "AArch64Subtarget.h"

#include "AArch64.h"
#include "AArch64InstrInfo.h"
#include "AArch64PBQPRegAlloc.h"
#include "AArch64TargetMachine.h"

#include "AArch64CallLowering.h"
#include "AArch64LegalizerInfo.h"
#include "AArch64RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/TargetParser.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-subtarget"

#define GET_SUBTARGETINFO_CTOR
#define GET_SUBTARGETINFO_TARGET_DESC
#include "AArch64GenSubtargetInfo.inc"

static cl::opt<bool>
EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if "
                     "converter pass"), cl::init(true), cl::Hidden);

// If the OS supports TBI, use this flag to enable it.
static cl::opt<bool>
UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of "
                         "an address is ignored"), cl::init(false), cl::Hidden);

static cl::opt<bool>
    UseNonLazyBind("aarch64-enable-nonlazybind",
                   cl::desc("Call nonlazybind functions via direct GOT load"),
                   cl::init(false), cl::Hidden);

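/// Apply the CPU and feature strings to this subtarget. An empty CPU string
/// falls back to "generic"; the tablegen'erated feature parser is then run
/// and the per-CPU tuning properties are initialized.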
AArch64Subtarget &
AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                  StringRef CPUString) {
  // Determine default and user-specified characteristics

  if (CPUString.empty())
    CPUString = "generic";

  ParseSubtargetFeatures(CPUString, FS);
  initializeProperties();

  return *this;
}

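/// Set the CPU-specific tuning properties (cache line size, prefetching
/// parameters, preferred alignments, interleaving factors, ...) based on the
/// detected processor family.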
void AArch64Subtarget::initializeProperties() {
  // Initialize CPU specific properties. We should add a tablegen feature for
  // this in the future so we can specify it together with the subtarget
  // features.
  switch (ARMProcFamily) {
  case Cyclone:
    CacheLineSize = 64;
    PrefetchDistance = 280;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 3;
    break;
  case CortexA57:
    MaxInterleaveFactor = 4;
    PrefFunctionAlignment = 4;
    break;
  case ExynosM1:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 8;
    PrefFunctionAlignment = 4;
    PrefLoopAlignment = 3;
    break;
  case ExynosM3:
    MaxInterleaveFactor = 4;
    MaxJumpTableSize = 20;
    PrefFunctionAlignment = 5;
    PrefLoopAlignment = 4;
    break;
  case Falkor:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    CacheLineSize = 128;
    PrefetchDistance = 820;
    MinPrefetchStride = 2048;
    MaxPrefetchIterationsAhead = 8;
    break;
  case Saphira:
    MaxInterleaveFactor = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case Kryo:
    MaxInterleaveFactor = 4;
    VectorInsertExtractBaseCost = 2;
    CacheLineSize = 128;
    PrefetchDistance = 740;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 11;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX2T99:
    CacheLineSize = 64;
    PrefFunctionAlignment = 3;
    PrefLoopAlignment = 2;
    MaxInterleaveFactor = 4;
    PrefetchDistance = 128;
    MinPrefetchStride = 1024;
    MaxPrefetchIterationsAhead = 4;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case ThunderX:
  case ThunderXT88:
  case ThunderXT81:
  case ThunderXT83:
    CacheLineSize = 128;
    PrefFunctionAlignment = 3;
    PrefLoopAlignment = 2;
    // FIXME: remove this to enable 64-bit SLP if performance looks good.
    MinVectorRegisterBitWidth = 128;
    break;
  case CortexA35: break;
  case CortexA53:
    PrefFunctionAlignment = 3;
    break;
  case CortexA55: break;
  case CortexA72:
  case CortexA73:
  case CortexA75:
    PrefFunctionAlignment = 4;
    break;
  case Others: break;
  }
}

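/// Create the subtarget for the given triple, CPU and feature string. Besides
/// initializing the tablegen'erated subtarget state, this reserves X18 on
/// platforms that require it and sets up the GlobalISel support classes (call
/// lowering, legalizer, register bank info and instruction selector).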
AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU,
                                   const std::string &FS,
                                   const TargetMachine &TM, bool LittleEndian)
    : AArch64GenSubtargetInfo(TT, CPU, FS),
      ReserveXRegister(AArch64::GPR64commonRegClass.getNumRegs()),
      CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
      IsLittle(LittleEndian),
      TargetTriple(TT), FrameLowering(),
      InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(),
      TLInfo(TM, *this) {
  if (AArch64::isX18ReservedByDefault(TT))
    ReserveXRegister.set(18);

  CallLoweringInfo.reset(new AArch64CallLowering(*getTargetLowering()));
  Legalizer.reset(new AArch64LegalizerInfo(*this));

  auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo());

  // FIXME: At this point, we can't rely on Subtarget having RBI.
  // It's awkward to mix passing RBI and the Subtarget; should we pass
  // TII/TRI as well?
  InstSelector.reset(createAArch64InstructionSelector(
      *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI));

  RegBankInfo.reset(RBI);
}

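// GlobalISel accessors: return the support objects created in the constructor.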
const CallLowering *AArch64Subtarget::getCallLowering() const {
  return CallLoweringInfo.get();
}

const InstructionSelector *AArch64Subtarget::getInstructionSelector() const {
  return InstSelector.get();
}

const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const {
  return Legalizer.get();
}

const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const {
  return RegBankInfo.get();
}

/// Find the target operand flags that describe how a global value should be
/// referenced for the current subtarget.
unsigned char
AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV,
                                          const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, simply to get a single 8-byte
  // absolute relocation on all global addresses.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO())
    return AArch64II::MO_GOT;

  if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) {
    if (GV->hasDLLImportStorageClass())
      return AArch64II::MO_GOT | AArch64II::MO_DLLIMPORT;
    if (getTargetTriple().isOSWindows())
      return AArch64II::MO_GOT | AArch64II::MO_COFFSTUB;
    return AArch64II::MO_GOT;
  }

  // The small code model's direct accesses use ADRP, which cannot
  // necessarily produce the value 0 (if the code is above 4GB).
  // Same for the tiny code model, where we have a pc relative LDR.
  if ((useSmallAddressing() || TM.getCodeModel() == CodeModel::Tiny) &&
      GV->hasExternalWeakLinkage())
    return AArch64II::MO_GOT;

  return AArch64II::MO_NO_FLAG;
}

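/// Find the target operand flags for a direct call or other direct reference
/// to a function, for the current subtarget.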
unsigned char AArch64Subtarget::classifyGlobalFunctionReference(
    const GlobalValue *GV, const TargetMachine &TM) const {
  // MachO large model always goes via a GOT, because we don't have the
  // relocations available to do anything else.
  if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() &&
      !GV->hasInternalLinkage())
    return AArch64II::MO_GOT;

  // NonLazyBind goes via GOT unless we know it's available locally.
  auto *F = dyn_cast<Function>(GV);
  if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) &&
      !TM.shouldAssumeDSOLocal(*GV->getParent(), GV))
    return AArch64II::MO_GOT;

  return AArch64II::MO_NO_FLAG;
}

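/// Adjust the generic MachineScheduler policy for this subtarget.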
void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                           unsigned NumRegionInstrs) const {
  // LNT run (at least on Cyclone) showed reasonably significant gains for
  // bi-directional scheduling. 253.perlbmk.
  Policy.OnlyTopDown = false;
  Policy.OnlyBottomUp = false;
  // Enabling or disabling the latency heuristic is a close call: it seems to
  // help nearly no benchmark on out-of-order architectures; on the other hand
  // it regresses register pressure on a few benchmarks.
  Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic;
}

bool AArch64Subtarget::enableEarlyIfConversion() const {
  return EnableEarlyIfConvert;
}

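/// Return true when it is safe to assume that the top byte of addresses is
/// ignored (TBI): the aarch64-use-tbi flag must be given, and the target must
/// be an OS known to enable TBI for user code (currently iOS 8 or later).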
bool AArch64Subtarget::supportsAddressTopByteIgnored() const {
  if (!UseAddressTopByteIgnored)
    return false;

  if (TargetTriple.isiOS()) {
    unsigned Major, Minor, Micro;
    TargetTriple.getiOSVersion(Major, Minor, Micro);
    return Major >= 8;
  }

  return false;
}

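/// Return the custom PBQP register-allocation constraint for this subtarget:
/// the Cortex-A57 FP-operation chaining constraint when FP balancing is
/// enabled, otherwise none.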
std::unique_ptr<PBQPRAConstraint>
AArch64Subtarget::getCustomPBQPConstraints() const {
  return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr;
}

void AArch64Subtarget::mirFileLoaded(MachineFunction &MF) const {
  // We usually compute max call frame size after ISel. Do the computation now
  // if the .mir file didn't specify it. Note that this will probably give you
  // bogus values after PEI has eliminated the callframe setup/destroy pseudo
  // instructions; specify it explicitly if you need it to be correct.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!MFI.isMaxCallFrameSizeComputed())
    MFI.computeMaxCallFrameSize(MF);
}