Line data Source code
1 : //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : //
11 : //===----------------------------------------------------------------------===//
12 :
13 : #include "AArch64TargetMachine.h"
14 : #include "AArch64.h"
15 : #include "AArch64MacroFusion.h"
16 : #include "AArch64Subtarget.h"
17 : #include "AArch64TargetObjectFile.h"
18 : #include "AArch64TargetTransformInfo.h"
19 : #include "MCTargetDesc/AArch64MCTargetDesc.h"
20 : #include "llvm/ADT/STLExtras.h"
21 : #include "llvm/ADT/Triple.h"
22 : #include "llvm/Analysis/TargetTransformInfo.h"
23 : #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
24 : #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
25 : #include "llvm/CodeGen/GlobalISel/Legalizer.h"
26 : #include "llvm/CodeGen/GlobalISel/Localizer.h"
27 : #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
28 : #include "llvm/CodeGen/MachineScheduler.h"
29 : #include "llvm/CodeGen/Passes.h"
30 : #include "llvm/CodeGen/TargetPassConfig.h"
31 : #include "llvm/IR/Attributes.h"
32 : #include "llvm/IR/Function.h"
33 : #include "llvm/MC/MCTargetOptions.h"
34 : #include "llvm/Pass.h"
35 : #include "llvm/Support/CodeGen.h"
36 : #include "llvm/Support/CommandLine.h"
37 : #include "llvm/Support/TargetRegistry.h"
38 : #include "llvm/Target/TargetLoweringObjectFile.h"
39 : #include "llvm/Target/TargetOptions.h"
40 : #include "llvm/Transforms/Scalar.h"
41 : #include <memory>
42 : #include <string>
43 :
44 : using namespace llvm;
45 :
46 : static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
47 : cl::desc("Enable the CCMP formation pass"),
48 : cl::init(true), cl::Hidden);
49 :
50 : static cl::opt<bool>
51 : EnableCondBrTuning("aarch64-enable-cond-br-tune",
52 : cl::desc("Enable the conditional branch tuning pass"),
53 : cl::init(true), cl::Hidden);
54 :
55 : static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
56 : cl::desc("Enable the machine combiner pass"),
57 : cl::init(true), cl::Hidden);
58 :
59 : static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
60 : cl::desc("Suppress STP for AArch64"),
61 : cl::init(true), cl::Hidden);
62 :
63 : static cl::opt<bool> EnableAdvSIMDScalar(
64 : "aarch64-enable-simd-scalar",
65 : cl::desc("Enable use of AdvSIMD scalar integer instructions"),
66 : cl::init(false), cl::Hidden);
67 :
68 : static cl::opt<bool>
69 : EnablePromoteConstant("aarch64-enable-promote-const",
70 : cl::desc("Enable the promote constant pass"),
71 : cl::init(true), cl::Hidden);
72 :
73 : static cl::opt<bool> EnableCollectLOH(
74 : "aarch64-enable-collect-loh",
75 : cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
76 : cl::init(true), cl::Hidden);
77 :
78 : static cl::opt<bool>
79 : EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
80 : cl::desc("Enable the pass that removes dead"
81 : " definitons and replaces stores to"
82 : " them with stores to the zero"
83 : " register"),
84 : cl::init(true));
85 :
86 : static cl::opt<bool> EnableRedundantCopyElimination(
87 : "aarch64-enable-copyelim",
88 : cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
89 : cl::Hidden);
90 :
91 : static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
92 : cl::desc("Enable the load/store pair"
93 : " optimization pass"),
94 : cl::init(true), cl::Hidden);
95 :
96 : static cl::opt<bool> EnableAtomicTidy(
97 : "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
98 : cl::desc("Run SimplifyCFG after expanding atomic operations"
99 : " to make use of cmpxchg flow-based information"),
100 : cl::init(true));
101 :
102 : static cl::opt<bool>
103 : EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
104 : cl::desc("Run early if-conversion"),
105 : cl::init(true));
106 :
107 : static cl::opt<bool>
108 : EnableCondOpt("aarch64-enable-condopt",
109 : cl::desc("Enable the condition optimizer pass"),
110 : cl::init(true), cl::Hidden);
111 :
112 : static cl::opt<bool>
113 : EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
114 : cl::desc("Work around Cortex-A53 erratum 835769"),
115 : cl::init(false));
116 :
117 : static cl::opt<bool>
118 : EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
119 : cl::desc("Enable optimizations on complex GEPs"),
120 : cl::init(false));
121 :
122 : static cl::opt<bool>
123 : BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
124 : cl::desc("Relax out of range conditional branches"));
125 :
126 : // FIXME: Unify control over GlobalMerge.
127 : static cl::opt<cl::boolOrDefault>
128 : EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
129 : cl::desc("Enable the global merge pass"));
130 :
131 : static cl::opt<bool>
132 : EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
133 : cl::desc("Enable the loop data prefetch pass"),
134 : cl::init(true));
135 :
136 : static cl::opt<int> EnableGlobalISelAtO(
137 : "aarch64-enable-global-isel-at-O", cl::Hidden,
138 : cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
139 : cl::init(0));
140 :
141 : static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
142 : cl::init(true), cl::Hidden);
143 :
144 : static cl::opt<bool>
145 : EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
146 : cl::desc("Enable the AAcrh64 branch target pass"),
147 : cl::init(true));
148 :
149 113923 : extern "C" void LLVMInitializeAArch64Target() {
150 : // Register the target.
151 113923 : RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
152 113923 : RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
153 113923 : RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
154 113923 : auto PR = PassRegistry::getPassRegistry();
155 113923 : initializeGlobalISel(*PR);
156 113923 : initializeAArch64A53Fix835769Pass(*PR);
157 113923 : initializeAArch64A57FPLoadBalancingPass(*PR);
158 113923 : initializeAArch64AdvSIMDScalarPass(*PR);
159 113923 : initializeAArch64BranchTargetsPass(*PR);
160 113923 : initializeAArch64CollectLOHPass(*PR);
161 113923 : initializeAArch64ConditionalComparesPass(*PR);
162 113923 : initializeAArch64ConditionOptimizerPass(*PR);
163 113923 : initializeAArch64DeadRegisterDefinitionsPass(*PR);
164 113923 : initializeAArch64ExpandPseudoPass(*PR);
165 113923 : initializeAArch64LoadStoreOptPass(*PR);
166 113923 : initializeAArch64SIMDInstrOptPass(*PR);
167 113923 : initializeAArch64PreLegalizerCombinerPass(*PR);
168 113923 : initializeAArch64PromoteConstantPass(*PR);
169 113923 : initializeAArch64RedundantCopyEliminationPass(*PR);
170 113923 : initializeAArch64StorePairSuppressPass(*PR);
171 113923 : initializeFalkorHWPFFixPass(*PR);
172 113923 : initializeFalkorMarkStridedAccessesLegacyPass(*PR);
173 113923 : initializeLDTLSCleanupPass(*PR);
174 113923 : }
175 :
176 : //===----------------------------------------------------------------------===//
177 : // AArch64 Lowering public interface.
178 : //===----------------------------------------------------------------------===//
179 1843 : static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
180 1843 : if (TT.isOSBinFormatMachO())
181 : return llvm::make_unique<AArch64_MachoTargetObjectFile>();
182 1412 : if (TT.isOSBinFormatCOFF())
183 37 : return llvm::make_unique<AArch64_COFFTargetObjectFile>();
184 :
185 1375 : return llvm::make_unique<AArch64_ELFTargetObjectFile>();
186 : }
187 :
188 : // Helper function to build a DataLayout string
189 1843 : static std::string computeDataLayout(const Triple &TT,
190 : const MCTargetOptions &Options,
191 : bool LittleEndian) {
192 1843 : if (Options.getABIName() == "ilp32")
193 0 : return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
194 1843 : if (TT.isOSBinFormatMachO())
195 431 : return "e-m:o-i64:64-i128:128-n32:64-S128";
196 1412 : if (TT.isOSBinFormatCOFF())
197 37 : return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
198 1375 : if (LittleEndian)
199 1340 : return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
200 35 : return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
201 : }
202 :
203 : static Reloc::Model getEffectiveRelocModel(const Triple &TT,
204 : Optional<Reloc::Model> RM) {
205 : // AArch64 Darwin is always PIC.
206 : if (TT.isOSDarwin())
207 : return Reloc::PIC_;
208 : // On ELF platforms the default static relocation model has a smart enough
209 : // linker to cope with referencing external symbols defined in a shared
210 : // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
211 1414 : if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
212 : return Reloc::Static;
213 : return *RM;
214 : }
215 :
216 1846 : static CodeModel::Model getEffectiveCodeModel(const Triple &TT,
217 : Optional<CodeModel::Model> CM,
218 : bool JIT) {
219 1846 : if (CM) {
220 54 : if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
221 : *CM != CodeModel::Large) {
222 6 : if (!TT.isOSFuchsia())
223 0 : report_fatal_error(
224 : "Only small, tiny and large code models are allowed on AArch64");
225 6 : else if (*CM != CodeModel::Kernel)
226 0 : report_fatal_error("Only small, tiny, kernel, and large code models "
227 : "are allowed on AArch64");
228 48 : } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
229 3 : report_fatal_error("tiny code model is only supported on ELF");
230 : return *CM;
231 : }
232 : // The default MCJIT memory managers make no guarantees about where they can
233 : // find an executable page; JITed code needs to be able to refer to globals
234 : // no matter how far away they are.
235 1792 : if (JIT)
236 0 : return CodeModel::Large;
237 : return CodeModel::Small;
238 : }
239 :
240 : /// Create an AArch64 architecture model.
241 : ///
242 1846 : AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
243 : StringRef CPU, StringRef FS,
244 : const TargetOptions &Options,
245 : Optional<Reloc::Model> RM,
246 : Optional<CodeModel::Model> CM,
247 : CodeGenOpt::Level OL, bool JIT,
248 1846 : bool LittleEndian)
249 : : LLVMTargetMachine(T,
250 1843 : computeDataLayout(TT, Options.MCOptions, LittleEndian),
251 : TT, CPU, FS, Options, getEffectiveRelocModel(TT, RM),
252 : getEffectiveCodeModel(TT, CM, JIT), OL),
253 7429 : TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
254 1843 : initAsmInfo();
255 :
256 1843 : if (TT.isOSBinFormatMachO()) {
257 431 : this->Options.TrapUnreachable = true;
258 431 : this->Options.NoTrapAfterNoreturn = true;
259 : }
260 :
261 : // Enable GlobalISel at or below EnableGlobalISelAt0.
262 1843 : if (getOptLevel() <= EnableGlobalISelAtO)
263 : setGlobalISel(true);
264 :
265 : // AArch64 supports the MachineOutliner.
266 : setMachineOutliner(true);
267 :
268 : // AArch64 supports default outlining behaviour.
269 : setSupportsDefaultOutlining(true);
270 1843 : }
271 :
272 : AArch64TargetMachine::~AArch64TargetMachine() = default;
273 :
274 : const AArch64Subtarget *
275 266478 : AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
276 266478 : Attribute CPUAttr = F.getFnAttribute("target-cpu");
277 266478 : Attribute FSAttr = F.getFnAttribute("target-features");
278 :
279 266478 : std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
280 38866 : ? CPUAttr.getValueAsString().str()
281 305344 : : TargetCPU;
282 266478 : std::string FS = !FSAttr.hasAttribute(Attribute::None)
283 93103 : ? FSAttr.getValueAsString().str()
284 359581 : : TargetFS;
285 :
286 271252 : auto &I = SubtargetMap[CPU + FS];
287 266478 : if (!I) {
288 : // This needs to be done before we create a new subtarget since any
289 : // creation will depend on the TM and the code generation flags on the
290 : // function that reside in TargetOptions.
291 1567 : resetTargetOptions(F);
292 3134 : I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
293 1567 : isLittle);
294 : }
295 266478 : return I.get();
296 : }
297 :
// Out-of-line virtual method to pin the vtable for this class to this file.
void AArch64leTargetMachine::anchor() { }

// Little-endian AArch64 target machine: forwards to the common constructor
// with LittleEndian = true.
AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}
305 :
// Out-of-line virtual method to pin the vtable for this class to this file.
void AArch64beTargetMachine::anchor() { }

// Big-endian AArch64 target machine: forwards to the common constructor
// with LittleEndian = false.
AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}
313 :
314 : namespace {
315 :
316 : /// AArch64 Code Generator Pass Configuration Options.
317 : class AArch64PassConfig : public TargetPassConfig {
318 : public:
319 1619 : AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
320 1619 : : TargetPassConfig(TM, PM) {
321 1619 : if (TM.getOptLevel() != CodeGenOpt::None)
322 2476 : substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
323 1619 : }
324 :
325 : AArch64TargetMachine &getAArch64TargetMachine() const {
326 1206 : return getTM<AArch64TargetMachine>();
327 : }
328 :
329 : ScheduleDAGInstrs *
330 13743 : createMachineScheduler(MachineSchedContext *C) const override {
331 13743 : const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
332 13743 : ScheduleDAGMILive *DAG = createGenericSchedLive(C);
333 13743 : DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
334 27486 : DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
335 : if (ST.hasFusion())
336 26402 : DAG->addMutation(createAArch64MacroFusionDAGMutation());
337 13743 : return DAG;
338 : }
339 :
340 : ScheduleDAGInstrs *
341 10662 : createPostMachineScheduler(MachineSchedContext *C) const override {
342 10662 : const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
343 : if (ST.hasFusion()) {
344 : // Run the Macro Fusion after RA again since literals are expanded from
345 : // pseudos then (v. addPreSched2()).
346 10525 : ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
347 10525 : DAG->addMutation(createAArch64MacroFusionDAGMutation());
348 10525 : return DAG;
349 : }
350 :
351 : return nullptr;
352 : }
353 :
354 : void addIRPasses() override;
355 : bool addPreISel() override;
356 : bool addInstSelector() override;
357 : bool addIRTranslator() override;
358 : void addPreLegalizeMachineIR() override;
359 : bool addLegalizeMachineIR() override;
360 : bool addRegBankSelect() override;
361 : void addPreGlobalInstructionSelect() override;
362 : bool addGlobalInstructionSelect() override;
363 : bool addILPOpts() override;
364 : void addPreRegAlloc() override;
365 : void addPostRegAlloc() override;
366 : void addPreSched2() override;
367 : void addPreEmitPass() override;
368 : };
369 :
370 : } // end anonymous namespace
371 :
// Build a TargetTransformInfo wrapper around the AArch64 cost model.
// AArch64TTIImpl captures the function so it can answer queries with
// per-function subtarget information.
TargetTransformInfo
AArch64TargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(AArch64TTIImpl(this, F));
}
376 :
// Create the pass-configuration object that assembles the AArch64 codegen
// pipeline (see AArch64PassConfig above). Ownership passes to the caller.
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(*this, PM);
}
380 :
381 1223 : void AArch64PassConfig::addIRPasses() {
382 : // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
383 : // ourselves.
384 1223 : addPass(createAtomicExpandPass());
385 :
386 : // Cmpxchg instructions are often used with a subsequent comparison to
387 : // determine whether it succeeded. We can exploit existing control-flow in
388 : // ldrex/strex loops to simplify this, but it needs tidying up.
389 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
390 2174 : addPass(createCFGSimplificationPass(1, true, true, false, true));
391 :
392 : // Run LoopDataPrefetch
393 : //
394 : // Run this before LSR to remove the multiplies involved in computing the
395 : // pointer values N iterations ahead.
396 1223 : if (TM->getOptLevel() != CodeGenOpt::None) {
397 1120 : if (EnableLoopDataPrefetch)
398 1120 : addPass(createLoopDataPrefetchPass());
399 1120 : if (EnableFalkorHWPFFix)
400 1120 : addPass(createFalkorMarkStridedAccessesPass());
401 : }
402 :
403 1223 : TargetPassConfig::addIRPasses();
404 :
405 : // Match interleaved memory accesses to ldN/stN intrinsics.
406 1223 : if (TM->getOptLevel() != CodeGenOpt::None)
407 1120 : addPass(createInterleavedAccessPass());
408 :
409 1223 : if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
410 : // Call SeparateConstOffsetFromGEP pass to extract constants within indices
411 : // and lower a GEP with multiple indices to either arithmetic operations or
412 : // multiple GEPs with single index.
413 6 : addPass(createSeparateConstOffsetFromGEPPass(true));
414 : // Call EarlyCSE pass to find and remove subexpressions in the lowered
415 : // result.
416 6 : addPass(createEarlyCSEPass());
417 : // Do loop invariant code motion in case part of the lowered result is
418 : // invariant.
419 6 : addPass(createLICMPass());
420 : }
421 1223 : }
422 :
423 : // Pass Pipeline Configuration
424 1223 : bool AArch64PassConfig::addPreISel() {
425 : // Run promote constant before global merge, so that the promoted constants
426 : // get a chance to be merged
427 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
428 1119 : addPass(createAArch64PromoteConstantPass());
429 : // FIXME: On AArch64, this depends on the type.
430 : // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
431 : // and the offset has to be a multiple of the related size in bytes.
432 2343 : if ((TM->getOptLevel() != CodeGenOpt::None &&
433 1238 : EnableGlobalMerge == cl::BOU_UNSET) ||
434 : EnableGlobalMerge == cl::BOU_TRUE) {
435 1120 : bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
436 : (EnableGlobalMerge == cl::BOU_UNSET);
437 1120 : addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
438 : }
439 :
440 1223 : return false;
441 : }
442 :
443 1206 : bool AArch64PassConfig::addInstSelector() {
444 1206 : addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
445 :
446 : // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
447 : // references to _TLS_MODULE_BASE_ as possible.
448 3281 : if (TM->getTargetTriple().isOSBinFormatELF() &&
449 869 : getOptLevel() != CodeGenOpt::None)
450 823 : addPass(createAArch64CleanupLocalDynamicTLSPass());
451 :
452 1206 : return false;
453 : }
454 :
// GlobalISel stage 1: translate LLVM IR to generic MachineInstrs.
bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}
459 :
// Combine generic MIR before legalization, while the representation is
// still target-independent.
void AArch64PassConfig::addPreLegalizeMachineIR() {
  addPass(createAArch64PreLegalizeCombiner());
}
463 :
// GlobalISel stage 2: legalize generic MIR to operations the target supports.
bool AArch64PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
468 :
// GlobalISel stage 3: assign virtual registers to register banks.
bool AArch64PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
473 :
void AArch64PassConfig::addPreGlobalInstructionSelect() {
  // Workaround the deficiency of the fast register allocator.
  if (TM->getOptLevel() == CodeGenOpt::None)
    addPass(new Localizer());
}
479 :
// GlobalISel stage 4: select target instructions from legalized generic MIR.
bool AArch64PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}
484 :
// Add passes that exploit instruction-level parallelism. Each pass is gated
// by its own command-line flag; the relative pass order here is significant.
bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableCondBrTuning)
    addPass(createAArch64CondBrTuning());
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  addPass(createAArch64SIMDInstrOptPass());
  return true;
}
501 :
502 1223 : void AArch64PassConfig::addPreRegAlloc() {
503 : // Change dead register definitions to refer to the zero register.
504 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
505 1119 : addPass(createAArch64DeadRegisterDefinitions());
506 :
507 : // Use AdvSIMD scalar instructions whenever profitable.
508 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
509 5 : addPass(createAArch64AdvSIMDScalar());
510 : // The AdvSIMD pass may produce copies that can be rewritten to
511 : // be register coaleascer friendly.
512 5 : addPass(&PeepholeOptimizerID);
513 : }
514 1223 : }
515 :
516 1223 : void AArch64PassConfig::addPostRegAlloc() {
517 : // Remove redundant copy instructions.
518 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
519 1120 : addPass(createAArch64RedundantCopyEliminationPass());
520 :
521 1223 : if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
522 : // Improve performance for some FP/SIMD code for A57.
523 1114 : addPass(createAArch64A57FPLoadBalancing());
524 1223 : }
525 :
526 1223 : void AArch64PassConfig::addPreSched2() {
527 : // Expand some pseudo instructions to allow proper scheduling.
528 1223 : addPass(createAArch64ExpandPseudoPass());
529 : // Use load/store pair instructions when possible.
530 1223 : if (TM->getOptLevel() != CodeGenOpt::None) {
531 1120 : if (EnableLoadStoreOpt)
532 1113 : addPass(createAArch64LoadStoreOptimizationPass());
533 1120 : if (EnableFalkorHWPFFix)
534 1120 : addPass(createFalkorHWPFFixPass());
535 : }
536 1223 : }
537 :
538 1223 : void AArch64PassConfig::addPreEmitPass() {
539 1223 : if (EnableA53Fix835769)
540 3 : addPass(createAArch64A53Fix835769());
541 : // Relax conditional branch instructions if they're otherwise out of
542 : // range of their destination.
543 1223 : if (BranchRelaxation)
544 1223 : addPass(&BranchRelaxationPassID);
545 :
546 1223 : if (EnableBranchTargets)
547 1223 : addPass(createAArch64BranchTargetsPass());
548 :
549 1223 : if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
550 1116 : TM->getTargetTriple().isOSBinFormatMachO())
551 261 : addPass(createAArch64CollectLOHPass());
552 1223 : }
|