File: | llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp |
Warning: | line 160, column 13 Value stored to 'F' during its initialization is never read |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a pass that lowers homogeneous prolog/epilog instructions. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "AArch64InstrInfo.h" |
14 | #include "AArch64Subtarget.h" |
15 | #include "MCTargetDesc/AArch64InstPrinter.h" |
16 | #include "Utils/AArch64BaseInfo.h" |
17 | #include "llvm/CodeGen/MachineBasicBlock.h" |
18 | #include "llvm/CodeGen/MachineFunction.h" |
19 | #include "llvm/CodeGen/MachineFunctionPass.h" |
20 | #include "llvm/CodeGen/MachineInstr.h" |
21 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
22 | #include "llvm/CodeGen/MachineModuleInfo.h" |
23 | #include "llvm/CodeGen/MachineOperand.h" |
24 | #include "llvm/CodeGen/TargetSubtargetInfo.h" |
25 | #include "llvm/IR/DebugLoc.h" |
26 | #include "llvm/IR/IRBuilder.h" |
27 | #include "llvm/Pass.h" |
28 | #include "llvm/Support/raw_ostream.h" |
29 | #include <sstream> |
30 | |
31 | using namespace llvm; |
32 | |
// Human-readable name of the pass; used by getPassName() and INITIALIZE_PASS.
#define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
  "AArch64 homogeneous prolog/epilog lowering pass"
35 | |
36 | cl::opt<int> FrameHelperSizeThreshold( |
37 | "frame-helper-size-threshold", cl::init(2), cl::Hidden, |
38 | cl::desc("The minimum number of instructions that are outlined in a frame " |
39 | "helper (default = 2)")); |
40 | |
41 | namespace { |
42 | |
43 | class AArch64LowerHomogeneousPE { |
44 | public: |
45 | const AArch64InstrInfo *TII; |
46 | |
47 | AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) |
48 | : M(M), MMI(MMI) {} |
49 | |
50 | bool run(); |
51 | bool runOnMachineFunction(MachineFunction &Fn); |
52 | |
53 | private: |
54 | Module *M; |
55 | MachineModuleInfo *MMI; |
56 | |
57 | bool runOnMBB(MachineBasicBlock &MBB); |
58 | bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
59 | MachineBasicBlock::iterator &NextMBBI); |
60 | |
61 | /// Lower a HOM_Prolog pseudo instruction into a helper call |
62 | /// or a sequence of homogeneous stores. |
63 | /// When a a fp setup follows, it can be optimized. |
64 | bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
65 | MachineBasicBlock::iterator &NextMBBI); |
66 | /// Lower a HOM_Epilog pseudo instruction into a helper call |
67 | /// or a sequence of homogeneous loads. |
68 | /// When a return follow, it can be optimized. |
69 | bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
70 | MachineBasicBlock::iterator &NextMBBI); |
71 | }; |
72 | |
73 | class AArch64LowerHomogeneousPrologEpilog : public ModulePass { |
74 | public: |
75 | static char ID; |
76 | |
77 | AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { |
78 | initializeAArch64LowerHomogeneousPrologEpilogPass( |
79 | *PassRegistry::getPassRegistry()); |
80 | } |
81 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
82 | AU.addRequired<MachineModuleInfoWrapperPass>(); |
83 | AU.addPreserved<MachineModuleInfoWrapperPass>(); |
84 | AU.setPreservesAll(); |
85 | ModulePass::getAnalysisUsage(AU); |
86 | } |
87 | bool runOnModule(Module &M) override; |
88 | |
89 | StringRef getPassName() const override { |
90 | return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME"AArch64 homogeneous prolog/epilog lowering pass"; |
91 | } |
92 | }; |
93 | |
94 | } // end anonymous namespace |
95 | |
96 | char AArch64LowerHomogeneousPrologEpilog::ID = 0; |
97 | |
98 | INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,static void *initializeAArch64LowerHomogeneousPrologEpilogPassOnce (PassRegistry &Registry) { PassInfo *PI = new PassInfo( "AArch64 homogeneous prolog/epilog lowering pass" , "aarch64-lower-homogeneous-prolog-epilog", &AArch64LowerHomogeneousPrologEpilog ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AArch64LowerHomogeneousPrologEpilog >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAArch64LowerHomogeneousPrologEpilogPassFlag ; void llvm::initializeAArch64LowerHomogeneousPrologEpilogPass (PassRegistry &Registry) { llvm::call_once(InitializeAArch64LowerHomogeneousPrologEpilogPassFlag , initializeAArch64LowerHomogeneousPrologEpilogPassOnce, std:: ref(Registry)); } |
99 | "aarch64-lower-homogeneous-prolog-epilog",static void *initializeAArch64LowerHomogeneousPrologEpilogPassOnce (PassRegistry &Registry) { PassInfo *PI = new PassInfo( "AArch64 homogeneous prolog/epilog lowering pass" , "aarch64-lower-homogeneous-prolog-epilog", &AArch64LowerHomogeneousPrologEpilog ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AArch64LowerHomogeneousPrologEpilog >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAArch64LowerHomogeneousPrologEpilogPassFlag ; void llvm::initializeAArch64LowerHomogeneousPrologEpilogPass (PassRegistry &Registry) { llvm::call_once(InitializeAArch64LowerHomogeneousPrologEpilogPassFlag , initializeAArch64LowerHomogeneousPrologEpilogPassOnce, std:: ref(Registry)); } |
100 | AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)static void *initializeAArch64LowerHomogeneousPrologEpilogPassOnce (PassRegistry &Registry) { PassInfo *PI = new PassInfo( "AArch64 homogeneous prolog/epilog lowering pass" , "aarch64-lower-homogeneous-prolog-epilog", &AArch64LowerHomogeneousPrologEpilog ::ID, PassInfo::NormalCtor_t(callDefaultCtor<AArch64LowerHomogeneousPrologEpilog >), false, false); Registry.registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAArch64LowerHomogeneousPrologEpilogPassFlag ; void llvm::initializeAArch64LowerHomogeneousPrologEpilogPass (PassRegistry &Registry) { llvm::call_once(InitializeAArch64LowerHomogeneousPrologEpilogPassFlag , initializeAArch64LowerHomogeneousPrologEpilogPassOnce, std:: ref(Registry)); } |
101 | |
102 | bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { |
103 | if (skipModule(M)) |
104 | return false; |
105 | |
106 | MachineModuleInfo *MMI = |
107 | &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); |
108 | return AArch64LowerHomogeneousPE(&M, MMI).run(); |
109 | } |
110 | |
111 | bool AArch64LowerHomogeneousPE::run() { |
112 | bool Changed = false; |
113 | for (auto &F : *M) { |
114 | if (F.empty()) |
115 | continue; |
116 | |
117 | MachineFunction *MF = MMI->getMachineFunction(F); |
118 | if (!MF) |
119 | continue; |
120 | Changed |= runOnMachineFunction(*MF); |
121 | } |
122 | |
123 | return Changed; |
124 | } |
/// Kinds of outlined frame helpers that can be generated.
enum FrameHelperType {
  Prolog,      // Stores callee-saved registers, then returns.
  PrologFrame, // Like Prolog, and additionally sets FP from SP.
  Epilog,      // Reloads callee-saved registers, returns via a stashed LR.
  EpilogTail   // Reloads callee-saved registers and returns through LR.
};
126 | |
127 | /// Return a frame helper name with the given CSRs and the helper type. |
128 | /// For instance, a prolog helper that saves x19 and x20 is named as |
129 | /// OUTLINED_FUNCTION_PROLOG_x19x20. |
130 | static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, |
131 | FrameHelperType Type, unsigned FpOffset) { |
132 | std::ostringstream RegStream; |
133 | switch (Type) { |
134 | case FrameHelperType::Prolog: |
135 | RegStream << "OUTLINED_FUNCTION_PROLOG_"; |
136 | break; |
137 | case FrameHelperType::PrologFrame: |
138 | RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; |
139 | break; |
140 | case FrameHelperType::Epilog: |
141 | RegStream << "OUTLINED_FUNCTION_EPILOG_"; |
142 | break; |
143 | case FrameHelperType::EpilogTail: |
144 | RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; |
145 | break; |
146 | } |
147 | |
148 | for (auto Reg : Regs) |
149 | RegStream << AArch64InstPrinter::getRegisterName(Reg); |
150 | |
151 | return RegStream.str(); |
152 | } |
153 | |
154 | /// Create a Function for the unique frame helper with the given name. |
155 | /// Return a newly created MachineFunction with an empty MachineBasicBlock. |
156 | static MachineFunction &createFrameHelperMachineFunction(Module *M, |
157 | MachineModuleInfo *MMI, |
158 | StringRef Name) { |
159 | LLVMContext &C = M->getContext(); |
160 | Function *F = M->getFunction(Name); |
Value stored to 'F' during its initialization is never read | |
161 | assert(F == nullptr && "Function has been created before")(static_cast<void> (0)); |
162 | F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), |
163 | Function::ExternalLinkage, Name, M); |
164 | assert(F && "Function was null!")(static_cast<void> (0)); |
165 | |
166 | // Use ODR linkage to avoid duplication. |
167 | F->setLinkage(GlobalValue::LinkOnceODRLinkage); |
168 | F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); |
169 | |
170 | // Set no-opt/minsize, so we don't insert padding between outlined |
171 | // functions. |
172 | F->addFnAttr(Attribute::OptimizeNone); |
173 | F->addFnAttr(Attribute::NoInline); |
174 | F->addFnAttr(Attribute::MinSize); |
175 | F->addFnAttr(Attribute::Naked); |
176 | |
177 | MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); |
178 | // Remove unnecessary register liveness and set NoVRegs. |
179 | MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); |
180 | MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); |
181 | MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); |
182 | MF.getRegInfo().freezeReservedRegs(MF); |
183 | |
184 | // Create entry block. |
185 | BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); |
186 | IRBuilder<> Builder(EntryBB); |
187 | Builder.CreateRetVoid(); |
188 | |
189 | // Insert the new block into the function. |
190 | MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); |
191 | MF.insert(MF.begin(), MBB); |
192 | |
193 | return MF; |
194 | } |
195 | |
196 | /// Emit a store-pair instruction for frame-setup. |
197 | static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, |
198 | MachineBasicBlock::iterator Pos, |
199 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
200 | int Offset, bool IsPreDec) { |
201 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
202 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)))(static_cast<void> (0)); |
203 | unsigned Opc; |
204 | if (IsPreDec) |
205 | Opc = IsFloat ? AArch64::STPDpre : AArch64::STPXpre; |
206 | else |
207 | Opc = IsFloat ? AArch64::STPDi : AArch64::STPXi; |
208 | |
209 | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); |
210 | if (IsPreDec) |
211 | MIB.addDef(AArch64::SP); |
212 | MIB.addReg(Reg2) |
213 | .addReg(Reg1) |
214 | .addReg(AArch64::SP) |
215 | .addImm(Offset) |
216 | .setMIFlag(MachineInstr::FrameSetup); |
217 | } |
218 | |
219 | /// Emit a load-pair instruction for frame-destroy. |
220 | static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, |
221 | MachineBasicBlock::iterator Pos, |
222 | const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, |
223 | int Offset, bool IsPostDec) { |
224 | bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); |
225 | assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)))(static_cast<void> (0)); |
226 | unsigned Opc; |
227 | if (IsPostDec) |
228 | Opc = IsFloat ? AArch64::LDPDpost : AArch64::LDPXpost; |
229 | else |
230 | Opc = IsFloat ? AArch64::LDPDi : AArch64::LDPXi; |
231 | |
232 | MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); |
233 | if (IsPostDec) |
234 | MIB.addDef(AArch64::SP); |
235 | MIB.addReg(Reg2, getDefRegState(true)) |
236 | .addReg(Reg1, getDefRegState(true)) |
237 | .addReg(AArch64::SP) |
238 | .addImm(Offset) |
239 | .setMIFlag(MachineInstr::FrameDestroy); |
240 | } |
241 | |
242 | /// Return a unique function if a helper can be formed with the given Regs |
243 | /// and frame type. |
244 | /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: |
245 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
246 | /// stp x20, x19, [sp, #16] |
247 | /// ret |
248 | /// |
249 | /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: |
250 | /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller |
251 | /// stp x20, x19, [sp, #16] |
252 | /// add fp, sp, #32 |
253 | /// ret |
254 | /// |
255 | /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: |
256 | /// mov x16, x30 |
257 | /// ldp x29, x30, [sp, #32] |
258 | /// ldp x20, x19, [sp, #16] |
259 | /// ldp x22, x21, [sp], #48 |
260 | /// ret x16 |
261 | /// |
262 | /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: |
263 | /// ldp x29, x30, [sp, #32] |
264 | /// ldp x20, x19, [sp, #16] |
265 | /// ldp x22, x21, [sp], #48 |
266 | /// ret |
267 | /// @param M module |
268 | /// @param MMI machine module info |
269 | /// @param Regs callee save regs that the helper will handle |
270 | /// @param Type frame helper type |
271 | /// @return a helper function |
272 | static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, |
273 | SmallVectorImpl<unsigned> &Regs, |
274 | FrameHelperType Type, |
275 | unsigned FpOffset = 0) { |
276 | assert(Regs.size() >= 2)(static_cast<void> (0)); |
277 | auto Name = getFrameHelperName(Regs, Type, FpOffset); |
278 | auto *F = M->getFunction(Name); |
279 | if (F) |
280 | return F; |
281 | |
282 | auto &MF = createFrameHelperMachineFunction(M, MMI, Name); |
283 | MachineBasicBlock &MBB = *MF.begin(); |
284 | const TargetSubtargetInfo &STI = MF.getSubtarget(); |
285 | const TargetInstrInfo &TII = *STI.getInstrInfo(); |
286 | |
287 | int Size = (int)Regs.size(); |
288 | switch (Type) { |
289 | case FrameHelperType::Prolog: |
290 | case FrameHelperType::PrologFrame: { |
291 | // Compute the remaining SP adjust beyond FP/LR. |
292 | auto LRIdx = std::distance( |
293 | Regs.begin(), std::find(Regs.begin(), Regs.end(), AArch64::LR)); |
294 | |
295 | // If the register stored to the lowest address is not LR, we must subtract |
296 | // more from SP here. |
297 | if (LRIdx != Size - 2) { |
298 | assert(Regs[Size - 2] != AArch64::LR)(static_cast<void> (0)); |
299 | emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], |
300 | LRIdx - Size + 2, true); |
301 | } |
302 | |
303 | // Store CSRs in the reverse order. |
304 | for (int I = Size - 3; I >= 0; I -= 2) { |
305 | // FP/LR has been stored at call-site. |
306 | if (Regs[I - 1] == AArch64::LR) |
307 | continue; |
308 | emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, |
309 | false); |
310 | } |
311 | if (Type == FrameHelperType::PrologFrame) |
312 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) |
313 | .addDef(AArch64::FP) |
314 | .addUse(AArch64::SP) |
315 | .addImm(FpOffset) |
316 | .addImm(0) |
317 | .setMIFlag(MachineInstr::FrameSetup); |
318 | |
319 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) |
320 | .addReg(AArch64::LR); |
321 | break; |
322 | } |
323 | case FrameHelperType::Epilog: |
324 | case FrameHelperType::EpilogTail: |
325 | if (Type == FrameHelperType::Epilog) |
326 | // Stash LR to X16 |
327 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) |
328 | .addDef(AArch64::X16) |
329 | .addReg(AArch64::XZR) |
330 | .addUse(AArch64::LR) |
331 | .addImm(0); |
332 | |
333 | for (int I = 0; I < Size - 2; I += 2) |
334 | emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, |
335 | false); |
336 | // Restore the last CSR with post-increment of SP. |
337 | emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, |
338 | true); |
339 | |
340 | BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) |
341 | .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); |
342 | break; |
343 | } |
344 | |
345 | return M->getFunction(Name); |
346 | } |
347 | |
348 | /// This function checks if a frame helper should be used for |
349 | /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. |
350 | /// @param MBB machine basic block |
351 | /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog |
352 | /// @param Regs callee save registers that are saved or restored. |
353 | /// @param Type frame helper type |
354 | /// @return True if a use of helper is qualified. |
355 | static bool shouldUseFrameHelper(MachineBasicBlock &MBB, |
356 | MachineBasicBlock::iterator &NextMBBI, |
357 | SmallVectorImpl<unsigned> &Regs, |
358 | FrameHelperType Type) { |
359 | const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); |
360 | auto RegCount = Regs.size(); |
361 | assert(RegCount > 0 && (RegCount % 2 == 0))(static_cast<void> (0)); |
362 | // # of instructions that will be outlined. |
363 | int InstCount = RegCount / 2; |
364 | |
365 | // Do not use a helper call when not saving LR. |
366 | if (std::find(Regs.begin(), Regs.end(), AArch64::LR) == Regs.end()) |
367 | return false; |
368 | |
369 | switch (Type) { |
370 | case FrameHelperType::Prolog: |
371 | // Prolog helper cannot save FP/LR. |
372 | InstCount--; |
373 | break; |
374 | case FrameHelperType::PrologFrame: { |
375 | // Effecitvely no change in InstCount since FpAdjusment is included. |
376 | break; |
377 | } |
378 | case FrameHelperType::Epilog: |
379 | // Bail-out if X16 is live across the epilog helper because it is used in |
380 | // the helper to handle X30. |
381 | for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { |
382 | if (NextMI->readsRegister(AArch64::W16, TRI)) |
383 | return false; |
384 | } |
385 | // Epilog may not be in the last block. Check the liveness in successors. |
386 | for (const MachineBasicBlock *SuccMBB : MBB.successors()) { |
387 | if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) |
388 | return false; |
389 | } |
390 | // No change in InstCount for the regular epilog case. |
391 | break; |
392 | case FrameHelperType::EpilogTail: { |
393 | // EpilogTail helper includes the caller's return. |
394 | if (NextMBBI == MBB.end()) |
395 | return false; |
396 | if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) |
397 | return false; |
398 | InstCount++; |
399 | break; |
400 | } |
401 | } |
402 | |
403 | return InstCount >= FrameHelperSizeThreshold; |
404 | } |
405 | |
406 | /// Lower a HOM_Epilog pseudo instruction into a helper call while |
407 | /// creating the helper on demand. Or emit a sequence of loads in place when not |
408 | /// using a helper call. |
409 | /// |
410 | /// 1. With a helper including ret |
411 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI |
412 | /// ret ; NextMBBI |
413 | /// => |
414 | /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 |
415 | /// ... ; NextMBBI |
416 | /// |
417 | /// 2. With a helper |
418 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
419 | /// => |
420 | /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 |
421 | /// |
422 | /// 3. Without a helper |
423 | /// HOM_Epilog x30, x29, x19, x20, x21, x22 |
424 | /// => |
425 | /// ldp x29, x30, [sp, #32] |
426 | /// ldp x20, x19, [sp, #16] |
427 | /// ldp x22, x21, [sp], #48 |
428 | bool AArch64LowerHomogeneousPE::lowerEpilog( |
429 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
430 | MachineBasicBlock::iterator &NextMBBI) { |
431 | auto &MF = *MBB.getParent(); |
432 | MachineInstr &MI = *MBBI; |
433 | |
434 | DebugLoc DL = MI.getDebugLoc(); |
435 | SmallVector<unsigned, 8> Regs; |
436 | for (auto &MO : MI.operands()) |
437 | if (MO.isReg()) |
438 | Regs.push_back(MO.getReg()); |
439 | int Size = (int)Regs.size(); |
440 | if (Size == 0) |
441 | return false; |
442 | // Registers are in pair. |
443 | assert(Size % 2 == 0)(static_cast<void> (0)); |
444 | assert(MI.getOpcode() == AArch64::HOM_Epilog)(static_cast<void> (0)); |
445 | |
446 | auto Return = NextMBBI; |
447 | if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { |
448 | // When MBB ends with a return, emit a tail-call to the epilog helper |
449 | auto *EpilogTailHelper = |
450 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); |
451 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) |
452 | .addGlobalAddress(EpilogTailHelper) |
453 | .addImm(0) |
454 | .setMIFlag(MachineInstr::FrameDestroy) |
455 | .copyImplicitOps(MI) |
456 | .copyImplicitOps(*Return); |
457 | NextMBBI = std::next(Return); |
458 | Return->removeFromParent(); |
459 | } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, |
460 | FrameHelperType::Epilog)) { |
461 | // The default epilog helper case. |
462 | auto *EpilogHelper = |
463 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); |
464 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
465 | .addGlobalAddress(EpilogHelper) |
466 | .setMIFlag(MachineInstr::FrameDestroy) |
467 | .copyImplicitOps(MI); |
468 | } else { |
469 | // Fall back to no-helper. |
470 | for (int I = 0; I < Size - 2; I += 2) |
471 | emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); |
472 | // Restore the last CSR with post-increment of SP. |
473 | emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); |
474 | } |
475 | |
476 | MBBI->removeFromParent(); |
477 | return true; |
478 | } |
479 | |
480 | /// Lower a HOM_Prolog pseudo instruction into a helper call while |
481 | /// creating the helper on demand. Or emit a sequence of stores in place when |
482 | /// not using a helper call. |
483 | /// |
484 | /// 1. With a helper including frame-setup |
485 | /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 |
486 | /// => |
487 | /// stp x29, x30, [sp, #-16]! |
488 | /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 |
489 | /// |
490 | /// 2. With a helper |
491 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
492 | /// => |
493 | /// stp x29, x30, [sp, #-16]! |
494 | /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 |
495 | /// |
496 | /// 3. Without a helper |
497 | /// HOM_Prolog x30, x29, x19, x20, x21, x22 |
498 | /// => |
499 | /// stp x22, x21, [sp, #-48]! |
500 | /// stp x20, x19, [sp, #16] |
501 | /// stp x29, x30, [sp, #32] |
502 | bool AArch64LowerHomogeneousPE::lowerProlog( |
503 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, |
504 | MachineBasicBlock::iterator &NextMBBI) { |
505 | auto &MF = *MBB.getParent(); |
506 | MachineInstr &MI = *MBBI; |
507 | |
508 | DebugLoc DL = MI.getDebugLoc(); |
509 | SmallVector<unsigned, 8> Regs; |
510 | int LRIdx = 0; |
511 | Optional<int> FpOffset; |
512 | for (auto &MO : MI.operands()) { |
513 | if (MO.isReg()) { |
514 | if (MO.getReg() == AArch64::LR) |
515 | LRIdx = Regs.size(); |
516 | Regs.push_back(MO.getReg()); |
517 | } else if (MO.isImm()) { |
518 | FpOffset = MO.getImm(); |
519 | } |
520 | } |
521 | int Size = (int)Regs.size(); |
522 | if (Size == 0) |
523 | return false; |
524 | // Allow compact unwind case only for oww. |
525 | assert(Size % 2 == 0)(static_cast<void> (0)); |
526 | assert(MI.getOpcode() == AArch64::HOM_Prolog)(static_cast<void> (0)); |
527 | |
528 | if (FpOffset && |
529 | shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { |
530 | // FP/LR is stored at the top of stack before the prolog helper call. |
531 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
532 | auto *PrologFrameHelper = getOrCreateFrameHelper( |
533 | M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); |
534 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
535 | .addGlobalAddress(PrologFrameHelper) |
536 | .setMIFlag(MachineInstr::FrameSetup) |
537 | .copyImplicitOps(MI) |
538 | .addReg(AArch64::FP, RegState::Implicit | RegState::Define) |
539 | .addReg(AArch64::SP, RegState::Implicit); |
540 | } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, |
541 | FrameHelperType::Prolog)) { |
542 | // FP/LR is stored at the top of stack before the prolog helper call. |
543 | emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); |
544 | auto *PrologHelper = |
545 | getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); |
546 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) |
547 | .addGlobalAddress(PrologHelper) |
548 | .setMIFlag(MachineInstr::FrameSetup) |
549 | .copyImplicitOps(MI); |
550 | } else { |
551 | // Fall back to no-helper. |
552 | emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); |
553 | for (int I = Size - 3; I >= 0; I -= 2) |
554 | emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); |
555 | if (FpOffset) { |
556 | BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) |
557 | .addDef(AArch64::FP) |
558 | .addUse(AArch64::SP) |
559 | .addImm(*FpOffset) |
560 | .addImm(0) |
561 | .setMIFlag(MachineInstr::FrameSetup); |
562 | } |
563 | } |
564 | |
565 | MBBI->removeFromParent(); |
566 | return true; |
567 | } |
568 | |
569 | /// Process each machine instruction |
570 | /// @param MBB machine basic block |
571 | /// @param MBBI current instruction iterator |
572 | /// @param NextMBBI next instruction iterator which can be updated |
573 | /// @return True when IR is changed. |
574 | bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, |
575 | MachineBasicBlock::iterator MBBI, |
576 | MachineBasicBlock::iterator &NextMBBI) { |
577 | MachineInstr &MI = *MBBI; |
578 | unsigned Opcode = MI.getOpcode(); |
579 | switch (Opcode) { |
580 | default: |
581 | break; |
582 | case AArch64::HOM_Prolog: |
583 | return lowerProlog(MBB, MBBI, NextMBBI); |
584 | case AArch64::HOM_Epilog: |
585 | return lowerEpilog(MBB, MBBI, NextMBBI); |
586 | } |
587 | return false; |
588 | } |
589 | |
590 | bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { |
591 | bool Modified = false; |
592 | |
593 | MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); |
594 | while (MBBI != E) { |
595 | MachineBasicBlock::iterator NMBBI = std::next(MBBI); |
596 | Modified |= runOnMI(MBB, MBBI, NMBBI); |
597 | MBBI = NMBBI; |
598 | } |
599 | |
600 | return Modified; |
601 | } |
602 | |
603 | bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { |
604 | TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); |
605 | |
606 | bool Modified = false; |
607 | for (auto &MBB : MF) |
608 | Modified |= runOnMBB(MBB); |
609 | return Modified; |
610 | } |
611 | |
612 | ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { |
613 | return new AArch64LowerHomogeneousPrologEpilog(); |
614 | } |