LLVM 19.0.0git
ARMFrameLowering.cpp
Go to the documentation of this file.
1//===- ARMFrameLowering.cpp - ARM Frame Information -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the ARM implementation of TargetFrameLowering class.
10//
11//===----------------------------------------------------------------------===//
12//
13// This file contains the ARM implementation of TargetFrameLowering class.
14//
15// On ARM, stack frames are structured as follows:
16//
17// The stack grows downward.
18//
19// All of the individual frame areas on the frame below are optional, i.e. it's
20// possible to create a function so that the particular area isn't present
21// in the frame.
22//
23// At function entry, the "frame" looks as follows:
24//
25// |                                   | Higher address
26// |-----------------------------------|
27// |                                   |
28// | arguments passed on the stack     |
29// |                                   |
30// |-----------------------------------| <- sp
31// |                                   | Lower address
32//
33//
34// After the prologue has run, the frame has the following general structure.
35// Technically the last frame area (VLAs) doesn't get created until the
36// main function body, after the prologue has run. However, it's depicted here
37// for completeness.
38//
39// |                                   | Higher address
40// |-----------------------------------|
41// |                                   |
42// | arguments passed on the stack     |
43// |                                   |
44// |-----------------------------------| <- (sp at function entry)
45// |                                   |
46// | varargs from registers            |
47// |                                   |
48// |-----------------------------------|
49// |                                   |
50// | prev_lr                           |
51// | prev_fp                           |
52// | (a.k.a. "frame record")           |
53// |                                   |
54// |- - - - - - - - - - - - - - - - - -| <- fp (r7 or r11)
55// |                                   |
56// | callee-saved gpr registers        |
57// |                                   |
58// |-----------------------------------|
59// |                                   |
60// | callee-saved fp/simd regs         |
61// |                                   |
62// |-----------------------------------|
63// |.empty.space.to.make.part.below....|
64// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
65// |.the.standard.8-byte.alignment.....|  compile time; if present)
66// |-----------------------------------|
67// |                                   |
68// | local variables of fixed size     |
69// | including spill slots             |
70// |-----------------------------------| <- base pointer (not defined by ABI,
71// |.variable-sized.local.variables....|    LLVM chooses r6)
72// |.(VLAs)............................| (size of this area is unknown at
73// |...................................|  compile time)
74// |-----------------------------------| <- sp
75// |                                   | Lower address
76//
77//
78// To access data in a frame, a constant offset from one of the pointers
79// (fp, bp, sp) must be computable at compile time. The size of the areas
80// with a dotted background cannot be computed at compile time if they are
81// present, so all three of fp, bp and sp must be set up in order to access
82// all contents of the frame areas, assuming all of the frame areas are
83// non-empty.
84//
85// For most functions, some of the frame areas are empty. For those functions,
86// it may not be necessary to set up fp or bp:
87// * A base pointer is definitely needed when there are both VLAs and local
88// variables with more-than-default alignment requirements.
89// * A frame pointer is definitely needed when there are local variables with
90// more-than-default alignment requirements.
91//
92// In some cases when a base pointer is not strictly needed, it is generated
93// anyway when offsets from the frame pointer to access local variables become
94// so large that the offset can't be encoded in the immediate fields of loads
95// or stores.
96//
97// The frame pointer might be chosen to be r7 or r11, depending on the target
98// architecture and operating system. See ARMSubtarget::getFramePointerReg for
99// details.
100//
101// Outgoing function arguments must be at the bottom of the stack frame when
102// calling another function. If we do not have variable-sized stack objects, we
103// can allocate a "reserved call frame" area at the bottom of the local
104// variable area, large enough for all outgoing calls. If we do have VLAs, then
105// the stack pointer must be decremented and incremented around each call to
106// make space for the arguments below the VLAs.
107//
108//===----------------------------------------------------------------------===//
109
110#include "ARMFrameLowering.h"
111#include "ARMBaseInstrInfo.h"
112#include "ARMBaseRegisterInfo.h"
113#include "ARMConstantPoolValue.h"
115#include "ARMSubtarget.h"
118#include "Utils/ARMBaseInfo.h"
119#include "llvm/ADT/BitVector.h"
120#include "llvm/ADT/STLExtras.h"
121#include "llvm/ADT/SmallPtrSet.h"
122#include "llvm/ADT/SmallVector.h"
138#include "llvm/IR/Attributes.h"
139#include "llvm/IR/CallingConv.h"
140#include "llvm/IR/DebugLoc.h"
141#include "llvm/IR/Function.h"
142#include "llvm/MC/MCAsmInfo.h"
143#include "llvm/MC/MCContext.h"
144#include "llvm/MC/MCDwarf.h"
145#include "llvm/MC/MCInstrDesc.h"
147#include "llvm/Support/CodeGen.h"
150#include "llvm/Support/Debug.h"
156#include <algorithm>
157#include <cassert>
158#include <cstddef>
159#include <cstdint>
160#include <iterator>
161#include <utility>
162#include <vector>
163
164#define DEBUG_TYPE "arm-frame-lowering"
165
166using namespace llvm;
167
// Hidden boolean command-line option "align-neon-spills", enabled by default.
// Per its description it controls whether ARM NEON spills are aligned in the
// prolog and epilog.
168static cl::opt<bool>
169SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true),
170 cl::desc("Align ARM NEON spills in prolog and epilog"));
171
174 unsigned NumAlignedDPRCS2Regs);
175
177 : TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, Align(4)),
178 STI(sti) {}
179
181 // iOS always has a FP for backtracking, force other targets to keep their FP
182 // when doing FastISel. The emitted code is currently superior, and in cases
183 // like test-suite's lencod FastISel isn't quite correct when FP is eliminated.
184 return MF.getSubtarget<ARMSubtarget>().useFastISel();
185}
186
187/// Returns true if the target can safely skip saving callee-saved registers
188/// for noreturn nounwind functions.
///
/// The answer is unconditionally true; the assert below only documents the
/// precondition that the function is NoReturn and NoUnwind and carries no
/// UWTable attribute.
190 assert(MF.getFunction().hasFnAttribute(Attribute::NoReturn) &&
191 MF.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
192 !MF.getFunction().hasFnAttribute(Attribute::UWTable));
193
194 // Frame pointer and link register are not treated as normal CSR, thus we
195 // can always skip CSR saves for nonreturning functions.
196 return true;
197}
198
199/// hasFP - Return true if the specified function should have a dedicated frame
200/// pointer register. This is true if the function has variable sized allocas
201/// or if frame pointer elimination is disabled.
/// It is also true when the stack needs realignment or when the frame address
/// is taken (see the final return statement).
203 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
204 const MachineFrameInfo &MFI = MF.getFrameInfo();
205
206 // ABI-required frame pointer.
    // NOTE(review): the condition guarding this early return is not visible in
    // this listing -- confirm against the full source.
208 return true;
209
210 // Frame pointer required for use within this function.
211 return (RegInfo->hasStackRealignment(MF) || MFI.hasVarSizedObjects() ||
212 MFI.isFrameAddressTaken());
213}
214
215/// isFPReserved - Return true if the frame pointer register should be
216/// considered a reserved register in the scope of the specified function.
/// That is the case when the function has a frame pointer (hasFP) or when the
/// subtarget reports createAAPCSFrameChain().
218 return hasFP(MF) || MF.getSubtarget<ARMSubtarget>().createAAPCSFrameChain();
219}
220
221/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
222/// not required, we reserve argument space for call sites in the function
223/// immediately on entry to the current function. This eliminates the need for
224/// add/sub sp brackets around call sites. Returns true if the call frame is
225/// included as part of the stack frame.
///
/// Returns false when the maximum call frame size is at least 2047 bytes or
/// when the function has variable-sized objects.
227 const MachineFrameInfo &MFI = MF.getFrameInfo();
228 unsigned CFSize = MFI.getMaxCallFrameSize();
229 // It's not always a good idea to include the call frame as part of the
230 // stack frame. ARM (especially Thumb) has small immediate offsets to
231 // address the stack frame. So a large call frame can cause poor codegen
232 // and may even make it impossible to scavenge a register.
    // ((1 << 12) - 1) / 2 == 2047, so the limit is half the 12-bit range.
233 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12
234 return false;
235
    // Otherwise the call frame is reserved exactly when there are no
    // variable-sized objects.
236 return !MFI.hasVarSizedObjects();
237}
238
239/// canSimplifyCallFramePseudos - If there is a reserved call frame, the
240/// call frame pseudos can be simplified. Unlike most targets, having a FP
241/// is not sufficient here since we still may reference some objects via SP
242/// even when FP is available in Thumb2 mode.
243bool
246}
247
248// Returns how much of the incoming argument stack area we should clean up in an
249// epilogue. For the C calling convention this will be 0, for guaranteed tail
250// call conventions it can be positive (a normal return or a tail call to a
251// function that uses less stack space for arguments) or negative (for a tail
252// call to a function that needs more stack space than us for arguments).
//
// MBBI is expected to point at the block's return instruction, or to be
// MBB.end(), in which case the block is treated as a non-tail-call return.
256 bool IsTailCallReturn = false;
257 if (MBB.end() != MBBI) {
258 unsigned RetOpcode = MBBI->getOpcode();
    // Only the three TCRETURN* pseudo opcodes count as tail-call returns.
259 IsTailCallReturn = RetOpcode == ARM::TCRETURNdi ||
260 RetOpcode == ARM::TCRETURNri ||
261 RetOpcode == ARM::TCRETURNrinotr12;
262 }
264
265 int ArgumentPopSize = 0;
266 if (IsTailCallReturn) {
    // Operand 1 of the tail-call pseudo carries the stack adjustment as an
    // immediate.
267 MachineOperand &StackAdjust = MBBI->getOperand(1);
268
269 // For a tail-call in a callee-pops-arguments environment, some or all of
270 // the stack may actually be in use for the call's arguments, this is
271 // calculated during LowerCall and consumed here...
272 ArgumentPopSize = StackAdjust.getImm();
273 } else {
274 // ... otherwise the amount to pop is *all* of the argument space,
275 // conveniently stored in the MachineFunctionInfo by
276 // LowerFormalArguments. This will, of course, be zero for the C calling
277 // convention.
278 ArgumentPopSize = AFI->getArgumentStackToRestore();
279 }
280
281 return ArgumentPopSize;
282}
283
284static bool needsWinCFI(const MachineFunction &MF) {
285 const Function &F = MF.getFunction();
286 return MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
287 F.needsUnwindTableEntry();
288}
289
290// Given a load or a store instruction, generate an appropriate unwinding SEH
291// code on Windows.
//
// The SEH pseudo-instruction is derived from the opcode and operands of the
// instruction at MBBI and inserted right after it; the result of that
// insertion is returned. MachineInstr::NoMerge is always added to Flags.
// An opcode without a matching case below is a fatal error.
//
// For t2MOVi16 with an immediate below 256, and for t2LDMIA_RET /
// t2LDMIA_UPD / t2STMDB_UPD whose register list does not require the wide
// form, the instruction itself is first replaced by tMOVi8 / tPOP_RET /
// tPOP / tPUSH, and the SEH opcode is then emitted with Wide=0.
293 const TargetInstrInfo &TII,
294 unsigned Flags) {
295 unsigned Opc = MBBI->getOpcode();
297 MachineFunction &MF = *MBB->getParent();
298 DebugLoc DL = MBBI->getDebugLoc();
300 const ARMSubtarget &Subtarget = MF.getSubtarget<ARMSubtarget>();
301 const ARMBaseRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
302
303 Flags |= MachineInstr::NoMerge;
304
305 switch (Opc) {
306 default:
307 report_fatal_error("No SEH Opcode for instruction " + TII.getName(Opc));
308 break;
309 case ARM::t2ADDri: // add.w r11, sp, #xx
310 case ARM::t2ADDri12: // add.w r11, sp, #xx
311 case ARM::t2MOVTi16: // movt r4, #xx
312 case ARM::tBL: // bl __chkstk
313 // These are harmless if used for just setting up a frame pointer,
314 // but that frame pointer can't be relied upon for unwinding, unless
315 // set up with SEH_SaveSP.
316 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
317 .addImm(/*Wide=*/1)
318 .setMIFlags(Flags);
319 break;
320
321 case ARM::t2MOVi16: { // mov(w) r4, #xx
    // Immediates below 256 are re-emitted as tMOVi8 and recorded as a
    // non-wide nop; larger immediates keep the wide instruction.
322 bool Wide = MBBI->getOperand(1).getImm() >= 256;
323 if (!Wide) {
324 MachineInstrBuilder NewInstr =
325 BuildMI(MF, DL, TII.get(ARM::tMOVi8)).setMIFlags(MBBI->getFlags());
326 NewInstr.add(MBBI->getOperand(0));
327 NewInstr.add(t1CondCodeOp(/*isDead=*/true));
328 for (MachineOperand &MO : llvm::drop_begin(MBBI->operands()))
329 NewInstr.add(MO);
    // Swap the new instruction in for the old one and continue from it.
330 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
331 MBB->erase(MBBI);
332 MBBI = NewMBBI;
333 }
334 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop)).addImm(Wide).setMIFlags(Flags);
335 break;
336 }
337
338 case ARM::tBLXr: // blx r12 (__chkstk)
339 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
340 .addImm(/*Wide=*/0)
341 .setMIFlags(Flags);
342 break;
343
344 case ARM::t2MOVi32imm: // movw+movt
345 // This pseudo instruction expands into two mov instructions. If the
346 // second operand is a symbol reference, this will stay as two wide
347 // instructions, movw+movt. If they're immediates, the first one can
348 // end up as a narrow mov though.
349 // As two SEH instructions are appended here, they won't get interleaved
350 // between the two final movw/movt instructions, but it doesn't make any
351 // practical difference.
352 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
353 .addImm(/*Wide=*/1)
354 .setMIFlags(Flags);
    // The first nop is inserted here; the second one is inserted by the
    // common insertAfter at the end of the function.
355 MBB->insertAfter(MBBI, MIB);
356 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop))
357 .addImm(/*Wide=*/1)
358 .setMIFlags(Flags);
359 break;
360
361 case ARM::t2STR_PRE:
    // Only the form with SP in operands 0 and 2 and an immediate of -4 is
    // accepted; it is recorded as a one-register wide SEH_SaveRegs.
362 if (MBBI->getOperand(0).getReg() == ARM::SP &&
363 MBBI->getOperand(2).getReg() == ARM::SP &&
364 MBBI->getOperand(3).getImm() == -4) {
365 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
366 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
367 .addImm(1ULL << Reg)
368 .addImm(/*Wide=*/1)
369 .setMIFlags(Flags);
370 } else {
371 report_fatal_error("No matching SEH Opcode for t2STR_PRE");
372 }
373 break;
374
375 case ARM::t2LDR_POST:
    // Mirror image of t2STR_PRE: SP in operands 1 and 2 and an immediate
    // of 4.
376 if (MBBI->getOperand(1).getReg() == ARM::SP &&
377 MBBI->getOperand(2).getReg() == ARM::SP &&
378 MBBI->getOperand(3).getImm() == 4) {
379 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
380 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveRegs))
381 .addImm(1ULL << Reg)
382 .addImm(/*Wide=*/1)
383 .setMIFlags(Flags);
384 } else {
385 report_fatal_error("No matching SEH Opcode for t2LDR_POST");
386 }
387 break;
388
389 case ARM::t2LDMIA_RET:
390 case ARM::t2LDMIA_UPD:
391 case ARM::t2STMDB_UPD: {
    // Build a bit mask of SEH register numbers from the explicit register
    // operands, which start at operand index 4.
392 unsigned Mask = 0;
393 bool Wide = false;
394 for (unsigned i = 4, NumOps = MBBI->getNumOperands(); i != NumOps; ++i) {
395 const MachineOperand &MO = MBBI->getOperand(i);
396 if (!MO.isReg() || MO.isImplicit())
397 continue;
398 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
    // Register number 15 is recorded in bit 14 of the mask.
399 if (Reg == 15)
400 Reg = 14;
    // Registers 8..13, or 14 in a t2LDMIA_UPD, force the wide form.
401 if (Reg >= 8 && Reg <= 13)
402 Wide = true;
403 else if (Opc == ARM::t2LDMIA_UPD && Reg == 14)
404 Wide = true;
405 Mask |= 1 << Reg;
406 }
407 if (!Wide) {
    // Replace the instruction with its tPOP_RET / tPOP / tPUSH
    // counterpart, copying operands from index 2 onwards.
408 unsigned NewOpc;
409 switch (Opc) {
410 case ARM::t2LDMIA_RET:
411 NewOpc = ARM::tPOP_RET;
412 break;
413 case ARM::t2LDMIA_UPD:
414 NewOpc = ARM::tPOP;
415 break;
416 case ARM::t2STMDB_UPD:
417 NewOpc = ARM::tPUSH;
418 break;
419 default:
421 }
422 MachineInstrBuilder NewInstr =
423 BuildMI(MF, DL, TII.get(NewOpc)).setMIFlags(MBBI->getFlags());
424 for (unsigned i = 2, NumOps = MBBI->getNumOperands(); i != NumOps; ++i)
425 NewInstr.add(MBBI->getOperand(i));
426 MachineBasicBlock::iterator NewMBBI = MBB->insertAfter(MBBI, NewInstr);
427 MBB->erase(MBBI);
428 MBBI = NewMBBI;
429 }
    // A returning pop gets the _Ret flavour of the SEH opcode.
430 unsigned SEHOpc =
431 (Opc == ARM::t2LDMIA_RET) ? ARM::SEH_SaveRegs_Ret : ARM::SEH_SaveRegs;
432 MIB = BuildMI(MF, DL, TII.get(SEHOpc))
433 .addImm(Mask)
434 .addImm(Wide ? 1 : 0)
435 .setMIFlags(Flags);
436 break;
437 }
438 case ARM::VSTMDDB_UPD:
439 case ARM::VLDMDIA_UPD: {
    // Record the SEH numbers of the first and last register operands
    // (operands from index 4 onwards).
440 int First = -1, Last = 0;
441 for (const MachineOperand &MO : llvm::drop_begin(MBBI->operands(), 4)) {
442 unsigned Reg = RegInfo->getSEHRegNum(MO.getReg());
443 if (First == -1)
444 First = Reg;
445 Last = Reg;
446 }
447 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveFRegs))
448 .addImm(First)
449 .addImm(Last)
450 .setMIFlags(Flags);
451 break;
452 }
453 case ARM::tSUBspi:
454 case ARM::tADDspi:
    // The immediate operand is multiplied by 4 to obtain the SEH allocation
    // size -- presumably because it counts words; confirm against the
    // instruction definition.
455 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
456 .addImm(MBBI->getOperand(2).getImm() * 4)
457 .addImm(/*Wide=*/0)
458 .setMIFlags(Flags);
459 break;
460 case ARM::t2SUBspImm:
461 case ARM::t2SUBspImm12:
462 case ARM::t2ADDspImm:
463 case ARM::t2ADDspImm12:
    // Here the immediate operand is used unscaled.
464 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_StackAlloc))
465 .addImm(MBBI->getOperand(2).getImm())
466 .addImm(/*Wide=*/1)
467 .setMIFlags(Flags);
468 break;
469
470 case ARM::tMOVr:
    // Only two moves are accepted: copying SP into a register under
    // FrameSetup, or restoring SP from a register under FrameDestroy.
    // Both map to SEH_SaveSP with the other register's SEH number.
471 if (MBBI->getOperand(1).getReg() == ARM::SP &&
472 (Flags & MachineInstr::FrameSetup)) {
473 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
474 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
475 .addImm(Reg)
476 .setMIFlags(Flags);
477 } else if (MBBI->getOperand(0).getReg() == ARM::SP &&
478 (Flags & MachineInstr::FrameDestroy)) {
479 unsigned Reg = RegInfo->getSEHRegNum(MBBI->getOperand(1).getReg());
480 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_SaveSP))
481 .addImm(Reg)
482 .setMIFlags(Flags);
483 } else {
484 report_fatal_error("No SEH Opcode for MOV");
485 }
486 break;
487
488 case ARM::tBX_RET:
489 case ARM::TCRETURNri:
490 case ARM::TCRETURNrinotr12:
491 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
492 .addImm(/*Wide=*/0)
493 .setMIFlags(Flags);
494 break;
495
496 case ARM::TCRETURNdi:
497 MIB = BuildMI(MF, DL, TII.get(ARM::SEH_Nop_Ret))
498 .addImm(/*Wide=*/1)
499 .setMIFlags(Flags);
500 break;
501 }
    // Place the SEH pseudo immediately after the (possibly replaced)
    // instruction.
502 return MBB->insertAfter(MBBI, MIB);
503}
504
507 if (MBBI == MBB.begin())
509 return std::prev(MBBI);
510}
511
515 const ARMBaseInstrInfo &TII, unsigned MIFlags) {
    // Calls insertSEH on each instruction in the range that is not already
    // followed by a SEH instruction. A valid Start is exclusive (the scan
    // begins at the instruction after it); an invalid Start means "begin at
    // the start of the block". End is exclusive.
516 if (Start.isValid())
517 Start = std::next(Start);
518 else
519 Start = MBB.begin();
520
521 for (auto MI = Start; MI != End;) {
    // Next is taken before calling insertSEH, so iteration resumes at the
    // instruction that originally followed MI.
522 auto Next = std::next(MI);
523 // Check if this instruction already has got a SEH opcode added. In that
524 // case, don't do this generic mapping.
525 if (Next != End && isSEHInstruction(*Next)) {
    // Skip MI's existing SEH instruction and any further SEH instructions
    // that directly follow it.
526 MI = std::next(Next);
527 while (MI != End && isSEHInstruction(*MI))
528 ++MI;
529 continue;
530 }
531 insertSEH(MI, TII, MIFlags);
532 MI = Next;
533 }
534}
535
538 const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg,
539 unsigned SrcReg, int NumBytes, unsigned MIFlags = MachineInstr::NoFlags,
540 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) {
    // Thin dispatcher: forwards all arguments unchanged to the ARM-mode
    // helper when isARM is true, and to the Thumb2 helper otherwise.
541 if (isARM)
542 emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
543 Pred, PredReg, TII, MIFlags);
544 else
545 emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes,
546 Pred, PredReg, TII, MIFlags);
547}
548
// Adjusts the stack pointer by NumBytes: forwards to emitRegPlusImmediate
// with ARM::SP as both destination and source register, passing the
// remaining arguments through unchanged.
549static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB,
551 const ARMBaseInstrInfo &TII, int NumBytes,
552 unsigned MIFlags = MachineInstr::NoFlags,
554 unsigned PredReg = 0) {
555 emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes,
556 MIFlags, Pred, PredReg);
557}
558
// Computes the number of bytes covered by the push-like instruction MI:
// 8 bytes per listed register for VSTMDDB_UPD, 4 bytes per listed register
// for STMDB_UPD / t2STMDB_UPD, and a flat 4 bytes for the single-register
// stores t2STR_PRE / STR_PRE_IMM. Any other opcode is unreachable.
560 int RegSize;
561 switch (MI.getOpcode()) {
562 case ARM::VSTMDDB_UPD:
563 RegSize = 8;
564 break;
565 case ARM::STMDB_UPD:
566 case ARM::t2STMDB_UPD:
567 RegSize = 4;
568 break;
569 case ARM::t2STR_PRE:
570 case ARM::STR_PRE_IMM:
571 return 4;
572 default:
573 llvm_unreachable("Unknown push or pop like instruction");
574 }
575
576 int count = 0;
577 // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+
578 // pred) so the list starts at 4.
    // Every operand from index 4 upwards contributes RegSize bytes.
579 for (int i = MI.getNumOperands() - 1; i >= 4; --i)
580 count += RegSize;
581 return count;
582}
583
585 size_t StackSizeInBytes) {
    // Returns true when a frame of StackSizeInBytes is at least as large as
    // the probe threshold and the function does not carry the
    // "no-stack-arg-probe" attribute.
586 const MachineFrameInfo &MFI = MF.getFrameInfo();
587 const Function &F = MF.getFunction();
    // Default threshold: 4096 bytes, lowered to 4080 when the stack protector
    // index is greater than zero.
588 unsigned StackProbeSize = (MFI.getStackProtectorIndex() > 0) ? 4080 : 4096;
589
    // The "stack-probe-size" function attribute takes precedence; the value
    // computed above is passed as the default (presumably used when the
    // attribute is absent -- confirm in Function).
590 StackProbeSize =
591 F.getFnAttributeAsParsedInteger("stack-probe-size", StackProbeSize);
592 return (StackSizeInBytes >= StackProbeSize) &&
593 !F.hasFnAttribute("no-stack-arg-probe");
594}
595
596namespace {
597
// Records the instructions that adjust SP, together with the number of bytes
// each one adjusts it by, so that matching CFA-offset CFI instructions can be
// emitted afterwards by emitDefCFAOffsets().
598struct StackAdjustingInsts {
    // One recorded SP-adjusting instruction.
599 struct InstInfo {
    // Number of bytes this instruction adjusts SP by.
601 unsigned SPAdjust;
    // When a frame pointer is in use, only entries with this flag set get a
    // CFA offset emitted (see emitDefCFAOffsets).
602 bool BeforeFPSet;
603 };
604
606
    // Appends a new record for instruction I.
607 void addInst(MachineBasicBlock::iterator I, unsigned SPAdjust,
608 bool BeforeFPSet = false) {
609 InstInfo Info = {I, SPAdjust, BeforeFPSet};
610 Insts.push_back(Info);
611 }
612
    // Adds ExtraBytes to the adjustment of an instruction that was recorded
    // earlier; asserts that I is known.
613 void addExtraBytes(const MachineBasicBlock::iterator I, unsigned ExtraBytes) {
614 auto Info =
615 llvm::find_if(Insts, [&](InstInfo &Info) { return Info.I == I; });
616 assert(Info != Insts.end() && "invalid sp adjusting instruction");
617 Info->SPAdjust += ExtraBytes;
618 }
619
    // Walks the recorded instructions in insertion order, accumulating the
    // running CFA offset and inserting a def_cfa_offset CFI instruction right
    // after each one. With HasFP set, emission stops at the first entry that
    // is not flagged BeforeFPSet.
620 void emitDefCFAOffsets(MachineBasicBlock &MBB, const DebugLoc &dl,
621 const ARMBaseInstrInfo &TII, bool HasFP) {
623 unsigned CFAOffset = 0;
624 for (auto &Info : Insts) {
625 if (HasFP && !Info.BeforeFPSet)
626 return;
627
628 CFAOffset += Info.SPAdjust;
629 unsigned CFIIndex = MF.addFrameInst(
630 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset));
631 BuildMI(MBB, std::next(Info.I), dl,
632 TII.get(TargetOpcode::CFI_INSTRUCTION))
633 .addCFIIndex(CFIIndex)
635 }
636 }
637};
638
639} // end anonymous namespace
640
641/// Emit an instruction sequence that will align the address in
642/// register Reg by zero-ing out the lower bits. For versions of the
643/// architecture that support Neon, this must be done in a single
644/// instruction, since skipAlignedDPRCS2Spills assumes it is done in a
645/// single instruction. That function only gets called when optimizing
646/// spilling of D registers on a core with the Neon instruction set
647/// present.
///
/// Thumb1-only functions are not supported (asserted). In ARM mode the
/// two-instruction lsr/lsl fallback is only allowed when
/// MustBeSingleInstruction is false (asserted).
649 const TargetInstrInfo &TII,
652 const DebugLoc &DL, const unsigned Reg,
653 const Align Alignment,
654 const bool MustBeSingleInstruction) {
655 const ARMSubtarget &AST = MF.getSubtarget<ARMSubtarget>();
656 const bool CanUseBFC = AST.hasV6T2Ops() || AST.hasV7Ops();
    // AlignMask covers the bits to clear; NrBitsToZero is log2(Alignment).
657 const unsigned AlignMask = Alignment.value() - 1U;
658 const unsigned NrBitsToZero = Log2(Alignment);
659 assert(!AFI->isThumb1OnlyFunction() && "Thumb1 not supported");
660 if (!AFI->isThumbFunction()) {
661 // If the BFC instruction is available, use that to zero the lower
662 // bits:
663 // bfc Reg, #0, log2(Alignment)
664 // otherwise use BIC, if the mask to zero the required number of bits
665 // can be encoded in the bic immediate field
666 // bic Reg, Reg, Alignment-1
667 // otherwise, emit
668 // lsr Reg, Reg, log2(Alignment)
669 // lsl Reg, Reg, log2(Alignment)
670 if (CanUseBFC) {
671 BuildMI(MBB, MBBI, DL, TII.get(ARM::BFC), Reg)
673 .addImm(~AlignMask)
675 } else if (AlignMask <= 255) {
676 BuildMI(MBB, MBBI, DL, TII.get(ARM::BICri), Reg)
678 .addImm(AlignMask)
680 .add(condCodeOp());
681 } else {
682 assert(!MustBeSingleInstruction &&
683 "Shouldn't call emitAligningInstructions demanding a single "
684 "instruction to be emitted for large stack alignment for a target "
685 "without BFC.");
    // Shift right, then left, by the same amount to clear the low bits.
686 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
688 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsr, NrBitsToZero))
690 .add(condCodeOp());
691 BuildMI(MBB, MBBI, DL, TII.get(ARM::MOVsi), Reg)
693 .addImm(ARM_AM::getSORegOpc(ARM_AM::lsl, NrBitsToZero))
695 .add(condCodeOp());
696 }
697 } else {
698 // Since this is only reached for Thumb-2 targets, the BFC instruction
699 // should always be available.
700 assert(CanUseBFC);
701 BuildMI(MBB, MBBI, DL, TII.get(ARM::t2BFC), Reg)
703 .addImm(~AlignMask)
705 }
706}
707
708/// We need the offset of the frame pointer relative to other MachineFrameInfo
709/// offsets which are encoded relative to SP at function begin.
710/// See also emitPrologue() for how the FP is set up.
711/// Unfortunately we cannot determine this value in determineCalleeSaves() yet
712/// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use
713/// this to produce a conservative estimate that we check in an assert() later.
714static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI,
715 const MachineFunction &MF) {
716 // For Thumb1, push.w isn't available, so the first push will always push
717 // r7 and lr onto the stack first.
718 if (AFI.isThumb1OnlyFunction())
719 return -AFI.getArgRegsSaveSize() - (2 * 4);
720 // This is a conservative estimation: Assume the frame pointer being r7 and
721 // pc("r15") up to r8 getting spilled before (= 8 registers).
722 int MaxRegBytes = 8 * 4;
723 if (STI.splitFramePointerPush(MF)) {
724 // Here, r11 can be stored below all of r4-r15 (3 registers more than
725 // above), plus d8-d15.
726 MaxRegBytes = 11 * 4 + 8 * 8;
727 }
728 int FPCXTSaveSize =
729 (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0;
730 return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes;
731}
732
734 MachineBasicBlock &MBB) const {
736 MachineFrameInfo &MFI = MF.getFrameInfo();
738 MachineModuleInfo &MMI = MF.getMMI();
739 MCContext &Context = MMI.getContext();
740 const TargetMachine &TM = MF.getTarget();
741 const MCRegisterInfo *MRI = Context.getRegisterInfo();
742 const ARMBaseRegisterInfo *RegInfo = STI.getRegisterInfo();
745 "This emitPrologue does not support Thumb1!");
746 bool isARM = !AFI->isThumbFunction();
747 Align Alignment = STI.getFrameLowering()->getStackAlign();
748 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize();
749 unsigned NumBytes = MFI.getStackSize();
750 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
751 int FPCXTSaveSize = 0;
752 bool NeedsWinCFI = needsWinCFI(MF);
753
754 // Debug location must be unknown since the first debug location is used
755 // to determine the end of the prologue.
756 DebugLoc dl;
757
758 Register FramePtr = RegInfo->getFrameRegister(MF);
759
760 // Determine the sizes of each callee-save spill areas and record which frame
761 // belongs to which callee-save spill areas.
762 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0;
763 int FramePtrSpillFI = 0;
764 int D8SpillFI = 0;
765
766 // All calls are tail calls in GHC calling conv, and functions have no
767 // prologue/epilogue.
769 return;
770
771 StackAdjustingInsts DefCFAOffsetCandidates;
772 bool HasFP = hasFP(MF);
773
774 if (!AFI->hasStackFrame() &&
775 (!STI.isTargetWindows() || !WindowsRequiresStackProbe(MF, NumBytes))) {
776 if (NumBytes != 0) {
777 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
779 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes, true);
780 }
781 if (!NeedsWinCFI)
782 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
783 if (NeedsWinCFI && MBBI != MBB.begin()) {
785 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd))
787 MF.setHasWinCFI(true);
788 }
789 return;
790 }
791
792 // Determine spill area sizes.
793 if (STI.splitFramePointerPush(MF)) {
794 for (const CalleeSavedInfo &I : CSI) {
795 Register Reg = I.getReg();
796 int FI = I.getFrameIdx();
797 switch (Reg) {
798 case ARM::R11:
799 case ARM::LR:
800 if (Reg == FramePtr)
801 FramePtrSpillFI = FI;
802 GPRCS2Size += 4;
803 break;
804 case ARM::R0:
805 case ARM::R1:
806 case ARM::R2:
807 case ARM::R3:
808 case ARM::R4:
809 case ARM::R5:
810 case ARM::R6:
811 case ARM::R7:
812 case ARM::R8:
813 case ARM::R9:
814 case ARM::R10:
815 case ARM::R12:
816 GPRCS1Size += 4;
817 break;
818 case ARM::FPCXTNS:
819 FPCXTSaveSize = 4;
820 break;
821 default:
822 // This is a DPR. Exclude the aligned DPRCS2 spills.
823 if (Reg == ARM::D8)
824 D8SpillFI = FI;
825 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
826 DPRCSSize += 8;
827 }
828 }
829 } else {
830 for (const CalleeSavedInfo &I : CSI) {
831 Register Reg = I.getReg();
832 int FI = I.getFrameIdx();
833 switch (Reg) {
834 case ARM::R8:
835 case ARM::R9:
836 case ARM::R10:
837 case ARM::R11:
838 case ARM::R12:
839 if (STI.splitFramePushPop(MF)) {
840 GPRCS2Size += 4;
841 break;
842 }
843 [[fallthrough]];
844 case ARM::R0:
845 case ARM::R1:
846 case ARM::R2:
847 case ARM::R3:
848 case ARM::R4:
849 case ARM::R5:
850 case ARM::R6:
851 case ARM::R7:
852 case ARM::LR:
853 if (Reg == FramePtr)
854 FramePtrSpillFI = FI;
855 GPRCS1Size += 4;
856 break;
857 case ARM::FPCXTNS:
858 FPCXTSaveSize = 4;
859 break;
860 default:
861 // This is a DPR. Exclude the aligned DPRCS2 spills.
862 if (Reg == ARM::D8)
863 D8SpillFI = FI;
864 if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())
865 DPRCSSize += 8;
866 }
867 }
868 }
869
870 MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push;
871
872 // Move past the PAC computation.
873 if (AFI->shouldSignReturnAddress())
874 LastPush = MBBI++;
875
876 // Move past FPCXT area.
877 if (FPCXTSaveSize > 0) {
878 LastPush = MBBI++;
879 DefCFAOffsetCandidates.addInst(LastPush, FPCXTSaveSize, true);
880 }
881
882 // Allocate the vararg register save area.
883 if (ArgRegsSaveSize) {
884 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -ArgRegsSaveSize,
886 LastPush = std::prev(MBBI);
887 DefCFAOffsetCandidates.addInst(LastPush, ArgRegsSaveSize, true);
888 }
889
890 // Move past area 1.
891 if (GPRCS1Size > 0) {
892 GPRCS1Push = LastPush = MBBI++;
893 DefCFAOffsetCandidates.addInst(LastPush, GPRCS1Size, true);
894 }
895
896 // Determine starting offsets of spill areas.
897 unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize;
898 unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size;
899 unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size;
900 Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4);
901 unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize;
902 if (!STI.splitFramePointerPush(MF)) {
903 DPRGapSize += GPRCS2Size;
904 }
905 DPRGapSize %= DPRAlign.value();
906
907 unsigned DPRCSOffset;
908 if (STI.splitFramePointerPush(MF)) {
909 DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize;
910 GPRCS2Offset = DPRCSOffset - GPRCS2Size;
911 } else {
912 DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize;
913 }
914 int FramePtrOffsetInPush = 0;
915 if (HasFP) {
916 int FPOffset = MFI.getObjectOffset(FramePtrSpillFI);
917 assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset &&
918 "Max FP estimation is wrong");
919 FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize;
920 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) +
921 NumBytes);
922 }
923 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset);
924 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
925 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
926
927 // Move past area 2.
928 if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) {
929 GPRCS2Push = LastPush = MBBI++;
930 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
931 }
932
933 // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our
934 // .cfi_offset operations will reflect that.
935 if (DPRGapSize) {
936 assert(DPRGapSize == 4 && "unexpected alignment requirements for DPRs");
937 if (LastPush != MBB.end() &&
938 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, DPRGapSize))
939 DefCFAOffsetCandidates.addExtraBytes(LastPush, DPRGapSize);
940 else {
941 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -DPRGapSize,
943 DefCFAOffsetCandidates.addInst(std::prev(MBBI), DPRGapSize);
944 }
945 }
946
947 // Move past area 3.
948 if (DPRCSSize > 0) {
949 // Since vpush register list cannot have gaps, there may be multiple vpush
950 // instructions in the prologue.
951 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) {
952 DefCFAOffsetCandidates.addInst(MBBI, sizeOfSPAdjustment(*MBBI));
953 LastPush = MBBI++;
954 }
955 }
956
957 // Move past the aligned DPRCS2 area.
958 if (AFI->getNumAlignedDPRCS2Regs() > 0) {
960 // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and
961 // leaves the stack pointer pointing to the DPRCS2 area.
962 //
963 // Adjust NumBytes to represent the stack slots below the DPRCS2 area.
964 NumBytes += MFI.getObjectOffset(D8SpillFI);
965 } else
966 NumBytes = DPRCSOffset;
967
968 if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) {
969 GPRCS2Push = LastPush = MBBI++;
970 DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size);
971 }
972
973 bool NeedsWinCFIStackAlloc = NeedsWinCFI;
974 if (STI.splitFramePointerPush(MF) && HasFP)
975 NeedsWinCFIStackAlloc = false;
976
977 if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) {
978 uint32_t NumWords = NumBytes >> 2;
979
980 if (NumWords < 65536) {
981 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
982 .addImm(NumWords)
985 } else {
986 // Split into two instructions here, instead of using t2MOVi32imm,
987 // to allow inserting accurate SEH instructions (including accurate
988 // instruction size for each of them).
989 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi16), ARM::R4)
990 .addImm(NumWords & 0xffff)
993 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVTi16), ARM::R4)
994 .addReg(ARM::R4)
995 .addImm(NumWords >> 16)
998 }
999
1000 switch (TM.getCodeModel()) {
1001 case CodeModel::Tiny:
1002 llvm_unreachable("Tiny code model not available on ARM.");
1003 case CodeModel::Small:
1004 case CodeModel::Medium:
1005 case CodeModel::Kernel:
1006 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBL))
1008 .addExternalSymbol("__chkstk")
1009 .addReg(ARM::R4, RegState::Implicit)
1011 break;
1012 case CodeModel::Large:
1013 BuildMI(MBB, MBBI, dl, TII.get(ARM::t2MOVi32imm), ARM::R12)
1014 .addExternalSymbol("__chkstk")
1016
1017 BuildMI(MBB, MBBI, dl, TII.get(ARM::tBLXr))
1019 .addReg(ARM::R12, RegState::Kill)
1020 .addReg(ARM::R4, RegState::Implicit)
1022 break;
1023 }
1024
1025 MachineInstrBuilder Instr, SEH;
1026 Instr = BuildMI(MBB, MBBI, dl, TII.get(ARM::t2SUBrr), ARM::SP)
1027 .addReg(ARM::SP, RegState::Kill)
1028 .addReg(ARM::R4, RegState::Kill)
1031 .add(condCodeOp());
1032 if (NeedsWinCFIStackAlloc) {
1033 SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc))
1034 .addImm(NumBytes)
1035 .addImm(/*Wide=*/1)
1037 MBB.insertAfter(Instr, SEH);
1038 }
1039 NumBytes = 0;
1040 }
1041
1042 if (NumBytes) {
1043 // Adjust SP after all the callee-save spills.
1044 if (AFI->getNumAlignedDPRCS2Regs() == 0 &&
1045 tryFoldSPUpdateIntoPushPop(STI, MF, &*LastPush, NumBytes))
1046 DefCFAOffsetCandidates.addExtraBytes(LastPush, NumBytes);
1047 else {
1048 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes,
1050 DefCFAOffsetCandidates.addInst(std::prev(MBBI), NumBytes);
1051 }
1052
1053 if (HasFP && isARM)
1054 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24
1055 // Note it's not safe to do this in Thumb2 mode because it would have
1056 // taken two instructions:
1057 // mov sp, r7
1058 // sub sp, #24
1059 // If an interrupt is taken between the two instructions, then sp is in
1060 // an inconsistent state (pointing to the middle of callee-saved area).
1061 // The interrupt handler can end up clobbering the registers.
1062 AFI->setShouldRestoreSPFromFP(true);
1063 }
1064
1065 // Set FP to point to the stack slot that contains the previous FP.
1066 // For iOS, FP is R7, which has now been stored in spill area 1.
1067 // Otherwise, if this is not iOS, all the callee-saved registers go
1068 // into spill area 1, including the FP in R11. In either case, it
1069 // is in area one and the adjustment needs to take place just after
1070 // that push.
1071 // FIXME: The above is not necessarily true when PACBTI is enabled.
1072 // AAPCS requires use of R11, and PACBTI gets in the way of regular pushes,
1073 // so FP ends up on area two.
1075 if (HasFP) {
1076 AfterPush = std::next(GPRCS1Push);
1077 unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push);
1078 int FPOffset = PushSize + FramePtrOffsetInPush;
1079 if (STI.splitFramePointerPush(MF)) {
1080 AfterPush = std::next(GPRCS2Push);
1081 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1082 FramePtr, ARM::SP, 0, MachineInstr::FrameSetup);
1083 } else {
1084 emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII,
1085 FramePtr, ARM::SP, FPOffset,
1087 }
1088 if (!NeedsWinCFI) {
1089 if (FramePtrOffsetInPush + PushSize != 0) {
1090 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa(
1091 nullptr, MRI->getDwarfRegNum(FramePtr, true),
1092 FPCXTSaveSize + ArgRegsSaveSize - FramePtrOffsetInPush));
1093 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1094 .addCFIIndex(CFIIndex)
1096 } else {
1097 unsigned CFIIndex =
1099 nullptr, MRI->getDwarfRegNum(FramePtr, true)));
1100 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1101 .addCFIIndex(CFIIndex)
1103 }
1104 }
1105 }
1106
1107 // Emit a SEH opcode indicating the prologue end. The rest of the prologue
1108 // instructions below don't need to be replayed to unwind the stack.
1109 if (NeedsWinCFI && MBBI != MBB.begin()) {
1111 if (HasFP && STI.splitFramePointerPush(MF))
1112 End = AfterPush;
1114 BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd))
1116 MF.setHasWinCFI(true);
1117 }
1118
1119 // Now that the prologue's actual instructions are finalised, we can insert
1120 // the necessary DWARF cf instructions to describe the situation. Start by
1121 // recording where each register ended up:
1122 if (GPRCS1Size > 0 && !NeedsWinCFI) {
1123 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push);
1124 int CFIIndex;
1125 for (const auto &Entry : CSI) {
1126 Register Reg = Entry.getReg();
1127 int FI = Entry.getFrameIdx();
1128 switch (Reg) {
1129 case ARM::R8:
1130 case ARM::R9:
1131 case ARM::R10:
1132 case ARM::R11:
1133 case ARM::R12:
1134 if (STI.splitFramePushPop(MF))
1135 break;
1136 [[fallthrough]];
1137 case ARM::R0:
1138 case ARM::R1:
1139 case ARM::R2:
1140 case ARM::R3:
1141 case ARM::R4:
1142 case ARM::R5:
1143 case ARM::R6:
1144 case ARM::R7:
1145 case ARM::LR:
1147 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI)));
1148 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1149 .addCFIIndex(CFIIndex)
1151 break;
1152 }
1153 }
1154 }
1155
1156 if (GPRCS2Size > 0 && !NeedsWinCFI) {
1157 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push);
1158 for (const auto &Entry : CSI) {
1159 Register Reg = Entry.getReg();
1160 int FI = Entry.getFrameIdx();
1161 switch (Reg) {
1162 case ARM::R8:
1163 case ARM::R9:
1164 case ARM::R10:
1165 case ARM::R11:
1166 case ARM::R12:
1167 if (STI.splitFramePushPop(MF)) {
1168 unsigned DwarfReg = MRI->getDwarfRegNum(
1169 Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true);
1170 unsigned Offset = MFI.getObjectOffset(FI);
1171 unsigned CFIIndex = MF.addFrameInst(
1172 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1173 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1174 .addCFIIndex(CFIIndex)
1176 }
1177 break;
1178 }
1179 }
1180 }
1181
1182 if (DPRCSSize > 0 && !NeedsWinCFI) {
1183 // Since vpush register list cannot have gaps, there may be multiple vpush
1184 // instructions in the prologue.
1185 MachineBasicBlock::iterator Pos = std::next(LastPush);
1186 for (const auto &Entry : CSI) {
1187 Register Reg = Entry.getReg();
1188 int FI = Entry.getFrameIdx();
1189 if ((Reg >= ARM::D0 && Reg <= ARM::D31) &&
1190 (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs())) {
1191 unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true);
1192 unsigned Offset = MFI.getObjectOffset(FI);
1193 unsigned CFIIndex = MF.addFrameInst(
1194 MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset));
1195 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1196 .addCFIIndex(CFIIndex)
1198 }
1199 }
1200 }
1201
1202 // Now we can emit descriptions of where the canonical frame address was
1203 // throughout the process. If we have a frame pointer, it takes over the job
1204 // half-way through, so only the first few .cfi_def_cfa_offset instructions
1205 // actually get emitted.
1206 if (!NeedsWinCFI)
1207 DefCFAOffsetCandidates.emitDefCFAOffsets(MBB, dl, TII, HasFP);
1208
1209 if (STI.isTargetELF() && hasFP(MF))
1211 AFI->getFramePtrSpillOffset());
1212
1213 AFI->setFPCXTSaveAreaSize(FPCXTSaveSize);
1214 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size);
1215 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size);
1216 AFI->setDPRCalleeSavedGapSize(DPRGapSize);
1217 AFI->setDPRCalleeSavedAreaSize(DPRCSSize);
1218
1219 // If we need dynamic stack realignment, do it here. Be paranoid and make
1220 // sure if we also have VLAs, we have a base pointer for frame access.
1221 // If aligned NEON registers were spilled, the stack has already been
1222 // realigned.
1223 if (!AFI->getNumAlignedDPRCS2Regs() && RegInfo->hasStackRealignment(MF)) {
1224 Align MaxAlign = MFI.getMaxAlign();
1226 if (!AFI->isThumbFunction()) {
1227 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::SP, MaxAlign,
1228 false);
1229 } else {
1230 // We cannot use sp as source/dest register here, thus we're using r4 to
1231 // perform the calculations. We're emitting the following sequence:
1232 // mov r4, sp
1233 // -- use emitAligningInstructions to produce best sequence to zero
1234 // -- out lower bits in r4
1235 // mov sp, r4
1236 // FIXME: It will be better just to find spare register here.
1237 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4)
1238 .addReg(ARM::SP, RegState::Kill)
1240 emitAligningInstructions(MF, AFI, TII, MBB, MBBI, dl, ARM::R4, MaxAlign,
1241 false);
1242 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1243 .addReg(ARM::R4, RegState::Kill)
1245 }
1246
1247 AFI->setShouldRestoreSPFromFP(true);
1248 }
1249
1250 // If we need a base pointer, set it up here. It's whatever the value
1251 // of the stack pointer is at this point. Any variable size objects
1252 // will be allocated after this, so we can still use the base pointer
1253 // to reference locals.
1254 // FIXME: Clarify FrameSetup flags here.
1255 if (RegInfo->hasBasePointer(MF)) {
1256 if (isARM)
1257 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), RegInfo->getBaseRegister())
1258 .addReg(ARM::SP)
1260 .add(condCodeOp());
1261 else
1262 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), RegInfo->getBaseRegister())
1263 .addReg(ARM::SP)
1265 }
1266
1267 // If the frame has variable sized objects then the epilogue must restore
1268 // the sp from fp. We can assume there's an FP here since hasFP already
1269 // checks for hasVarSizedObjects.
1270 if (MFI.hasVarSizedObjects())
1271 AFI->setShouldRestoreSPFromFP(true);
1272}
1273
1275 MachineBasicBlock &MBB) const {
// Emits the epilogue for functions that are not Thumb1-only (see the assert
// below). Without a stack frame only the SP adjustment is emitted. Otherwise
// SP is moved back to the callee-save area, MBBI is stepped past the
// callee-save restore instructions, and the reserved argument stack is
// released. When the function has Windows CFI, the emitted range is bracketed
// by SEH_EpilogStart / SEH_EpilogEnd.
1276 MachineFrameInfo &MFI = MF.getFrameInfo();
1278 const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
1279 const ARMBaseInstrInfo &TII =
1280 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
1281 assert(!AFI->isThumb1OnlyFunction() &&
1282 "This emitEpilogue does not support Thumb1!");
1283 bool isARM = !AFI->isThumbFunction();
1284
1285 // Amount of stack space we reserved next to incoming args for either
1286 // varargs registers or stack arguments in tail calls made by this function.
1287 unsigned ReservedArgStack = AFI->getArgRegsSaveSize();
1288
1289 // How much of the stack used by incoming arguments this function is expected
1290 // to restore in this particular epilogue.
1291 int IncomingArgStackToRestore = getArgumentStackToRestore(MF, MBB);
1292 int NumBytes = (int)MFI.getStackSize();
1293 Register FramePtr = RegInfo->getFrameRegister(MF);
1294
1295 // All calls are tail calls in GHC calling conv, and functions have no
1296 // prologue/epilogue.
1298 return;
1299
1300 // First put ourselves on the first (from top) terminator instructions.
1302 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();
1303
1304 MachineBasicBlock::iterator RangeStart;
1305 if (!AFI->hasStackFrame()) {
1306 if (MF.hasWinCFI()) {
1307 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1309 RangeStart = initMBBRange(MBB, MBBI);
1310 }
1311
// No frame was set up: release the local stack and any incoming argument
// stack with a single SP update.
1312 if (NumBytes + IncomingArgStackToRestore != 0)
1313 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1314 NumBytes + IncomingArgStackToRestore,
1316 } else {
1317 // Unwind MBBI to point to first LDR / VLDRD.
1318 if (MBBI != MBB.begin()) {
1319 do {
1320 --MBBI;
1321 } while (MBBI != MBB.begin() &&
1323 if (!MBBI->getFlag(MachineInstr::FrameDestroy))
1324 ++MBBI;
1325 }
1326
1327 if (MF.hasWinCFI()) {
1328 BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_EpilogStart))
1330 RangeStart = initMBBRange(MBB, MBBI);
1331 }
1332
1333 // Move SP to start of FP callee save spill area.
1334 NumBytes -= (ReservedArgStack +
1335 AFI->getFPCXTSaveAreaSize() +
1340
1341 // Reset SP based on frame pointer only if the stack frame extends beyond
1342 // frame pointer stack slot or target is ELF and the function has FP.
1343 if (AFI->shouldRestoreSPFromFP()) {
1344 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes;
// From here on NumBytes is the amount subtracted from the frame pointer to
// obtain the new SP (it is passed below as -NumBytes relative to FramePtr).
1345 if (NumBytes) {
1346 if (isARM)
1347 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes,
1348 ARMCC::AL, 0, TII,
1350 else {
1351 // It's not possible to restore SP from FP in a single instruction.
1352 // For iOS, this looks like:
1353 // mov sp, r7
1354 // sub sp, #24
1355 // This is bad, if an interrupt is taken after the mov, sp is in an
1356 // inconsistent state.
1357 // Use the first callee-saved register as a scratch register.
1358 assert(!MFI.getPristineRegs(MF).test(ARM::R4) &&
1359 "No scratch register to restore SP from FP!");
1360 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes,
1362 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1363 .addReg(ARM::R4)
1366 }
1367 } else {
1368 // Thumb2 or ARM.
1369 if (isARM)
1370 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP)
1373 .add(condCodeOp())
1375 else
1376 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP)
1380 }
1381 } else if (NumBytes &&
1382 !tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes))
1383 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes,
1385
1386 // Increment past our save areas.
1388 MBBI++;
1389
1390 if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) {
1391 MBBI++;
1392 // Since vpop register list cannot have gaps, there may be multiple vpop
1393 // instructions in the epilogue.
1394 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VLDMDIA_UPD)
1395 MBBI++;
1396 }
// Release the DPR alignment gap recorded in AFI; only a 4-byte gap is
// expected here.
1397 if (AFI->getDPRCalleeSavedGapSize()) {
1398 assert(AFI->getDPRCalleeSavedGapSize() == 4 &&
1399 "unexpected DPR alignment gap");
1400 emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedGapSize(),
1402 }
1403
1405 MBBI++;
1406 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++;
1407
1408 if (ReservedArgStack || IncomingArgStackToRestore) {
1409 assert((int)ReservedArgStack + IncomingArgStackToRestore >= 0 &&
1410 "attempting to restore negative stack amount");
1411 emitSPUpdate(isARM, MBB, MBBI, dl, TII,
1412 ReservedArgStack + IncomingArgStackToRestore,
1414 }
1415
1416 // Validate the PAC; it should already have been popped into R12. For a CMSE
1417 // entry function, the validation instruction is emitted during expansion of
1418 // the tBXNS_RET, since the validation must use the value of SP at function
1419 // entry, before saving, resp. after restoring, FPCXTNS.
1420 if (AFI->shouldSignReturnAddress() && !AFI->isCmseNSEntryFunction())
1421 BuildMI(MBB, MBBI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2AUT));
1422 }
1423
1424 if (MF.hasWinCFI()) {
1426 BuildMI(MBB, MBB.end(), dl, TII.get(ARM::SEH_EpilogEnd))
1428 }
1429 }
1430
1431 /// getFrameIndexReference - Provide a base+offset reference to an FI slot for
1432 /// debug info. It's the same as what we use for resolving the code-gen
1433 /// references for now. FIXME: This can go wrong when references are
1434 /// SP-relative and simple call frames aren't used.
///
/// Forwards to ResolveFrameIndexReference with an SP adjustment of 0 and wraps
/// the returned offset as a fixed StackOffset. \p FrameReg is passed through
/// by reference and receives the base register chosen by that call.
1436 int FI,
1437 Register &FrameReg) const {
1438 return StackOffset::getFixed(ResolveFrameIndexReference(MF, FI, FrameReg, 0));
1439 }
1440
1442 int FI, Register &FrameReg,
1443 int SPAdj) const {
// Picks the register used to address frame index FI and returns the offset of
// the slot from that register. FrameReg is set to SP, the frame pointer, or
// the base pointer, depending on stack realignment, whether SP can move, and
// (for Thumb) which offset is in range. SPAdj is only applied to SP-relative
// offsets; it is subtracted again when the base pointer is chosen.
1444 const MachineFrameInfo &MFI = MF.getFrameInfo();
1445 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
1447 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
// Offset is the slot's offset when addressed from SP; FPOffset is the same
// slot's offset when addressed from the frame pointer. FPOffset is computed
// before SPAdj is added, so it never includes the SP adjustment.
1448 int Offset = MFI.getObjectOffset(FI) + MFI.getStackSize();
1449 int FPOffset = Offset - AFI->getFramePtrSpillOffset();
1450 bool isFixed = MFI.isFixedObjectIndex(FI);
1451
1452 FrameReg = ARM::SP;
1453 Offset += SPAdj;
1454
1455 // SP can move around if there are allocas. We may also lose track of SP
1456 // when emergency spilling inside a non-reserved call frame setup.
1457 bool hasMovingSP = !hasReservedCallFrame(MF);
1458
1459 // When dynamically realigning the stack, use the frame pointer for
1460 // parameters, and the stack/base pointer for locals.
1461 if (RegInfo->hasStackRealignment(MF)) {
1462 assert(hasFP(MF) && "dynamic stack realignment without a FP!");
1463 if (isFixed) {
1464 FrameReg = RegInfo->getFrameRegister(MF);
1465 Offset = FPOffset;
1466 } else if (hasMovingSP) {
1467 assert(RegInfo->hasBasePointer(MF) &&
1468 "VLAs and dynamic stack alignment, but missing base pointer!");
1469 FrameReg = RegInfo->getBaseRegister();
1470 Offset -= SPAdj;
1471 }
// Otherwise (local object, SP does not move) the SP-relative offset is used.
1472 return Offset;
1473 }
1474
1475 // If there is a frame pointer, use it when we can.
1476 if (hasFP(MF) && AFI->hasStackFrame()) {
1477 // Use frame pointer to reference fixed objects. Use it for locals if
1478 // there are VLAs (and thus the SP isn't reliable as a base).
1479 if (isFixed || (hasMovingSP && !RegInfo->hasBasePointer(MF))) {
1480 FrameReg = RegInfo->getFrameRegister(MF);
1481 return FPOffset;
1482 } else if (hasMovingSP) {
1483 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!");
1484 if (AFI->isThumb2Function()) {
1485 // Try to use the frame pointer if we can, else use the base pointer
1486 // since it's available. This is handy for the emergency spill slot, in
1487 // particular.
1488 if (FPOffset >= -255 && FPOffset < 0) {
1489 FrameReg = RegInfo->getFrameRegister(MF);
1490 return FPOffset;
1491 }
1492 }
1493 } else if (AFI->isThumbFunction()) {
1494 // Prefer SP to base pointer, if the offset is suitably aligned and in
1495 // range as the effective range of the immediate offset is bigger when
1496 // basing off SP.
1497 // Use add <rd>, sp, #<imm8>
1498 // ldr <rd>, [sp, #<imm8>]
1499 if (Offset >= 0 && (Offset & 3) == 0 && Offset <= 1020)
1500 return Offset;
1501 // In Thumb2 mode, the negative offset is very limited. Try to avoid
1502 // out of range references. ldr <rt>,[<rn>, #-<imm8>]
1503 if (AFI->isThumb2Function() && FPOffset >= -255 && FPOffset < 0) {
1504 FrameReg = RegInfo->getFrameRegister(MF);
1505 return FPOffset;
1506 }
1507 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) {
1508 // Otherwise, use SP or FP, whichever is closer to the stack slot.
// (The test above compares the SP offset against the magnitude of FPOffset.)
1509 FrameReg = RegInfo->getFrameRegister(MF);
1510 return FPOffset;
1511 }
1512 }
1513 // Use the base pointer if we have one.
1514 // FIXME: Maybe prefer sp on Thumb1 if it's legal and the offset is cheaper?
1515 // That can happen if we forced a base pointer for a large call frame.
1516 if (RegInfo->hasBasePointer(MF)) {
1517 FrameReg = RegInfo->getBaseRegister();
1518 Offset -= SPAdj;
1519 }
1520 return Offset;
1521 }
1522
/// Emit the store instructions that spill the callee-saved registers in CSI
/// accepted by \p Func. CSI is walked from its last entry to its first, and
/// the registers collected for one instruction are sorted by encoding value.
///
/// \param StmOpc  opcode used when more than one register was collected, or
///                when \p StrOpc is 0.
/// \param StrOpc  opcode used to store a single register with an SP offset of
///                -4; pass 0 to always use \p StmOpc.
/// \param NoGap   if true, one instruction only covers consecutively numbered
///                registers, so several instructions may be emitted.
/// \param Func    predicate selecting the registers handled by this call; its
///                second argument is STI.splitFramePushPop(MF).
/// \param NumAlignedDPRCS2Regs  number of D-registers, starting at d8, that
///                are skipped here.
/// \param MIFlags flags set on each emitted instruction.
1523 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB,
1526 unsigned StmOpc, unsigned StrOpc,
1527 bool NoGap, bool (*Func)(unsigned, bool),
1528 unsigned NumAlignedDPRCS2Regs,
1529 unsigned MIFlags) const {
1530 MachineFunction &MF = *MBB.getParent();
1533
1534 DebugLoc DL;
1535
1536 using RegAndKill = std::pair<unsigned, bool>;
1537
// Each pass of the outer loop collects one batch of registers and emits one
// instruction for it; i keeps its position in CSI across passes.
1539 unsigned i = CSI.size();
1540 while (i != 0) {
1541 unsigned LastReg = 0;
1542 for (; i != 0; --i) {
1543 Register Reg = CSI[i-1].getReg();
1544 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1545
1546 // D-registers in the aligned area DPRCS2 are NOT spilled here.
1547 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1548 continue;
1549
1550 const MachineRegisterInfo &MRI = MF.getRegInfo();
1551 bool isLiveIn = MRI.isLiveIn(Reg);
1552 if (!isLiveIn && !MRI.isReserved(Reg))
1553 MBB.addLiveIn(Reg);
1554 // If NoGap is true, push consecutive registers and then leave the rest
1555 // for other instructions. e.g.
1556 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11}
1557 if (NoGap && LastReg && LastReg != Reg-1)
1558 break;
1559 LastReg = Reg;
1560 // Do not set a kill flag on values that are also marked as live-in. This
1561 // happens with the @llvm-returnaddress intrinsic and with arguments
1562 // passed in callee saved registers.
1563 // Omitting the kill flags is conservatively correct even if the live-in
1564 // is not used after all.
1565 Regs.push_back(std::make_pair(Reg, /*isKill=*/!isLiveIn));
1566 }
1567
1568 if (Regs.empty())
1569 continue;
1570
1571 llvm::sort(Regs, [&](const RegAndKill &LHS, const RegAndKill &RHS) {
1572 return TRI.getEncodingValue(LHS.first) < TRI.getEncodingValue(RHS.first);
1573 });
1574
1575 if (Regs.size() > 1 || StrOpc== 0) {
1576 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP)
1577 .addReg(ARM::SP)
1578 .setMIFlags(MIFlags)
1580 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1581 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second));
1582 } else if (Regs.size() == 1) {
1583 BuildMI(MBB, MI, DL, TII.get(StrOpc), ARM::SP)
1584 .addReg(Regs[0].first, getKillRegState(Regs[0].second))
1585 .addReg(ARM::SP)
1586 .setMIFlags(MIFlags)
1587 .addImm(-4)
1589 }
1590 Regs.clear();
1591
1592 // Put any subsequent vpush instructions before this one: they will refer to
1593 // higher register numbers so need to be pushed first in order to preserve
1594 // monotonicity.
1595 if (MI != MBB.begin())
1596 --MI;
1597 }
1598 }
1599
/// Emit the load instructions that restore the callee-saved registers in CSI
/// accepted by \p Func. CSI is walked from its last entry to its first, and
/// the registers collected for one instruction are sorted by encoding value.
///
/// When LR is among the registers and the conditions checked in the loop hold
/// (not a tail call, vararg, interrupt, trap or CMSE-entry return, among
/// others), PC is loaded instead of LR and the return instruction at \p MI is
/// folded into the load-multiple.
///
/// \param LdmOpc  opcode used when more than one register was collected, or
///                when \p LdrOpc is 0; replaced by an LDMIA_RET variant when
///                the return is folded.
/// \param LdrOpc  opcode used to load a single register; pass 0 to always use
///                \p LdmOpc.
/// \param isVarArg  when true, the return is never folded into the load.
/// \param NoGap   if true, one instruction only covers consecutively numbered
///                registers, so several instructions may be emitted.
/// \param Func    predicate selecting the registers handled by this call; its
///                second argument is STI.splitFramePushPop(MF).
/// \param NumAlignedDPRCS2Regs  number of D-registers, starting at d8, that
///                are skipped here.
1600 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB,
1603 unsigned LdmOpc, unsigned LdrOpc,
1604 bool isVarArg, bool NoGap,
1605 bool (*Func)(unsigned, bool),
1606 unsigned NumAlignedDPRCS2Regs) const {
1607 MachineFunction &MF = *MBB.getParent();
1611 bool hasPAC = AFI->shouldSignReturnAddress();
1612 DebugLoc DL;
1613 bool isTailCall = false;
1614 bool isInterrupt = false;
1615 bool isTrap = false;
1616 bool isCmseEntry = false;
// Classify the instruction at MI (if any) by opcode; the flags below decide
// whether the return may be folded into the pop.
1617 if (MBB.end() != MI) {
1618 DL = MI->getDebugLoc();
1619 unsigned RetOpcode = MI->getOpcode();
1620 isTailCall =
1621 (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri ||
1622 RetOpcode == ARM::TCRETURNrinotr12);
1623 isInterrupt =
1624 RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR;
1625 isTrap =
1626 RetOpcode == ARM::TRAP || RetOpcode == ARM::TRAPNaCl ||
1627 RetOpcode == ARM::tTRAP;
1628 isCmseEntry = (RetOpcode == ARM::tBXNS || RetOpcode == ARM::tBXNS_RET);
1629 }
1630
1632 unsigned i = CSI.size();
1633 while (i != 0) {
1634 unsigned LastReg = 0;
1635 bool DeleteRet = false;
1636 for (; i != 0; --i) {
1637 CalleeSavedInfo &Info = CSI[i-1];
1638 Register Reg = Info.getReg();
1639 if (!(Func)(Reg, STI.splitFramePushPop(MF))) continue;
1640
1641 // The aligned reloads from area DPRCS2 are not inserted here.
1642 if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs)
1643 continue;
1644 if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt &&
1645 !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 &&
1646 STI.hasV5TOps() && MBB.succ_empty() && !hasPAC &&
1648 Reg = ARM::PC;
1649 // Fold the return instruction into the LDM.
1650 DeleteRet = true;
1651 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET;
1652 }
1653
1654 // If NoGap is true, pop consecutive registers and then leave the rest
1655 // for other instructions. e.g.
1656 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11}
1657 if (NoGap && LastReg && LastReg != Reg-1)
1658 break;
1659
1660 LastReg = Reg;
1661 Regs.push_back(Reg);
1662 }
1663
1664 if (Regs.empty())
1665 continue;
1666
1667 llvm::sort(Regs, [&](unsigned LHS, unsigned RHS) {
1668 return TRI.getEncodingValue(LHS) < TRI.getEncodingValue(RHS);
1669 });
1670
1671 if (Regs.size() > 1 || LdrOpc == 0) {
1672 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP)
1673 .addReg(ARM::SP)
1676 for (unsigned i = 0, e = Regs.size(); i < e; ++i)
1677 MIB.addReg(Regs[i], getDefRegState(true));
// The folded load replaces the return: keep the return's implicit operands
// on the new instruction, then erase the return.
1678 if (DeleteRet) {
1679 if (MI != MBB.end()) {
1680 MIB.copyImplicitOps(*MI);
1681 MI->eraseFromParent();
1682 }
1683 }
1684 MI = MIB;
1685 } else if (Regs.size() == 1) {
1686 // If we adjusted the reg to PC from LR above, switch it back here. We
1687 // only do that for LDM.
1688 if (Regs[0] == ARM::PC)
1689 Regs[0] = ARM::LR;
1691 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0])
1692 .addReg(ARM::SP, RegState::Define)
1693 .addReg(ARM::SP)
1695 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once
1696 // that refactoring is complete (eventually).
1697 if (LdrOpc == ARM::LDR_POST_REG || LdrOpc == ARM::LDR_POST_IMM) {
1698 MIB.addReg(0);
1700 } else
1701 MIB.addImm(4);
1702 MIB.add(predOps(ARMCC::AL));
1703 }
1704 Regs.clear();
1705
1706 // Put any subsequent vpop instructions after this one: they will refer to
1707 // higher register numbers so need to be popped afterwards.
1708 if (MI != MBB.end())
1709 ++MI;
1710 }
1711 }
1712
1713 /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers
1714 /// starting from d8. Also insert stack realignment code and leave the stack
1715 /// pointer pointing to the d8 spill slot.
///
/// r4 is used as the scratch register holding the aligned slot address, and
/// the function is marked as needing SP restored from FP because SP is
/// realigned here. The spill slots of the affected registers also get their
/// alignment updated in the MachineFrameInfo.
1718 unsigned NumAlignedDPRCS2Regs,
1720 const TargetRegisterInfo *TRI) {
1721 MachineFunction &MF = *MBB.getParent();
1723 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1725 MachineFrameInfo &MFI = MF.getFrameInfo();
1726
1727 // Mark the D-register spill slots as properly aligned. Since MFI computes
1728 // stack slot layout backwards, this can actually mean that the d-reg stack
1729 // slot offsets can be wrong. The offset for d8 will always be correct.
1730 for (const CalleeSavedInfo &I : CSI) {
1731 unsigned DNum = I.getReg() - ARM::D8;
// DNum is unsigned, so any register numbered below ARM::D8 wraps around to a
// large value and is skipped by this test as well.
1732 if (DNum > NumAlignedDPRCS2Regs - 1)
1733 continue;
1734 int FI = I.getFrameIdx();
1735 // The even-numbered registers will be 16-byte aligned, the odd-numbered
1736 // registers will be 8-byte aligned.
1737 MFI.setObjectAlignment(FI, DNum % 2 ? Align(8) : Align(16));
1738
1739 // The stack slot for D8 needs to be maximally aligned because this is
1740 // actually the point where we align the stack pointer. MachineFrameInfo
1741 // computes all offsets relative to the incoming stack pointer which is a
1742 // bit weird when realigning the stack. Any extra padding for this
1743 // over-alignment is not realized because the code inserted below adjusts
1744 // the stack pointer by numregs * 8 before aligning the stack pointer.
1745 if (DNum == 0)
1746 MFI.setObjectAlignment(FI, MFI.getMaxAlign());
1747 }
1748
1749 // Move the stack pointer to the d8 spill slot, and align it at the same
1750 // time. Leave the stack slot address in the scratch register r4.
1751 //
1752 // sub r4, sp, #numregs * 8
1753 // bic r4, r4, #align - 1
1754 // mov sp, r4
1755 //
1756 bool isThumb = AFI->isThumbFunction();
1757 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1758 AFI->setShouldRestoreSPFromFP(true);
1759
1760 // sub r4, sp, #numregs * 8
1761 // The immediate is <= 64, so it doesn't need any special encoding.
1762 unsigned Opc = isThumb ? ARM::t2SUBri : ARM::SUBri;
1763 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1764 .addReg(ARM::SP)
1765 .addImm(8 * NumAlignedDPRCS2Regs)
1767 .add(condCodeOp());
1768
1769 Align MaxAlign = MF.getFrameInfo().getMaxAlign();
1770 // We must set parameter MustBeSingleInstruction to true, since
1771 // skipAlignedDPRCS2Spills expects exactly 3 instructions to perform
1772 // stack alignment. Luckily, this can always be done since all ARM
1773 // architecture versions that support Neon also support the BFC
1774 // instruction.
1775 emitAligningInstructions(MF, AFI, TII, MBB, MI, DL, ARM::R4, MaxAlign, true);
1776
1777 // mov sp, r4
1778 // The stack pointer must be adjusted before spilling anything, otherwise
1779 // the stack slots could be clobbered by an interrupt handler.
1780 // Leave r4 live, it is used below.
1781 Opc = isThumb ? ARM::tMOVr : ARM::MOVr;
1782 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(Opc), ARM::SP)
1783 .addReg(ARM::R4)
1785 if (!isThumb)
1786 MIB.add(condCodeOp());
1787
1788 // Now spill NumAlignedDPRCS2Regs registers starting from d8.
1789 // r4 holds the stack slot address.
// NumAlignedDPRCS2Regs is counted down below as registers are spilled.
1790 unsigned NextReg = ARM::D8;
1791
1792 // 16-byte aligned vst1.64 with 4 d-regs and address writeback.
1793 // The writeback is only needed when emitting two vst1.64 instructions.
1794 if (NumAlignedDPRCS2Regs >= 6) {
1795 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1796 &ARM::QQPRRegClass);
1797 MBB.addLiveIn(SupReg);
1798 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Qwb_fixed), ARM::R4)
1799 .addReg(ARM::R4, RegState::Kill)
1800 .addImm(16)
1801 .addReg(NextReg)
1804 NextReg += 4;
1805 NumAlignedDPRCS2Regs -= 4;
1806 }
1807
1808 // We won't modify r4 beyond this point. It currently points to the next
1809 // register to be spilled.
1810 unsigned R4BaseReg = NextReg;
1811
1812 // 16-byte aligned vst1.64 with 4 d-regs, no writeback.
1813 if (NumAlignedDPRCS2Regs >= 4) {
1814 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1815 &ARM::QQPRRegClass);
1816 MBB.addLiveIn(SupReg);
1817 BuildMI(MBB, MI, DL, TII.get(ARM::VST1d64Q))
1818 .addReg(ARM::R4)
1819 .addImm(16)
1820 .addReg(NextReg)
1823 NextReg += 4;
1824 NumAlignedDPRCS2Regs -= 4;
1825 }
1826
1827 // 16-byte aligned vst1.64 with 2 d-regs.
1828 if (NumAlignedDPRCS2Regs >= 2) {
1829 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1830 &ARM::QPRRegClass);
1831 MBB.addLiveIn(SupReg);
1832 BuildMI(MBB, MI, DL, TII.get(ARM::VST1q64))
1833 .addReg(ARM::R4)
1834 .addImm(16)
1835 .addReg(SupReg)
1837 NextReg += 2;
1838 NumAlignedDPRCS2Regs -= 2;
1839 }
1840
1841 // Finally, use a vanilla vstr.64 for the odd last register.
1842 if (NumAlignedDPRCS2Regs) {
1843 MBB.addLiveIn(NextReg);
1844 // vstr.64 uses addrmode5 which has an offset scale of 4.
// Each D-register occupies 8 bytes, i.e. 2 offset units per register past
// the one r4 points at.
1845 BuildMI(MBB, MI, DL, TII.get(ARM::VSTRD))
1846 .addReg(NextReg)
1847 .addReg(ARM::R4)
1848 .addImm((NextReg - R4BaseReg) * 2)
1850 }
1851
1852 // The last spill instruction inserted should kill the scratch register r4.
1853 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1854 }
1855
1856 /// Skip past the code inserted by emitAlignedDPRCS2Spills, and return an
1857 /// iterator to the following instruction.
///
/// The instruction counts assumed here must stay in sync with what
/// emitAlignedDPRCS2Spills inserts: three stack-alignment instructions
/// followed by one to three stores, depending on \p NumAlignedDPRCS2Regs.
1860 unsigned NumAlignedDPRCS2Regs) {
1861 // sub r4, sp, #numregs * 8
1862 // bic r4, r4, #align - 1
1863 // mov sp, r4
1864 ++MI; ++MI; ++MI;
1865 assert(MI->mayStore() && "Expecting spill instruction");
1866
1867 // These switches all fall through.
// MI is on the first store here. Seven registers take three stores; one, two
// or four registers take a single store; every other count takes two. Each
// extra store is stepped over before the final case checks the r4 kill flag
// on the last store and steps past it.
1868 switch(NumAlignedDPRCS2Regs) {
1869 case 7:
1870 ++MI;
1871 assert(MI->mayStore() && "Expecting spill instruction");
1872 [[fallthrough]];
1873 default:
1874 ++MI;
1875 assert(MI->mayStore() && "Expecting spill instruction");
1876 [[fallthrough]];
1877 case 1:
1878 case 2:
1879 case 4:
1880 assert(MI->killsRegister(ARM::R4, /*TRI=*/nullptr) && "Missed kill flag");
1881 ++MI;
1882 }
1883 return MI;
1884 }
1885
1886 /// Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers
1887 /// starting from d8. These instructions are assumed to execute while the
1888 /// stack is still aligned, unlike the code inserted by emitPopInst.
///
/// r4 is used as the scratch register holding the address of the d8 spill
/// slot; the last reload emitted is marked as killing it.
1891 unsigned NumAlignedDPRCS2Regs,
1893 const TargetRegisterInfo *TRI) {
1894 MachineFunction &MF = *MBB.getParent();
1896 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
1898
1899 // Find the frame index assigned to d8.
// D8SpillFI stays 0 if CSI has no entry for d8.
1900 int D8SpillFI = 0;
1901 for (const CalleeSavedInfo &I : CSI)
1902 if (I.getReg() == ARM::D8) {
1903 D8SpillFI = I.getFrameIdx();
1904 break;
1905 }
1906
1907 // Materialize the address of the d8 spill slot into the scratch register r4.
1908 // This can be fairly complicated if the stack frame is large, so just use
1909 // the normal frame index elimination mechanism to do it. This code runs as
1910 // the initial part of the epilog where the stack and base pointers haven't
1911 // been changed yet.
1912 bool isThumb = AFI->isThumbFunction();
1913 assert(!AFI->isThumb1OnlyFunction() && "Can't realign stack for thumb1");
1914
1915 unsigned Opc = isThumb ? ARM::t2ADDri : ARM::ADDri;
1916 BuildMI(MBB, MI, DL, TII.get(Opc), ARM::R4)
1917 .addFrameIndex(D8SpillFI)
1918 .addImm(0)
1920 .add(condCodeOp());
1921
1922 // Now restore NumAlignedDPRCS2Regs registers starting from d8.
// NumAlignedDPRCS2Regs is counted down below as registers are reloaded.
1923 unsigned NextReg = ARM::D8;
1924
1925 // 16-byte aligned vld1.64 with 4 d-regs and writeback.
1926 if (NumAlignedDPRCS2Regs >= 6) {
1927 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1928 &ARM::QQPRRegClass);
1929 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Qwb_fixed), NextReg)
1930 .addReg(ARM::R4, RegState::Define)
1931 .addReg(ARM::R4, RegState::Kill)
1932 .addImm(16)
1935 NextReg += 4;
1936 NumAlignedDPRCS2Regs -= 4;
1937 }
1938
1939 // We won't modify r4 beyond this point. It currently points to the next
1940 // register to be reloaded.
1941 unsigned R4BaseReg = NextReg;
1942
1943 // 16-byte aligned vld1.64 with 4 d-regs, no writeback.
1944 if (NumAlignedDPRCS2Regs >= 4) {
1945 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1946 &ARM::QQPRRegClass);
1947 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1d64Q), NextReg)
1948 .addReg(ARM::R4)
1949 .addImm(16)
1952 NextReg += 4;
1953 NumAlignedDPRCS2Regs -= 4;
1954 }
1955
1956 // 16-byte aligned vld1.64 with 2 d-regs.
1957 if (NumAlignedDPRCS2Regs >= 2) {
1958 unsigned SupReg = TRI->getMatchingSuperReg(NextReg, ARM::dsub_0,
1959 &ARM::QPRRegClass);
1960 BuildMI(MBB, MI, DL, TII.get(ARM::VLD1q64), SupReg)
1961 .addReg(ARM::R4)
1962 .addImm(16)
1964 NextReg += 2;
1965 NumAlignedDPRCS2Regs -= 2;
1966 }
1967
1968 // Finally, use a vanilla vldr.64 for the remaining odd register.
// The immediate is 2 units per D-register past the one r4 points at.
1969 if (NumAlignedDPRCS2Regs)
1970 BuildMI(MBB, MI, DL, TII.get(ARM::VLDRD), NextReg)
1971 .addReg(ARM::R4)
1972 .addImm(2 * (NextReg - R4BaseReg))
1974
1975 // Last load kills r4.
1976 std::prev(MI)->addRegisterKilled(ARM::R4, TRI);
1977 }
1978
// NOTE(review): the signature of this function (listing lines 1979-1981) is
// absent from this extract; the body uses MBB, MI, CSI and TRI, which are
// presumably its parameters. Lines 1986, 1996, 2006, 2010, 2014, 2017 and 2019
// are absent too, so AFI/STI have no visible declaration and several calls
// below appear truncated.
//
// Emits the callee-saved register pushes for the entries in CSI. Returns false
// without emitting anything when CSI is empty, true otherwise.
1982 if (CSI.empty())
1983 return false;
1984
1985 MachineFunction &MF = *MBB.getParent();
1987
// Thumb and ARM functions use different push opcodes; the D-register push
// opcode is the same for both.
1988 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD;
1989 unsigned PushOneOpc = AFI->isThumbFunction() ?
1990 ARM::t2STR_PRE : ARM::STR_PRE_IMM;
1991 unsigned FltOpc = ARM::VSTMDDB_UPD;
1992 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
1993 // Compute PAC in R12.
1994 if (AFI->shouldSignReturnAddress()) {
1995 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::t2PAC))
1997 }
1998 // Save the non-secure floating point context. Done only when CSI contains
// an entry for FPCXTNS; the store pre-decrements SP by 4.
1999 if (llvm::any_of(CSI, [](const CalleeSavedInfo &C) {
2000 return C.getReg() == ARM::FPCXTNS;
2001 })) {
2002 BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VSTR_FPCXTNS_pre),
2003 ARM::SP)
2004 .addReg(ARM::SP)
2005 .addImm(-4)
2007 }
// Three push groups are emitted in either configuration; with a split frame
// pointer push the D-register group (area 3) goes in the middle, otherwise it
// goes last.
2008 if (STI.splitFramePointerPush(MF)) {
2009 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2011 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2012 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2013 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false,
2015 } else {
2016 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register,
2018 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register,
2020 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register,
2021 NumAlignedDPRCS2Regs, MachineInstr::FrameSetup);
2022 }
2023
2024 // The code above does not insert spill code for the aligned DPRCS2 registers.
2025 // The stack realignment code will be inserted between the push instructions
2026 // and these spills.
2027 if (NumAlignedDPRCS2Regs)
2028 emitAlignedDPRCS2Spills(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2029
2030 return true;
2031}
2032
// NOTE(review): the signature of this function (listing lines 2033-2035) is
// absent from this extract; the body uses MBB, MI, CSI and TRI, which are
// presumably its parameters. Lines 2040, 2055 and 2059 are absent too, so
// AFI/STI have no visible declaration and two emitPopInst calls below appear
// truncated.
//
// Emits the callee-saved register reloads for the entries in CSI. Returns
// false without emitting anything when CSI is empty, true otherwise.
2036 if (CSI.empty())
2037 return false;
2038
2039 MachineFunction &MF = *MBB.getParent();
// A non-zero argument-register save area is taken to mean the function is
// variadic.
2041 bool isVarArg = AFI->getArgRegsSaveSize() > 0;
2042 unsigned NumAlignedDPRCS2Regs = AFI->getNumAlignedDPRCS2Regs();
2043
2044 // The emitPopInst calls below do not insert reloads for the aligned DPRCS2
2045 // registers. Do that here instead.
2046 if (NumAlignedDPRCS2Regs)
2047 emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI);
2048
2049 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD;
2050 unsigned LdrOpc =
2051 AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST_IMM;
2052 unsigned FltOpc = ARM::VLDMDIA_UPD;
// Without a split frame pointer push the areas are popped in the order
// 3, 2, 1; with it the D-register area (3) is popped between two GPR pops.
2053 if (STI.splitFramePointerPush(MF)) {
2054 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2056 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2057 NumAlignedDPRCS2Regs);
2058 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2060 } else {
2061 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register,
2062 NumAlignedDPRCS2Regs);
2063 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2064 &isARMArea2Register, 0);
2065 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false,
2066 &isARMArea1Register, 0);
2067 }
2068
2069 return true;
2070}
2071
2072// FIXME: Make generic?
// Returns an estimate of the function's size in bytes: the sum of
// TII.getInstSizeInBytes over every instruction, plus 4 bytes per jump table
// entry (when the function has jump table info) and 4 bytes per constant pool
// entry.
// NOTE(review): listing line 2073 (the start of the signature) is absent from
// this extract; the call EstimateFunctionSizeInBytes(MF, TII) elsewhere in
// this file matches the names used below.
2074 const ARMBaseInstrInfo &TII) {
2075 unsigned FnSize = 0;
2076 for (auto &MBB : MF) {
2077 for (auto &MI : MBB)
2078 FnSize += TII.getInstSizeInBytes(MI);
2079 }
2080 if (MF.getJumpTableInfo())
2081 for (auto &Table: MF.getJumpTableInfo()->getJumpTables())
2082 FnSize += Table.MBBs.size() * 4;
// Every constant pool entry is counted as 4 bytes, whatever its actual size.
2083 FnSize += MF.getConstantPool()->getConstants().size() * 4;
2084 return FnSize;
2085}
2086
2087/// estimateRSStackSizeLimit - Look at each instruction that references stack
2088/// frames and return the stack size limit beyond which some of these
2089/// instructions will require a scratch register during their expansion later.
///
/// The limit starts at 4095 and is lowered by each frame-index-using
/// instruction according to its opcode or addressing mode; 0 is returned as
/// soon as an AddrMode4 or AddrMode6 instruction references a frame index.
/// HasNonSPFrameIndex is set to true (never cleared) when a frame index sits
/// in an operand whose register class does not contain SP.
2090// FIXME: Move to TII?
// NOTE(review): listing line 2091 (the start of the signature) and line 2097
// (presumably the declaration of TRI used below) are absent from this
// extract. Several `case` labels inside the switch (lines 2125, 2130, 2133,
// 2137-2138, 2141, 2152, 2155, 2158) are absent as well, so some arms below
// cannot be attributed to an addressing mode from this listing alone.
2092 const TargetFrameLowering *TFI,
2093 bool &HasNonSPFrameIndex) {
2094 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2095 const ARMBaseInstrInfo &TII =
2096 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2098 unsigned Limit = (1 << 12) - 1;
2099 for (auto &MBB : MF) {
2100 for (auto &MI : MBB) {
2101 if (MI.isDebugInstr())
2102 continue;
2103 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
2104 if (!MI.getOperand(i).isFI())
2105 continue;
2106
2107 // When using ADDri to get the address of a stack object, 255 is the
2108 // largest offset guaranteed to fit in the immediate offset.
2109 if (MI.getOpcode() == ARM::ADDri) {
2110 Limit = std::min(Limit, (1U << 8) - 1);
2111 break;
2112 }
2113 // t2ADDri will not require an extra register, it can reuse the
2114 // destination.
2115 if (MI.getOpcode() == ARM::t2ADDri || MI.getOpcode() == ARM::t2ADDri12)
2116 break;
2117
2118 const MCInstrDesc &MCID = MI.getDesc();
2119 const TargetRegisterClass *RegClass = TII.getRegClass(MCID, i, TRI, MF);
2120 if (RegClass && !RegClass->contains(ARM::SP))
2121 HasNonSPFrameIndex = true;
2122
2123 // Otherwise check the addressing mode.
2124 switch (MI.getDesc().TSFlags & ARMII::AddrModeMask) {
2126 case ARMII::AddrMode2:
2127 // Default 12 bit limit.
2128 break;
2129 case ARMII::AddrMode3:
2131 Limit = std::min(Limit, (1U << 8) - 1);
2132 break;
2134 Limit = std::min(Limit, ((1U << 8) - 1) * 2);
2135 break;
2136 case ARMII::AddrMode5:
2139 Limit = std::min(Limit, ((1U << 8) - 1) * 4);
2140 break;
2142 // i12 supports only positive offset so these will be converted to
2143 // i8 opcodes. See llvm::rewriteT2FrameIndex.
2144 if (TFI->hasFP(MF) && AFI->hasStackFrame())
2145 Limit = std::min(Limit, (1U << 8) - 1);
2146 break;
2147 case ARMII::AddrMode4:
2148 case ARMII::AddrMode6:
2149 // Addressing modes 4 & 6 (load/store) instructions can't encode an
2150 // immediate offset for stack references.
2151 return 0;
2153 Limit = std::min(Limit, ((1U << 7) - 1) * 1);
2154 break;
2156 Limit = std::min(Limit, ((1U << 7) - 1) * 2);
2157 break;
2159 Limit = std::min(Limit, ((1U << 7) - 1) * 4);
2160 break;
2161 default:
2162 llvm_unreachable("Unhandled addressing mode in stack size limit calculation");
2163 }
2164 break; // At most one FI per instruction
2165 }
2166 }
2167 }
2168
2169 return Limit;
2170}
2171
2172// In functions that realign the stack, it can be an advantage to spill the
2173// callee-saved vector registers after realigning the stack. The vst1 and vld1
2174// instructions take alignment hints that can improve performance.
//
// Resets the aligned-DPRCS2 register count in ARMFunctionInfo to 0 and, if
// none of the early-outs below applies, sets it to the number of contiguous
// saved D-registers starting at d8 and additionally marks r4 in SavedRegs.
// NOTE(review): listing line 2176 (function name and parameters) is absent
// from this extract; the call checkNumAlignedDPRCS2Regs(MF, SavedRegs)
// elsewhere in this file matches the names used below. Lines 2178, 2190 and
// 2195 are absent too, so the conditions guarding the returns at 2179, 2191
// and 2196 are not (fully) visible.
2175static void
2177 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(0);
2179 return;
2180
2181 // Naked functions don't spill callee-saved registers.
2182 if (MF.getFunction().hasFnAttribute(Attribute::Naked))
2183 return;
2184
2185 // We are planning to use NEON instructions vst1 / vld1.
2186 if (!MF.getSubtarget<ARMSubtarget>().hasNEON())
2187 return;
2188
2189 // Don't bother if the default stack alignment is sufficiently high.
2191 return;
2192
2193 // Aligned spills require stack realignment.
2194 if (!static_cast<const ARMBaseRegisterInfo *>(
2196 return;
2197
2198 // We always spill contiguous d-registers starting from d8. Count how many
2199 // need spilling. The register allocator will almost always use the
2200 // callee-saved registers in order, but it can happen that there are holes in
2201 // the range. Registers above the hole will be spilled to the standard DPRCS
2202 // area. At most 8 registers starting at d8 are counted.
2203 unsigned NumSpills = 0;
2204 for (; NumSpills < 8; ++NumSpills)
2205 if (!SavedRegs.test(ARM::D8 + NumSpills))
2206 break;
2207
2208 // Don't do this for just one d-register. It's not worth it.
2209 if (NumSpills < 2)
2210 return;
2211
2212 // Spill the first NumSpills D-registers after realigning the stack.
2213 MF.getInfo<ARMFunctionInfo>()->setNumAlignedDPRCS2Regs(NumSpills);
2214
2215 // A scratch register is required for the vst1 / vld1 instructions.
2216 SavedRegs.set(ARM::R4);
2217}
2218
// NOTE(review): the signature of this function (listing line 2219) is absent
// from this extract, as are lines 2223 and 2229, so both conditions below are
// only partially visible. Judging by the comments, this predicate decides
// whether shrink-wrapping is permitted for the function - confirm against the
// full source.
//
// Returns false for the two cases described below and true otherwise.
2220 // For CMSE entry functions, we want to save the FPCXT_NS immediately
2221 // upon function entry (resp. restore it immediately before return)
2222 if (STI.hasV8_1MMainlineOps() &&
2224 return false;
2225
2226 // We are disabling shrinkwrapping for now when PAC is enabled, as
2227 // shrinkwrapping can cause clobbering of r12 when the PAC code is
2228 // generated. A follow-up patch will fix this in a more performant manner.
2230 true /* SpillsLR */))
2231 return false;
2232
2233 return true;
2234}
2235
// NOTE(review): the signature of this function (listing line 2236) is absent
// from this extract; presumably this is the requiresAAPCSFrameRecord(MF)
// helper called elsewhere in this file - confirm against the full source.
//
// Returns true when the subtarget asks for an AAPCS frame chain in leaf
// functions too, or when it asks for an AAPCS frame chain and this function
// contains calls.
2237 const auto &Subtarget = MF.getSubtarget<ARMSubtarget>();
2238 return Subtarget.createAAPCSFrameChainLeaf() ||
2239 (Subtarget.createAAPCSFrameChain() && MF.getFrameInfo().hasCalls());
2240}
2241
2242// Thumb1 may require a spill when storing to a frame index through FP (or any
2243// access with execute-only), for cases where FP is a high register (R11). This
2244// scans the function for cases where this may happen.
//
// Returns false for anything but a Thumb1-only function. Otherwise returns
// true as soon as a considered instruction has a frame-index operand whose
// frame register is in hGPR and is not SP.
// NOTE(review): listing line 2245 (function name and the MF parameter) is
// absent from this extract; the call canSpillOnFrameIndexAccess(MF, *this)
// elsewhere in this file matches the names used below.
2246 const TargetFrameLowering &TFI) {
2247 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2248 if (!AFI->isThumb1OnlyFunction())
2249 return false;
2250
2251 const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
// Only tSTRspi/tSTRi are considered, unless execute-only code generation is
// on, in which case every instruction is considered.
2252 for (const auto &MBB : MF)
2253 for (const auto &MI : MBB)
2254 if (MI.getOpcode() == ARM::tSTRspi || MI.getOpcode() == ARM::tSTRi ||
2255 STI.genExecuteOnly())
2256 for (const auto &Op : MI.operands())
2257 if (Op.isFI()) {
2258 Register Reg;
// Only the frame register written to Reg is used; the returned offset is
// discarded.
2259 TFI.getFrameIndexReference(MF, Op.getIndex(), Reg);
2260 if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::SP)
2261 return true;
2262 }
2263 return false;
2264}
2265
// NOTE(review): the first signature line of this function (listing line 2266)
// is absent from this extract, as are many interior lines (2269, 2282, 2285,
// 2287-2288, 2525, 2575, 2584, 2637, 2681, 2744, 2776), so MRI, TRI, AFI, STI
// and Extras have no visible declaration and a few statements below appear
// truncated.
//
// Marks in SavedRegs the registers that must be spilled for this function,
// decides whether the function has a stack frame (AFI->setHasStackFrame),
// records whether LR ends up spilled (AFI->setLRIsSpilled) and, when RS is
// non-null and not enough otherwise-unused callee-saved registers could be
// spilled, creates stack objects for emergency spill slots.
2267 BitVector &SavedRegs,
2268 RegScavenger *RS) const {
2270 // This tells PEI to spill the FP as if it is any other callee-save register
2271 // to take advantage of the eliminateFrameIndex machinery. This also ensures it
2272 // is spilled in the order specified by getCalleeSavedRegs() to make it easier
2273 // to combine multiple loads / stores.
2274 bool CanEliminateFrame = !(requiresAAPCSFrameRecord(MF) && hasFP(MF));
2275 bool CS1Spilled = false;
2276 bool LRSpilled = false;
2277 unsigned NumGPRSpills = 0;
2278 unsigned NumFPRSpills = 0;
2279 SmallVector<unsigned, 4> UnspilledCS1GPRs;
2280 SmallVector<unsigned, 4> UnspilledCS2GPRs;
2281 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>(
2283 const ARMBaseInstrInfo &TII =
2284 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2286 MachineFrameInfo &MFI = MF.getFrameInfo();
2289 (void)TRI; // Silence unused warning in non-assert builds.
2290 Register FramePtr = RegInfo->getFrameRegister(MF);
2291
2292 // Spill R4 if Thumb2 function requires stack realignment - it will be used as
2293 // scratch register. Also spill R4 if Thumb2 function has varsized objects,
2294 // since it's not always possible to restore sp from fp in a single
2295 // instruction.
2296 // FIXME: It will be better just to find spare register here.
2297 if (AFI->isThumb2Function() &&
2298 (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF)))
2299 SavedRegs.set(ARM::R4);
2300
2301 // If a stack probe will be emitted, spill R4 and LR, since they are
2302 // clobbered by the stack probe call.
2303 // This estimate should be a safe, conservative estimate. The actual
2304 // stack probe is enabled based on the size of the local objects;
2305 // this estimate also includes the varargs store size.
2306 if (STI.isTargetWindows() &&
2307 WindowsRequiresStackProbe(MF, MFI.estimateStackSize(MF))) {
2308 SavedRegs.set(ARM::R4);
2309 SavedRegs.set(ARM::LR);
2310 }
2311
2312 if (AFI->isThumb1OnlyFunction()) {
2313 // Spill LR if Thumb1 function uses variable length argument lists.
2314 if (AFI->getArgRegsSaveSize() > 0)
2315 SavedRegs.set(ARM::LR);
2316
2317 // Spill R4 if Thumb1 epilogue has to restore SP from FP or the function
2318 // requires stack alignment. We don't know for sure what the stack size
2319 // will be, but for this, an estimate is good enough. If anything
2320 // changes it, it'll be a spill, which implies we've used all the registers
2321 // and so R4 is already used, so not marking it here will be OK.
2322 // FIXME: It will be better just to find spare register here.
2323 if (MFI.hasVarSizedObjects() || RegInfo->hasStackRealignment(MF) ||
2324 MFI.estimateStackSize(MF) > 508)
2325 SavedRegs.set(ARM::R4);
2326 }
2327
2328 // See if we can spill vector registers to aligned stack.
2329 checkNumAlignedDPRCS2Regs(MF, SavedRegs);
2330
2331 // Spill the BasePtr if it's used.
2332 if (RegInfo->hasBasePointer(MF))
2333 SavedRegs.set(RegInfo->getBaseRegister());
2334
2335 // On v8.1-M.Main CMSE entry functions save/restore FPCXT.
2336 if (STI.hasV8_1MMainlineOps() && AFI->isCmseNSEntryFunction())
2337 CanEliminateFrame = false;
2338
2339 // When return address signing is enabled R12 is treated as callee-saved.
2340 if (AFI->shouldSignReturnAddress())
2341 CanEliminateFrame = false;
2342
2343 // Don't spill FP if the frame can be eliminated. This is determined
2344 // by scanning the callee-save registers to see if any is modified.
// The same scan counts GPR and FPR spills (FPRs in 4-byte units: 1 per
// S-register, 2 per D-register, 4 per Q-register) and collects the GPRs that
// are not yet spilled.
2345 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2346 for (unsigned i = 0; CSRegs[i]; ++i) {
2347 unsigned Reg = CSRegs[i];
2348 bool Spilled = false;
2349 if (SavedRegs.test(Reg)) {
2350 Spilled = true;
2351 CanEliminateFrame = false;
2352 }
2353
2354 if (!ARM::GPRRegClass.contains(Reg)) {
2355 if (Spilled) {
2356 if (ARM::SPRRegClass.contains(Reg))
2357 NumFPRSpills++;
2358 else if (ARM::DPRRegClass.contains(Reg))
2359 NumFPRSpills += 2;
2360 else if (ARM::QPRRegClass.contains(Reg))
2361 NumFPRSpills += 4;
2362 }
2363 continue;
2364 }
2365
2366 if (Spilled) {
2367 NumGPRSpills++;
2368
// Without split push/pop every spilled GPR counts as a CS1 spill.
2369 if (!STI.splitFramePushPop(MF)) {
2370 if (Reg == ARM::LR)
2371 LRSpilled = true;
2372 CS1Spilled = true;
2373 continue;
2374 }
2375
2376 // Keep track of whether LR or any of R0-R7 is spilled.
2377 switch (Reg) {
2378 case ARM::LR:
2379 LRSpilled = true;
2380 [[fallthrough]];
2381 case ARM::R0: case ARM::R1:
2382 case ARM::R2: case ARM::R3:
2383 case ARM::R4: case ARM::R5:
2384 case ARM::R6: case ARM::R7:
2385 CS1Spilled = true;
2386 break;
2387 default:
2388 break;
2389 }
2390 } else {
2391 if (!STI.splitFramePushPop(MF)) {
2392 UnspilledCS1GPRs.push_back(Reg);
2393 continue;
2394 }
2395
2396 switch (Reg) {
2397 case ARM::R0: case ARM::R1:
2398 case ARM::R2: case ARM::R3:
2399 case ARM::R4: case ARM::R5:
2400 case ARM::R6: case ARM::R7:
2401 case ARM::LR:
2402 UnspilledCS1GPRs.push_back(Reg);
2403 break;
2404 default:
2405 UnspilledCS2GPRs.push_back(Reg);
2406 break;
2407 }
2408 }
2409 }
2410
2411 bool ForceLRSpill = false;
2412 if (!LRSpilled && AFI->isThumb1OnlyFunction()) {
2413 unsigned FnSize = EstimateFunctionSizeInBytes(MF, TII);
2414 // Force LR to be spilled if the Thumb function size is >= 2048. This enables
2415 // use of BL to implement far jump.
2416 if (FnSize >= (1 << 11)) {
2417 CanEliminateFrame = false;
2418 ForceLRSpill = true;
2419 }
2420 }
2421
2422 // If any of the stack slot references may be out of range of an immediate
2423 // offset, make sure a register (or a spill slot) is available for the
2424 // register scavenger. Note that if we're indexing off the frame pointer, the
2425 // effective stack size is 4 bytes larger since the FP points to the stack
2426 // slot of the previous FP. Also, if we have variable sized objects in the
2427 // function, stack slot references will often be negative, and some of
2428 // our instructions are positive-offset only, so conservatively consider
2429 // that case to want a spill slot (or register) as well. Similarly, if
2430 // the function adjusts the stack pointer during execution and the
2431 // adjustments aren't already part of our stack size estimate, our offset
2432 // calculations may be off, so be conservative.
2433 // FIXME: We could add logic to be more precise about negative offsets
2434 // and which instructions will need a scratch register for them. Is it
2435 // worth the effort and added fragility?
2436 unsigned EstimatedStackSize =
2437 MFI.estimateStackSize(MF) + 4 * (NumGPRSpills + NumFPRSpills);
2438
2439 // Determine biggest (positive) SP offset in MachineFrameInfo.
2440 int MaxFixedOffset = 0;
2441 for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
2442 int MaxObjectOffset = MFI.getObjectOffset(I) + MFI.getObjectSize(I);
2443 MaxFixedOffset = std::max(MaxFixedOffset, MaxObjectOffset);
2444 }
2445
2446 bool HasFP = hasFP(MF);
2447 if (HasFP) {
2448 if (AFI->hasStackFrame())
2449 EstimatedStackSize += 4;
2450 } else {
2451 // If FP is not used, SP will be used to access arguments, so count the
2452 // size of arguments into the estimation.
2453 EstimatedStackSize += MaxFixedOffset;
2454 }
2455 EstimatedStackSize += 16; // For possible paddings.
2456
2457 unsigned EstimatedRSStackSizeLimit, EstimatedRSFixedSizeLimit;
2458 bool HasNonSPFrameIndex = false;
2459 if (AFI->isThumb1OnlyFunction()) {
2460 // For Thumb1, don't bother to iterate over the function. The only
2461 // instruction that requires an emergency spill slot is a store to a
2462 // frame index.
2463 //
2464 // tSTRspi, which is used for sp-relative accesses, has an 8-bit unsigned
2465 // immediate. tSTRi, which is used for bp- and fp-relative accesses, has
2466 // a 5-bit unsigned immediate.
2467 //
2468 // We could try to check if the function actually contains a tSTRspi
2469 // that might need the spill slot, but it's not really important.
2470 // Functions with VLAs or extremely large call frames are rare, and
2471 // if a function is allocating more than 1KB of stack, an extra 4-byte
2472 // slot probably isn't relevant.
2473 //
2474 // A special case is the scenario where r11 is used as FP, where accesses
2475 // to a frame index will require its value to be moved into a low reg.
2476 // This is handled later on, once we are able to determine if we have any
2477 // fp-relative accesses.
2478 if (RegInfo->hasBasePointer(MF))
2479 EstimatedRSStackSizeLimit = (1U << 5) * 4;
2480 else
2481 EstimatedRSStackSizeLimit = (1U << 8) * 4;
2482 EstimatedRSFixedSizeLimit = (1U << 5) * 4;
2483 } else {
2484 EstimatedRSStackSizeLimit =
2485 estimateRSStackSizeLimit(MF, this, HasNonSPFrameIndex);
2486 EstimatedRSFixedSizeLimit = EstimatedRSStackSizeLimit;
2487 }
2488 // Final estimate of whether sp or bp-relative accesses might require
2489 // scavenging.
2490 bool HasLargeStack = EstimatedStackSize > EstimatedRSStackSizeLimit;
2491
2492 // If the stack pointer moves and we don't have a base pointer, the
2493 // estimate logic doesn't work. The actual offsets might be larger when
2494 // we're constructing a call frame, or we might need to use negative
2495 // offsets from fp.
2496 bool HasMovingSP = MFI.hasVarSizedObjects() ||
2497 (MFI.adjustsStack() && !canSimplifyCallFramePseudos(MF));
2498 bool HasBPOrFixedSP = RegInfo->hasBasePointer(MF) || !HasMovingSP;
2499
2500 // If we have a frame pointer, we assume arguments will be accessed
2501 // relative to the frame pointer. Check whether fp-relative accesses to
2502 // arguments require scavenging.
2503 //
2504 // We could do slightly better on Thumb1; in some cases, an sp-relative
2505 // offset would be legal even though an fp-relative offset is not.
2506 int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF);
2507 bool HasLargeArgumentList =
2508 HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit;
2509
2510 bool BigFrameOffsets = HasLargeStack || !HasBPOrFixedSP ||
2511 HasLargeArgumentList || HasNonSPFrameIndex;
2512 LLVM_DEBUG(dbgs() << "EstimatedLimit: " << EstimatedRSStackSizeLimit
2513 << "; EstimatedStack: " << EstimatedStackSize
2514 << "; EstimatedFPStack: " << MaxFixedOffset - MaxFPOffset
2515 << "; BigFrameOffsets: " << BigFrameOffsets << "\n");
2516 if (BigFrameOffsets ||
2517 !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) {
2518 AFI->setHasStackFrame(true);
2519
2520 if (HasFP) {
2521 SavedRegs.set(FramePtr);
2522 // If the frame pointer is required by the ABI, also spill LR so that we
2523 // emit a complete frame record.
2524 if ((requiresAAPCSFrameRecord(MF) ||
2526 !LRSpilled) {
2527 SavedRegs.set(ARM::LR);
2528 LRSpilled = true;
2529 NumGPRSpills++;
2530 auto LRPos = llvm::find(UnspilledCS1GPRs, ARM::LR);
2531 if (LRPos != UnspilledCS1GPRs.end())
2532 UnspilledCS1GPRs.erase(LRPos);
2533 }
2534 auto FPPos = llvm::find(UnspilledCS1GPRs, FramePtr);
2535 if (FPPos != UnspilledCS1GPRs.end())
2536 UnspilledCS1GPRs.erase(FPPos);
2537 NumGPRSpills++;
2538 if (FramePtr == ARM::R7)
2539 CS1Spilled = true;
2540 }
2541
2542 // This is the number of extra spills inserted for callee-save GPRs which
2543 // would not otherwise be used by the function. When greater than zero it
2544 // guarantees that it is possible to scavenge a register to hold the
2545 // address of a stack slot. On Thumb1, the register must be a valid operand
2546 // to tSTRi, i.e. r4-r7. For other subtargets, this is any GPR, i.e. r4-r11
2547 // or lr.
2548 //
2549 // If we don't insert a spill, we instead allocate an emergency spill
2550 // slot, which can be used by scavenging to spill an arbitrary register.
2551 //
2552 // We currently don't try to figure out whether any specific instruction
2553 // requires scavenging an additional register.
2554 unsigned NumExtraCSSpill = 0;
2555
2556 if (AFI->isThumb1OnlyFunction()) {
2557 // For Thumb1-only targets, we need some low registers when we save and
2558 // restore the high registers (which aren't allocatable, but could be
2559 // used by inline assembly) because the push/pop instructions can not
2560 // access high registers. If necessary, we might need to push more low
2561 // registers to ensure that there is at least one free that can be used
2562 // for the saving & restoring, and preferably we should ensure that as
2563 // many as are needed are available so that fewer push/pop instructions
2564 // are required.
2565
2566 // Low registers which are not currently pushed, but could be (r4-r7).
2567 SmallVector<unsigned, 4> AvailableRegs;
2568
2569 // Unused argument registers (r0-r3) can be clobbered in the prologue for
2570 // free.
2571 int EntryRegDeficit = 0;
2572 for (unsigned Reg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) {
2573 if (!MF.getRegInfo().isLiveIn(Reg)) {
2574 --EntryRegDeficit;
2576 << printReg(Reg, TRI)
2577 << " is unused argument register, EntryRegDeficit = "
2578 << EntryRegDeficit << "\n");
2579 }
2580 }
2581
2582 // Unused return registers can be clobbered in the epilogue for free.
2583 int ExitRegDeficit = AFI->getReturnRegsCount() - 4;
2585 << " return regs used, ExitRegDeficit = "
2586 << ExitRegDeficit << "\n");
2587
2588 int RegDeficit = std::max(EntryRegDeficit, ExitRegDeficit);
2589 LLVM_DEBUG(dbgs() << "RegDeficit = " << RegDeficit << "\n");
2590
2591 // r4-r6 can be used in the prologue if they are pushed by the first push
2592 // instruction.
2593 for (unsigned Reg : {ARM::R4, ARM::R5, ARM::R6}) {
2594 if (SavedRegs.test(Reg)) {
2595 --RegDeficit;
2596 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2597 << " is saved low register, RegDeficit = "
2598 << RegDeficit << "\n");
2599 } else {
2600 AvailableRegs.push_back(Reg);
2601 LLVM_DEBUG(
2602 dbgs()
2603 << printReg(Reg, TRI)
2604 << " is non-saved low register, adding to AvailableRegs\n");
2605 }
2606 }
2607
2608 // r7 can be used if it is not being used as the frame pointer.
2609 if (!HasFP || FramePtr != ARM::R7) {
2610 if (SavedRegs.test(ARM::R7)) {
2611 --RegDeficit;
2612 LLVM_DEBUG(dbgs() << "%r7 is saved low register, RegDeficit = "
2613 << RegDeficit << "\n");
2614 } else {
2615 AvailableRegs.push_back(ARM::R7);
2616 LLVM_DEBUG(
2617 dbgs()
2618 << "%r7 is non-saved low register, adding to AvailableRegs\n");
2619 }
2620 }
2621
2622 // Each of r8-r11 needs to be copied to a low register, then pushed.
2623 for (unsigned Reg : {ARM::R8, ARM::R9, ARM::R10, ARM::R11}) {
2624 if (SavedRegs.test(Reg)) {
2625 ++RegDeficit;
2626 LLVM_DEBUG(dbgs() << printReg(Reg, TRI)
2627 << " is saved high register, RegDeficit = "
2628 << RegDeficit << "\n");
2629 }
2630 }
2631
2632 // LR can only be used by PUSH, not POP, and can't be used at all if the
2633 // llvm.returnaddress intrinsic is used. This is only worth doing if we
2634 // are more limited at function entry than exit.
2635 if ((EntryRegDeficit > ExitRegDeficit) &&
2636 !(MF.getRegInfo().isLiveIn(ARM::LR) &&
2638 if (SavedRegs.test(ARM::LR)) {
2639 --RegDeficit;
2640 LLVM_DEBUG(dbgs() << "%lr is saved register, RegDeficit = "
2641 << RegDeficit << "\n");
2642 } else {
2643 AvailableRegs.push_back(ARM::LR);
2644 LLVM_DEBUG(dbgs() << "%lr is not saved, adding to AvailableRegs\n");
2645 }
2646 }
2647
2648 // If there are more high registers that need pushing than low registers
2649 // available, push some more low registers so that we can use fewer push
2650 // instructions. This might not reduce RegDeficit all the way to zero,
2651 // because we can only guarantee that r4-r6 are available, but r8-r11 may
2652 // need saving.
2653 LLVM_DEBUG(dbgs() << "Final RegDeficit = " << RegDeficit << "\n");
2654 for (; RegDeficit > 0 && !AvailableRegs.empty(); --RegDeficit) {
2655 unsigned Reg = AvailableRegs.pop_back_val();
2656 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2657 << " to make up reg deficit\n");
2658 SavedRegs.set(Reg);
2659 NumGPRSpills++;
2660 CS1Spilled = true;
2661 assert(!MRI.isReserved(Reg) && "Should not be reserved");
// LR never counts as an extra scavengeable spill here.
2662 if (Reg != ARM::LR && !MRI.isPhysRegUsed(Reg))
2663 NumExtraCSSpill++;
2664 UnspilledCS1GPRs.erase(llvm::find(UnspilledCS1GPRs, Reg));
2665 if (Reg == ARM::LR)
2666 LRSpilled = true;
2667 }
2668 LLVM_DEBUG(dbgs() << "After adding spills, RegDeficit = " << RegDeficit
2669 << "\n");
2670 }
2671
2672 // Avoid spilling LR in Thumb1 if there's a tail call: it's expensive to
2673 // restore LR in that case.
2674 bool ExpensiveLRRestore = AFI->isThumb1OnlyFunction() && MFI.hasTailCall();
2675
2676 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled,
2677 // spill LR as well so we can fold BX_RET to the registers restore (LDM).
2678 if (!LRSpilled && CS1Spilled && !ExpensiveLRRestore) {
2679 SavedRegs.set(ARM::LR);
2680 NumGPRSpills++;
2682 LRPos = llvm::find(UnspilledCS1GPRs, (unsigned)ARM::LR);
2683 if (LRPos != UnspilledCS1GPRs.end())
2684 UnspilledCS1GPRs.erase(LRPos);
2685
2686 ForceLRSpill = false;
2687 if (!MRI.isReserved(ARM::LR) && !MRI.isPhysRegUsed(ARM::LR) &&
2688 !AFI->isThumb1OnlyFunction())
2689 NumExtraCSSpill++;
2690 }
2691
2692 // If stack and double are 8-byte aligned and we are spilling an odd number
2693 // of GPRs, spill one extra callee save GPR so we won't have to pad between
2694 // the integer and double callee save areas.
2695 LLVM_DEBUG(dbgs() << "NumGPRSpills = " << NumGPRSpills << "\n");
2696 const Align TargetAlign = getStackAlign();
2697 if (TargetAlign >= Align(8) && (NumGPRSpills & 1)) {
2698 if (CS1Spilled && !UnspilledCS1GPRs.empty()) {
2699 for (unsigned Reg : UnspilledCS1GPRs) {
2700 // Don't spill high register if the function is thumb. In the case of
2701 // Windows on ARM, accept R11 (frame pointer)
2702 if (!AFI->isThumbFunction() ||
2703 (STI.isTargetWindows() && Reg == ARM::R11) ||
2704 isARMLowRegister(Reg) ||
2705 (Reg == ARM::LR && !ExpensiveLRRestore)) {
2706 SavedRegs.set(Reg);
2707 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2708 << " to make up alignment\n");
2709 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg) &&
2710 !(Reg == ARM::LR && AFI->isThumb1OnlyFunction()))
2711 NumExtraCSSpill++;
2712 break;
2713 }
2714 }
2715 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) {
2716 unsigned Reg = UnspilledCS2GPRs.front();
2717 SavedRegs.set(Reg);
2718 LLVM_DEBUG(dbgs() << "Spilling " << printReg(Reg, TRI)
2719 << " to make up alignment\n");
2720 if (!MRI.isReserved(Reg) && !MRI.isPhysRegUsed(Reg))
2721 NumExtraCSSpill++;
2722 }
2723 }
2724
2725 // Estimate if we might need to scavenge registers at some point in order
2726 // to materialize a stack offset. If so, either spill one additional
2727 // callee-saved register or reserve a special spill slot to facilitate
2728 // register scavenging. Thumb1 needs a spill slot for stack pointer
2729 // adjustments and for frame index accesses when FP is high register,
2730 // even when the frame itself is small.
2731 unsigned RegsNeeded = 0;
2732 if (BigFrameOffsets || canSpillOnFrameIndexAccess(MF, *this)) {
2733 RegsNeeded++;
2734 // With thumb1 execute-only we may need an additional register for saving
2735 // and restoring the CPSR.
2736 if (AFI->isThumb1OnlyFunction() && STI.genExecuteOnly() && !STI.useMovt())
2737 RegsNeeded++;
2738 }
2739
2740 if (RegsNeeded > NumExtraCSSpill) {
2741 // If any non-reserved CS register isn't spilled, just spill one or two
2742 // extra. That should take care of it!
// The candidates are only committed to SavedRegs further down if a full
// TargetAlign.value() / 4 of them were found (NumExtras reached 0).
2743 unsigned NumExtras = TargetAlign.value() / 4;
2745 while (NumExtras && !UnspilledCS1GPRs.empty()) {
2746 unsigned Reg = UnspilledCS1GPRs.pop_back_val();
2747 if (!MRI.isReserved(Reg) &&
2748 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg))) {
2749 Extras.push_back(Reg);
2750 NumExtras--;
2751 }
2752 }
2753 // For non-Thumb1 functions, also check for hi-reg CS registers
2754 if (!AFI->isThumb1OnlyFunction()) {
2755 while (NumExtras && !UnspilledCS2GPRs.empty()) {
2756 unsigned Reg = UnspilledCS2GPRs.pop_back_val();
2757 if (!MRI.isReserved(Reg)) {
2758 Extras.push_back(Reg);
2759 NumExtras--;
2760 }
2761 }
2762 }
2763 if (NumExtras == 0) {
2764 for (unsigned Reg : Extras) {
2765 SavedRegs.set(Reg);
2766 if (!MRI.isPhysRegUsed(Reg))
2767 NumExtraCSSpill++;
2768 }
2769 }
// One stack object is created per register still missing; nothing is
// created when RS is null.
2770 while ((RegsNeeded > NumExtraCSSpill) && RS) {
2771 // Reserve a slot closest to SP or frame pointer.
2772 LLVM_DEBUG(dbgs() << "Reserving emergency spill slot\n");
2773 const TargetRegisterClass &RC = ARM::GPRRegClass;
2774 unsigned Size = TRI->getSpillSize(RC);
2775 Align Alignment = TRI->getSpillAlign(RC);
2777 MFI.CreateStackObject(Size, Alignment, false));
2778 --RegsNeeded;
2779 }
2780 }
2781 }
2782
// A forced LR spill is applied last; it is dropped above when LR was
// already spilled to pair with a CS1 spill.
2783 if (ForceLRSpill)
2784 SavedRegs.set(ARM::LR);
2785 AFI->setLRIsSpilled(SavedRegs.test(ARM::LR));
2786}
2787
// NOTE(review): the signature of this function (listing line 2788) is absent
// from this extract; the call updateLRRestored(MF) elsewhere in this file
// matches the name used below.
//
// Clears the Restored flag on LR's callee-saved entry when every return
// terminator in the function is LDMIA_RET, t2LDMIA_RET or tPOP_RET. Does
// nothing if the callee-saved info is not valid yet.
2789 MachineFrameInfo &MFI = MF.getFrameInfo();
2790 if (!MFI.isCalleeSavedInfoValid())
2791 return;
2792
2793 // Check if all terminators do not implicitly use LR. Then we can 'restore' LR
2794 // into PC so it is not live out of the return block: Clear the Restored bit
2795 // in that case.
2796 for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
2797 if (Info.getReg() != ARM::LR)
2798 continue;
// Non-return terminators are ignored by the predicate.
2799 if (all_of(MF, [](const MachineBasicBlock &MBB) {
2800 return all_of(MBB.terminators(), [](const MachineInstr &Term) {
2801 return !Term.isReturn() || Term.getOpcode() == ARM::LDMIA_RET ||
2802 Term.getOpcode() == ARM::t2LDMIA_RET ||
2803 Term.getOpcode() == ARM::tPOP_RET;
2804 });
2805 })) {
2806 Info.setRestored(false);
2807 break;
2808 }
2809 }
2810}
2811
// NOTE(review): the first signature line (listing line 2812) and line 2814
// are absent from this extract, so the function name and any statement before
// the call below are not visible here.
2813 MachineFunction &MF, RegScavenger *RS) const {
// Re-evaluates whether LR's callee-saved entry needs the Restored flag.
2815 updateLRRestored(MF);
2816}
2817
// NOTE(review): the first signature line (listing line 2818) and line 2820
// are absent from this extract, so the function name and whatever populates
// SavedRegs before the code below are not visible here.
2819 BitVector &SavedRegs) const {
2821
2822 // If we have the "returned" parameter attribute which guarantees that we
2823 // return the value which was passed in r0 unmodified (e.g. C++ 'structors),
2824 // record that fact for IPRA.
2825 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
2826 if (AFI->getPreservesR0())
2827 SavedRegs.set(ARM::R0);
2828}
2829
// Adjusts the callee-saved list CSI before spill slots are assigned: appends
// FPCXTNS (marked as not restored) under the condition below, and inserts R12
// when the function signs its return address. Always returns false.
// NOTE(review): source lines 2830-2831 (start of the signature) and 2836
// (second half of the first condition) are absent from this listing;
// presumably this is ARMFrameLowering::assignCalleeSavedSpillSlots -- TODO
// confirm.
2832 std::vector<CalleeSavedInfo> &CSI) const {
2833 // For CMSE entry functions, handle floating-point context as if it was a
2834 // callee-saved register.
2835 if (STI.hasV8_1MMainlineOps() &&
2837 CSI.emplace_back(ARM::FPCXTNS);
2838 CSI.back().setRestored(false);
2839 }
2840
2841 // For functions which sign their return address, upon function entry, the
2842 // return address PAC is computed in R12. Treat R12 as a callee-saved register
2843 // in this case.
2844 const auto &AFI = *MF.getInfo<ARMFunctionInfo>();
2845 if (AFI.shouldSignReturnAddress()) {
2846 // The order of registers must match the order we push them, because the
2847 // PEI assigns frame indices in that order. When compiling for return
2848 // address signing and authentication, we use split push, therefore the
2849 // order we want is:
2850 // LR, R7, R6, R5, R4, <R12>, R11, R10, R9, R8, D15-D8
// R12 is inserted in front of the first entry that is R8-R11 or a D register
// (or at the end of CSI when there is no such entry).
2851 CSI.insert(find_if(CSI,
2852 [=](const auto &CS) {
2853 Register Reg = CS.getReg();
2854 return Reg == ARM::R10 || Reg == ARM::R11 ||
2855 Reg == ARM::R8 || Reg == ARM::R9 ||
2856 ARM::DPRRegClass.contains(Reg);
2857 }),
2858 CalleeSavedInfo(ARM::R12));
2859 }
2860
// NOTE(review): presumably 'false' leaves the actual slot assignment to the
// generic code -- confirm against the TargetFrameLowering contract.
2861 return false;
2862}
2863
// Returns a pointer to a static table of fixed spill slots and stores the
// number of table entries in NumEntries. The table has a single entry:
// FPCXTNS at offset -4.
// NOTE(review): the signature (source lines 2864-2865) is absent from this
// listing; presumably this is ARMFrameLowering::getCalleeSavedSpillSlots(
// unsigned &NumEntries) const -- TODO confirm.
2866 static const SpillSlot FixedSpillOffsets[] = {{ARM::FPCXTNS, -4}};
2867 NumEntries = std::size(FixedSpillOffsets);
2868 return FixedSpillOffsets;
2869}
2870
// Replaces a call-frame setup/destroy pseudo instruction (I) with an explicit
// SP adjustment where one is needed, then erases the pseudo and returns the
// iterator produced by MBB.erase(I). Thumb1-only functions are rejected by the
// assert below.
// NOTE(review): source lines 2872-2873 (rest of the parameter list) and 2876
// (presumably the definition of AFI) are absent from this listing.
2871MachineBasicBlock::iterator ARMFrameLowering::eliminateCallFramePseudoInstr(
2874 const ARMBaseInstrInfo &TII =
2875 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
2877 bool isARM = !AFI->isThumbFunction();
2878 DebugLoc dl = I->getDebugLoc();
2879 unsigned Opc = I->getOpcode();
2880 bool IsDestroy = Opc == TII.getCallFrameDestroyOpcode();
// Operand 1 is only read for the destroy pseudo; for the setup pseudo the
// callee-pop amount is taken to be 0.
2881 unsigned CalleePopAmount = IsDestroy ? I->getOperand(1).getImm() : 0;
2882
2883 assert(!AFI->isThumb1OnlyFunction() &&
2884 "This eliminateCallFramePseudoInstr does not support Thumb1!");
2885
// Carry the predicate of the pseudo (AL when it has none) over to the emitted
// SP update.
2886 int PIdx = I->findFirstPredOperandIdx();
2887 ARMCC::CondCodes Pred = (PIdx == -1)
2888 ? ARMCC::AL
2889 : (ARMCC::CondCodes)I->getOperand(PIdx).getImm();
2890 unsigned PredReg = TII.getFramePred(*I);
2891
2892 if (!hasReservedCallFrame(MF)) {
2893 // Bail early if the callee is expected to do the adjustment.
2894 if (IsDestroy && CalleePopAmount != -1U)
2895 return MBB.erase(I);
2896
2897 // If we have alloca, convert as follows:
2898 // ADJCALLSTACKDOWN -> sub, sp, sp, amount
2899 // ADJCALLSTACKUP -> add, sp, sp, amount
2900 unsigned Amount = TII.getFrameSize(*I);
2901 if (Amount != 0) {
2902 // We need to keep the stack aligned properly. To do this, we round the
2903 // amount of space needed for the outgoing arguments up to the next
2904 // alignment boundary.
2905 Amount = alignSPAdjust(Amount);
2906
2907 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
2908 emitSPUpdate(isARM, MBB, I, dl, TII, -Amount, MachineInstr::NoFlags,
2909 Pred, PredReg);
2910 } else {
2911 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP);
2912 emitSPUpdate(isARM, MBB, I, dl, TII, Amount, MachineInstr::NoFlags,
2913 Pred, PredReg);
2914 }
2915 }
2916 } else if (CalleePopAmount != -1U) {
2917 // If the calling convention demands that the callee pops arguments from the
2918 // stack, we want to add it back if we have a reserved call frame.
2919 emitSPUpdate(isARM, MBB, I, dl, TII, -CalleePopAmount,
2920 MachineInstr::NoFlags, Pred, PredReg);
2921 }
2922 return MBB.erase(I);
2923}
2924
/// Get the minimum constant for ARM that is greater than or equal to the
/// argument. In ARM, constants can have any value that can be produced by
/// rotating an 8-bit value to the right by an even number of bits within a
/// 32-bit word.
///
/// \param Value the value to round up.
/// \returns 0 when \p Value is 0; otherwise \p Value rounded up so that only
/// its 8 most significant bits (taken at an even bit position) remain set.
///
/// NOTE: all arithmetic is done in 32 bits. For inputs above 0xFF000000 the
/// rounded-up result does not fit and the final shift wraps it to 0.
static uint32_t alignToARMConstant(uint32_t Value) {
  unsigned Shifted = 0;

  if (Value == 0)
    return 0;

  // Normalize: shift left two bits at a time (rotations must be even) until
  // one of the two top bits is set, remembering how far we shifted.
  while (!(Value & 0xC0000000)) {
    Value = Value << 2;
    Shifted += 2;
  }

  // Keep the top byte and round it up by one if any of the discarded low
  // 24 bits was set.
  bool Carry = (Value & 0x00FFFFFF);
  Value = ((Value & 0xFF000000) >> 24) + Carry;

  // The round-up may have carried into bit 8 (0xFF + 1 == 0x100); keep the
  // value on an even-rotation boundary by clearing the two low bits.
  if (Value & 0x0000100)
    Value = Value & 0x000001FC;

  // Undo the normalization so the byte ends up back at its original position.
  if (Shifted > 24)
    Value = Value >> (Shifted - 24);
  else
    Value = Value << (24 - Shifted);

  return Value;
}
2953
2954// The stack limit in the TCB is set to this many bytes above the actual
2955// stack limit.
2957
2958// Adjust the function prologue to enable split stacks. This currently only
2959// supports android and linux.
2960//
2961// The ABI of the segmented stack prologue is a little arbitrarily chosen, but
2962// must be well defined in order to allow for consistent implementations of the
2963// __morestack helper function. The ABI is also not a normal ABI in that it
2964// doesn't follow the normal calling conventions because this allows the
2965// prologue of each function to be optimized further.
2966//
2967// Currently, the ABI looks like (when calling __morestack)
2968//
2969// * r4 holds the minimum stack size requested for this function call
2970// * r5 holds the stack size of the arguments to the function
2971// * the beginning of the function is 3 instructions after the call to
2972// __morestack
2973//
2974// Implementations of __morestack should use r4 to allocate a new stack, r5 to
2975// place the arguments on to the new stack, and the 3-instruction knowledge to
2976// jump directly to the body of the function when working on the new stack.
2977//
2978// An old (and possibly no longer compatible) implementation of __morestack for
2979// ARM can be found at [1].
2980//
2981// [1] - https://github.com/mozilla/rust/blob/86efd9/src/rt/arch/arm/morestack.S
// Body of the split-stack prologue adjustment. It creates a chain of new
// blocks (PrevStackMBB, McrMBB, GetMBB, AllocMBB, PostStackMBB), inserts them
// in front of PrologueMBB, and fills them with code that saves two scratch
// registers, compares the stack limit against the requested stack size, and
// calls __morestack when the check fails.
// NOTE(review): this listing is missing a number of source lines (the embedded
// line numbers have gaps, e.g. 2982, 3003, 3020-3022, 3026, 3061, 3084), so
// the signature start and several statements below appear truncated.
2983 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
2984 unsigned Opcode;
2985 unsigned CFIIndex;
2986 const ARMSubtarget *ST = &MF.getSubtarget<ARMSubtarget>();
2987 bool Thumb = ST->isThumb();
2988 bool Thumb2 = ST->isThumb2();
2989
2990 // Sadly, this currently doesn't support varargs or platforms other than
2991 // android/linux. Note that thumb1/thumb2 are supported on android/linux.
2992 if (MF.getFunction().isVarArg())
2993 report_fatal_error("Segmented stacks do not support vararg functions.");
2994 if (!ST->isTargetAndroid() && !ST->isTargetLinux())
2995 report_fatal_error("Segmented stacks not supported on this platform.");
2996
2997 MachineFrameInfo &MFI = MF.getFrameInfo();
2998 MachineModuleInfo &MMI = MF.getMMI();
2999 MCContext &Context = MMI.getContext();
3000 const MCRegisterInfo *MRI = Context.getRegisterInfo();
3001 const ARMBaseInstrInfo &TII =
3002 *static_cast<const ARMBaseInstrInfo *>(MF.getSubtarget().getInstrInfo());
3004 DebugLoc DL;
3005
3006 if (!MFI.needsSplitStackProlog())
3007 return;
3008
3009 uint64_t StackSize = MFI.getStackSize();
3010
3011 // Use R4 and R5 as scratch registers.
3012 // We save R4 and R5 before use and restore them before leaving the function.
3013 unsigned ScratchReg0 = ARM::R4;
3014 unsigned ScratchReg1 = ARM::R5;
3015 unsigned MovOp = ST->useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm;
3016 uint64_t AlignedStackSize;
3017
3018 MachineBasicBlock *PrevStackMBB = MF.CreateMachineBasicBlock();
3019 MachineBasicBlock *PostStackMBB = MF.CreateMachineBasicBlock();
3023
3024 // Grab everything that reaches PrologueMBB to update their liveness as well.
3025 SmallPtrSet<MachineBasicBlock *, 8> BeforePrologueRegion;
3027 WalkList.push_back(&PrologueMBB);
3028
3029 do {
3030 MachineBasicBlock *CurMBB = WalkList.pop_back_val();
3031 for (MachineBasicBlock *PredBB : CurMBB->predecessors()) {
3032 if (BeforePrologueRegion.insert(PredBB).second)
3033 WalkList.push_back(PredBB);
3034 }
3035 } while (!WalkList.empty());
3036
3037 // The order in that list is important.
3038 // The blocks will all be inserted before PrologueMBB using that order.
3039 // Therefore the block that should appear first in the CFG should appear
3040 // first in the list.
3041 MachineBasicBlock *AddedBlocks[] = {PrevStackMBB, McrMBB, GetMBB, AllocMBB,
3042 PostStackMBB};
3043
3044 for (MachineBasicBlock *B : AddedBlocks)
3045 BeforePrologueRegion.insert(B);
3046
3047 for (const auto &LI : PrologueMBB.liveins()) {
3048 for (MachineBasicBlock *PredBB : BeforePrologueRegion)
3049 PredBB->addLiveIn(LI);
3050 }
3051
3052 // Remove the newly added blocks from the list, since we know
3053 // we do not have to do the following updates for them.
3054 for (MachineBasicBlock *B : AddedBlocks) {
3055 BeforePrologueRegion.erase(B);
3056 MF.insert(PrologueMBB.getIterator(), B);
3057 }
3058
3059 for (MachineBasicBlock *MBB : BeforePrologueRegion) {
3060 // Make sure the LiveIns are still sorted and unique.
3062 // Replace the edges to PrologueMBB by edges to the sequences
3063 // we are about to add, but only update for immediate predecessors.
3064 if (MBB->isSuccessor(&PrologueMBB))
3065 MBB->ReplaceUsesOfBlockWith(&PrologueMBB, AddedBlocks[0]);
3066 }
3067
3068 // The required stack size that is aligned to ARM constant criterion.
3069 AlignedStackSize = alignToARMConstant(StackSize);
3070
3071 // When the frame size is less than 256 we just compare the stack
3072 // boundary directly to the value of the stack pointer, per gcc.
3073 bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;
3074
3075 // We will use two of the callee save registers as scratch registers so we
3076 // need to save those registers onto the stack.
3077 // We will use SR0 to hold stack limit and SR1 to hold the stack size
3078 // requested and arguments for __morestack().
3079 // SR0: Scratch Register #0
3080 // SR1: Scratch Register #1
3081 // push {SR0, SR1}
3082 if (Thumb) {
3083 BuildMI(PrevStackMBB, DL, TII.get(ARM::tPUSH))
3085 .addReg(ScratchReg0)
3086 .addReg(ScratchReg1);
3087 } else {
3088 BuildMI(PrevStackMBB, DL, TII.get(ARM::STMDB_UPD))
3089 .addReg(ARM::SP, RegState::Define)
3090 .addReg(ARM::SP)
3092 .addReg(ScratchReg0)
3093 .addReg(ScratchReg1);
3094 }
3095
3096 // Emit the relevant DWARF information about the change in stack pointer as
3097 // well as where to find both r4 and r5 (the callee-save registers)
3098 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3099 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 8));
3100 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3101 .addCFIIndex(CFIIndex);
3103 nullptr, MRI->getDwarfRegNum(ScratchReg1, true), -4));
3104 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3105 .addCFIIndex(CFIIndex);
3107 nullptr, MRI->getDwarfRegNum(ScratchReg0, true), -8));
3108 BuildMI(PrevStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3109 .addCFIIndex(CFIIndex);
3110 }
3111
3112 // mov SR1, sp
3113 if (Thumb) {
3114 BuildMI(McrMBB, DL, TII.get(ARM::tMOVr), ScratchReg1)
3115 .addReg(ARM::SP)
3117 } else if (CompareStackPointer) {
3118 BuildMI(McrMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
3119 .addReg(ARM::SP)
3121 .add(condCodeOp());
3122 }
3123
3124 // sub SR1, sp, #StackSize
3125 if (!CompareStackPointer && Thumb) {
// NOTE(review): if kSplitStackAvailable is 256, as the "less than 256" comment
// above suggests, AlignedStackSize < 256 cannot hold when CompareStackPointer
// is false, which would make this branch (and its ARM counterpart below)
// unreachable -- confirm.
3126 if (AlignedStackSize < 256) {
3127 BuildMI(McrMBB, DL, TII.get(ARM::tSUBi8), ScratchReg1)
3128 .add(condCodeOp())
3129 .addReg(ScratchReg1)
3130 .addImm(AlignedStackSize)
3132 } else {
3133 if (Thumb2 || ST->genExecuteOnly()) {
3134 BuildMI(McrMBB, DL, TII.get(MovOp), ScratchReg0)
3135 .addImm(AlignedStackSize);
3136 } else {
3137 auto MBBI = McrMBB->end();
3138 auto RegInfo = STI.getRegisterInfo();
3139 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3140 AlignedStackSize);
3141 }
3142 BuildMI(McrMBB, DL, TII.get(ARM::tSUBrr), ScratchReg1)
3143 .add(condCodeOp())
3144 .addReg(ScratchReg1)
3145 .addReg(ScratchReg0)
3147 }
3148 } else if (!CompareStackPointer) {
3149 if (AlignedStackSize < 256) {
3150 BuildMI(McrMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
3151 .addReg(ARM::SP)
3152 .addImm(AlignedStackSize)
3154 .add(condCodeOp());
3155 } else {
3156 auto MBBI = McrMBB->end();
3157 auto RegInfo = STI.getRegisterInfo();
3158 RegInfo->emitLoadConstPool(*McrMBB, MBBI, DL, ScratchReg0, 0,
3159 AlignedStackSize);
3160 BuildMI(McrMBB, DL, TII.get(ARM::SUBrr), ScratchReg1)
3161 .addReg(ARM::SP)
3162 .addReg(ScratchReg0)
3164 .add(condCodeOp());
3165 }
3166 }
3167
// Thumb1-only targets load the stack limit through the __STACK_LIMIT symbol;
// all other targets read it from a TLS slot reached via the coprocessor.
3168 if (Thumb && ST->isThumb1Only()) {
3169 if (ST->genExecuteOnly()) {
3170 BuildMI(GetMBB, DL, TII.get(MovOp), ScratchReg0)
3171 .addExternalSymbol("__STACK_LIMIT");
3172 } else {
3173 unsigned PCLabelId = ARMFI->createPICLabelUId();
3175 MF.getFunction().getContext(), "__STACK_LIMIT", PCLabelId, 0);
3177 unsigned CPI = MCP->getConstantPoolIndex(NewCPV, Align(4));
3178
3179 // ldr SR0, [pc, offset(STACK_LIMIT)]
3180 BuildMI(GetMBB, DL, TII.get(ARM::tLDRpci), ScratchReg0)
3183 }
3184
3185 // ldr SR0, [SR0]
3186 BuildMI(GetMBB, DL, TII.get(ARM::tLDRi), ScratchReg0)
3187 .addReg(ScratchReg0)
3188 .addImm(0)
3190 } else {
3191 // Get TLS base address from the coprocessor
3192 // mrc p15, #0, SR0, c13, c0, #3
3193 BuildMI(McrMBB, DL, TII.get(Thumb ? ARM::t2MRC : ARM::MRC),
3194 ScratchReg0)
3195 .addImm(15)
3196 .addImm(0)
3197 .addImm(13)
3198 .addImm(0)
3199 .addImm(3)
3201
3202 // Use the last tls slot on android and a private field of the TCB on linux.
3203 assert(ST->isTargetAndroid() || ST->isTargetLinux());
3204 unsigned TlsOffset = ST->isTargetAndroid() ? 63 : 1;
3205
3206 // Get the stack limit from the right offset
3207 // ldr SR0, [sr0, #4 * TlsOffset]
3208 BuildMI(GetMBB, DL, TII.get(Thumb ? ARM::t2LDRi12 : ARM::LDRi12),
3209 ScratchReg0)
3210 .addReg(ScratchReg0)
3211 .addImm(4 * TlsOffset)
3213 }
3214
3215 // Compare stack limit with stack size requested.
3216 // cmp SR0, SR1
3217 Opcode = Thumb ? ARM::tCMPr : ARM::CMPrr;
3218 BuildMI(GetMBB, DL, TII.get(Opcode))
3219 .addReg(ScratchReg0)
3220 .addReg(ScratchReg1)
3222
3223 // This jump is taken if StackLimit <= SP - stack required.
3224 Opcode = Thumb ? ARM::tBcc : ARM::Bcc;
3225 BuildMI(GetMBB, DL, TII.get(Opcode))
3226 .addMBB(PostStackMBB)
3228 .addReg(ARM::CPSR);
3229
3230 // Calling __morestack(StackSize, Size of stack arguments).
3231 // __morestack knows that the stack size requested is in SR0(r4)
3232 // and amount size of stack arguments is in SR1(r5).
3233
3234 // Pass first argument for the __morestack by Scratch Register #0.
3235 // The amount size of stack required
3236 if (Thumb) {
3237 if (AlignedStackSize < 256) {
3238 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg0)
3239 .add(condCodeOp())
3240 .addImm(AlignedStackSize)
3242 } else {
3243 if (Thumb2 || ST->genExecuteOnly()) {
3244 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg0)
3245 .addImm(AlignedStackSize);
3246 } else {
3247 auto MBBI = AllocMBB->end();
3248 auto RegInfo = STI.getRegisterInfo();
3249 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3250 AlignedStackSize);
3251 }
3252 }
3253 } else {
3254 if (AlignedStackSize < 256) {
3255 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
3256 .addImm(AlignedStackSize)
3258 .add(condCodeOp());
3259 } else {
3260 auto MBBI = AllocMBB->end();
3261 auto RegInfo = STI.getRegisterInfo();
3262 RegInfo->emitLoadConstPool(*AllocMBB, MBBI, DL, ScratchReg0, 0,
3263 AlignedStackSize);
3264 }
3265 }
3266
3267 // Pass second argument for the __morestack by Scratch Register #1.
3268 // The amount size of stack consumed to save function arguments.
3269 if (Thumb) {
3270 if (ARMFI->getArgumentStackSize() < 256) {
3271 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVi8), ScratchReg1)
3272 .add(condCodeOp())
3275 } else {
3276 if (Thumb2 || ST->genExecuteOnly()) {
3277 BuildMI(AllocMBB, DL, TII.get(MovOp), ScratchReg1)
3279 } else {
3280 auto MBBI = AllocMBB->end();
3281 auto RegInfo = STI.getRegisterInfo();
3282 RegInfo->emitLoadConstPool(
3283 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3285 }
3286 }
3287 } else {
3288 if (alignToARMConstant(ARMFI->getArgumentStackSize()) < 256) {
3289 BuildMI(AllocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
3292 .add(condCodeOp());
3293 } else {
3294 auto MBBI = AllocMBB->end();
3295 auto RegInfo = STI.getRegisterInfo();
3296 RegInfo->emitLoadConstPool(
3297 *AllocMBB, MBBI, DL, ScratchReg1, 0,
3299 }
3300 }
3301
3302 // push {lr} - Save return address of this function.
3303 if (Thumb) {
3304 BuildMI(AllocMBB, DL, TII.get(ARM::tPUSH))
3306 .addReg(ARM::LR);
3307 } else {
3308 BuildMI(AllocMBB, DL, TII.get(ARM::STMDB_UPD))
3309 .addReg(ARM::SP, RegState::Define)
3310 .addReg(ARM::SP)
3312 .addReg(ARM::LR);
3313 }
3314
3315 // Emit the DWARF info about the change in stack as well as where to find the
3316 // previous link register
3317 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3318 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 12));
3319 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3320 .addCFIIndex(CFIIndex);
3322 nullptr, MRI->getDwarfRegNum(ARM::LR, true), -12));
3323 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3324 .addCFIIndex(CFIIndex);
3325 }
3326
3327 // Call __morestack().
3328 if (Thumb) {
3329 BuildMI(AllocMBB, DL, TII.get(ARM::tBL))
3331 .addExternalSymbol("__morestack");
3332 } else {
3333 BuildMI(AllocMBB, DL, TII.get(ARM::BL))
3334 .addExternalSymbol("__morestack");
3335 }
3336
3337 // pop {lr} - Restore return address of this original function.
3338 if (Thumb) {
3339 if (ST->isThumb1Only()) {
// On Thumb1-only targets the saved value is popped into SR0 and then moved
// into LR.
3340 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3342 .addReg(ScratchReg0);
3343 BuildMI(AllocMBB, DL, TII.get(ARM::tMOVr), ARM::LR)
3344 .addReg(ScratchReg0)
3346 } else {
3347 BuildMI(AllocMBB, DL, TII.get(ARM::t2LDR_POST))
3348 .addReg(ARM::LR, RegState::Define)
3349 .addReg(ARM::SP, RegState::Define)
3350 .addReg(ARM::SP)
3351 .addImm(4)
3353 }
3354 } else {
3355 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3356 .addReg(ARM::SP, RegState::Define)
3357 .addReg(ARM::SP)
3359 .addReg(ARM::LR);
3360 }
3361
3362 // Restore SR0 and SR1 in case __morestack() was called.
3363 // __morestack() will skip PostStackMBB block so we need to restore
3364 // scratch registers from here.
3365 // pop {SR0, SR1}
3366 if (Thumb) {
3367 BuildMI(AllocMBB, DL, TII.get(ARM::tPOP))
3369 .addReg(ScratchReg0)
3370 .addReg(ScratchReg1);
3371 } else {
3372 BuildMI(AllocMBB, DL, TII.get(ARM::LDMIA_UPD))
3373 .addReg(ARM::SP, RegState::Define)
3374 .addReg(ARM::SP)
3376 .addReg(ScratchReg0)
3377 .addReg(ScratchReg1);
3378 }
3379
3380 // Update the CFA offset now that we've popped
3381 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3382 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3383 BuildMI(AllocMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3384 .addCFIIndex(CFIIndex);
3385 }
3386
3387 // Return from this function.
3388 BuildMI(AllocMBB, DL, TII.get(ST->getReturnOpcode())).add(predOps(ARMCC::AL));
3389
3390 // Restore SR0 and SR1 in case __morestack() was not called.
3391 // pop {SR0, SR1}
3392 if (Thumb) {
3393 BuildMI(PostStackMBB, DL, TII.get(ARM::tPOP))
3395 .addReg(ScratchReg0)
3396 .addReg(ScratchReg1);
3397 } else {
3398 BuildMI(PostStackMBB, DL, TII.get(ARM::LDMIA_UPD))
3399 .addReg(ARM::SP, RegState::Define)
3400 .addReg(ARM::SP)
3402 .addReg(ScratchReg0)
3403 .addReg(ScratchReg1);
3404 }
3405
3406 // Update the CFA offset now that we've popped
3407 if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
3408 CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 0));
3409 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3410 .addCFIIndex(CFIIndex);
3411
3412 // Tell debuggers that r4 and r5 are now the same as they were in the
3413 // previous function, that they're the "Same Value".
3415 nullptr, MRI->getDwarfRegNum(ScratchReg0, true)));
3416 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3417 .addCFIIndex(CFIIndex);
3419 nullptr, MRI->getDwarfRegNum(ScratchReg1, true)));
3420 BuildMI(PostStackMBB, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
3421 .addCFIIndex(CFIIndex);
3422 }
3423
3424 // Organizing MBB lists
// Resulting edges: PrevStackMBB -> McrMBB -> GetMBB -> {PostStackMBB,
// AllocMBB}, AllocMBB -> PostStackMBB, PostStackMBB -> PrologueMBB.
3425 PostStackMBB->addSuccessor(&PrologueMBB);
3426
3427 AllocMBB->addSuccessor(PostStackMBB);
3428
3429 GetMBB->addSuccessor(PostStackMBB);
3430 GetMBB->addSuccessor(AllocMBB);
3431
3432 McrMBB->addSuccessor(GetMBB);
3433
3434 PrevStackMBB->addSuccessor(McrMBB);
3435
3436#ifdef EXPENSIVE_CHECKS
3437 MF.verify();
3438#endif
3439}
unsigned const MachineRegisterInfo * MRI
static int64_t getArgumentStackToRestore(MachineFunction &MF, MachineBasicBlock &MBB)
Returns how much of the incoming argument stack area (in bytes) we should clean up in an epilogue.
static bool needsWinCFI(const MachineFunction &MF)
static unsigned estimateRSStackSizeLimit(MachineFunction &MF)
Look at each instruction that references stack frames and return the stack size limit beyond which so...
unsigned RegSize
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static bool isThumb(const MCSubtargetInfo &STI)
static MachineBasicBlock::iterator skipAlignedDPRCS2Spills(MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs)
Skip past the code inserted by emitAlignedDPRCS2Spills, and return an iterator to the following instr...
static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, const TargetInstrInfo &TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, const unsigned Reg, const Align Alignment, const bool MustBeSingleInstruction)
Emit an instruction sequence that will align the address in register Reg by zero-ing out the lower bi...
static uint32_t alignToARMConstant(uint32_t Value)
Get the minimum constant for ARM that is greater than or equal to the argument.
static void checkNumAlignedDPRCS2Regs(MachineFunction &MF, BitVector &SavedRegs)
static void insertSEHRange(MachineBasicBlock &MBB, MachineBasicBlock::iterator Start, const MachineBasicBlock::iterator &End, const ARMBaseInstrInfo &TII, unsigned MIFlags)
static void emitAlignedDPRCS2Restores(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned reload instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitAlignedDPRCS2Spills(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned NumAlignedDPRCS2Regs, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI)
Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers starting from d8.
static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
static unsigned EstimateFunctionSizeInBytes(const MachineFunction &MF, const ARMBaseInstrInfo &TII)
static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, const TargetInstrInfo &TII, unsigned Flags)
static bool canSpillOnFrameIndexAccess(const MachineFunction &MF, const TargetFrameLowering &TFI)
static bool requiresAAPCSFrameRecord(const MachineFunction &MF)
static bool WindowsRequiresStackProbe(const MachineFunction &MF, size_t StackSizeInBytes)
static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, const MachineFunction &MF)
We need the offset of the frame pointer relative to other MachineFrameInfo offsets which are encoded ...
static MachineBasicBlock::iterator initMBBRange(MachineBasicBlock &MBB, const MachineBasicBlock::iterator &MBBI)
static int sizeOfSPAdjustment(const MachineInstr &MI)
static const uint64_t kSplitStackAvailable
static cl::opt< bool > SpillAlignedNEONRegs("align-neon-spills", cl::Hidden, cl::init(true), cl::desc("Align ARM NEON spills in prolog and epilog"))
static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, const ARMBaseInstrInfo &TII, unsigned DestReg, unsigned SrcReg, int NumBytes, unsigned MIFlags=MachineInstr::NoFlags, ARMCC::CondCodes Pred=ARMCC::AL, unsigned PredReg=0)
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
LLVMContext & Context
const char LLVMTargetMachineRef TM
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
Value * RHS
Value * LHS
static const unsigned FramePtr
bool hasBasePointer(const MachineFunction &MF) const
virtual void emitLoadConstPool(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, unsigned SubIdx, int Val, ARMCC::CondCodes Pred=ARMCC::AL, Register PredReg=Register(), unsigned MIFlags=MachineInstr::NoFlags) const
emitLoadConstPool - Emits a load from constpool to materialize the specified immediate.
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
bool cannotEliminateFrame(const MachineFunction &MF) const
Register getFrameRegister(const MachineFunction &MF) const override
bool canRealignStack(const MachineFunction &MF) const override
static ARMConstantPoolSymbol * Create(LLVMContext &C, StringRef s, unsigned ID, unsigned char PCAdj)
ARMConstantPoolValue - ARM specific constantpool value.
bool restoreCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, MutableArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
restoreCalleeSavedRegisters - Issues instruction(s) to restore all callee saved registers and returns...
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - Provide a base+offset reference to an FI slot for debug info.
static void updateLRRestored(MachineFunction &MF)
Update the IsRestored flag on LR if it is spilled, based on the return instructions.
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
emitProlog/emitEpilog - These methods insert prolog and epilog code into the function.
ARMFrameLowering(const ARMSubtarget &sti)
bool enableShrinkWrapping(const MachineFunction &MF) const override
Returns true if the target will correctly handle shrink wrapping.
bool keepFramePointer(const MachineFunction &MF) const override
Return true if the target wants to keep the frame pointer regardless of the function attribute "frame...
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override
bool isFPReserved(const MachineFunction &MF) const
isFPReserved - Return true if the frame pointer register should be considered a reserved register on ...
bool canSimplifyCallFramePseudos(const MachineFunction &MF) const override
canSimplifyCallFramePseudos - If there is a reserved call frame, the call frame pseudos can be simpli...
void adjustForSegmentedStacks(MachineFunction &MF, MachineBasicBlock &MBB) const override
Adjust the prologue to have the function use segmented stacks.
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo *TRI) const override
spillCalleeSavedRegisters - Issues instruction(s) to spill all callee saved registers and returns tru...
bool hasReservedCallFrame(const MachineFunction &MF) const override
hasReservedCallFrame - Under normal circumstances, when a frame pointer is not required,...
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
int ResolveFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg, int SPAdj) const
void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const override
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
const ARMSubtarget & STI
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
ARMFunctionInfo - This class is derived from MachineFunctionInfo and contains private ARM-specific in...
void setDPRCalleeSavedAreaSize(unsigned s)
unsigned getFPCXTSaveAreaSize() const
unsigned getGPRCalleeSavedArea1Size() const
unsigned getDPRCalleeSavedGapSize() const
void setGPRCalleeSavedArea2Size(unsigned s)
void setDPRCalleeSavedAreaOffset(unsigned o)
void setFramePtrSpillOffset(unsigned o)
unsigned getGPRCalleeSavedArea2Size() const
unsigned getNumAlignedDPRCS2Regs() const
void setGPRCalleeSavedArea1Size(unsigned s)
unsigned getArgumentStackToRestore() const
void setFPCXTSaveAreaSize(unsigned s)
unsigned getDPRCalleeSavedAreaSize() const
unsigned getFramePtrSpillOffset() const
unsigned getArgRegsSaveSize() const
void setGPRCalleeSavedArea2Offset(unsigned o)
void setGPRCalleeSavedArea1Offset(unsigned o)
void setDPRCalleeSavedGapSize(unsigned s)
unsigned getArgumentStackSize() const
unsigned getReturnRegsCount() const
bool useMovt() const
const ARMBaseInstrInfo * getInstrInfo() const override
Definition: ARMSubtarget.h:196
bool isTargetWindows() const
Definition: ARMSubtarget.h:308
const ARMBaseRegisterInfo * getRegisterInfo() const override
Definition: ARMSubtarget.h:208
bool splitFramePushPop(const MachineFunction &MF) const
Returns true if the frame setup is split into two separate pushes (first r0-r7,lr then r8-r11),...
Definition: ARMSubtarget.h:385
bool splitFramePointerPush(const MachineFunction &MF) const
bool isTargetELF() const
Definition: ARMSubtarget.h:311
const ARMFrameLowering * getFrameLowering() const override
Definition: ARMSubtarget.h:204
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
bool test(unsigned Idx) const
Definition: BitVector.h:461
BitVector & set()
Definition: BitVector.h:351
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:358
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition: Function.h:215
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:677
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createDefCfaRegister(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_def_cfa_register modifies a rule for computing CFA.
Definition: MCDwarf.h:548
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createSameValue(MCSymbol *L, unsigned Register, SMLoc Loc={})
.cfi_same_value Current value of Register is the same as in the previous frame.
Definition: MCDwarf.h:630
Context object for machine code objects.
Definition: MCContext.h:81
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
iterator_range< livein_iterator > liveins() const
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void sortUniqueLiveIns()
Sorts and uniques the LiveIns vector.
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
void ReplaceUsesOfBlockWith(MachineBasicBlock *Old, MachineBasicBlock *New)
Given a machine basic block that branched to 'Old', change the code and CFG so that it branches to 'N...
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< iterator > terminators()
iterator insertAfter(iterator I, MachineInstr *MI)
Insert MI into the instruction list after I.
bool isSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB is a successor of this block.
iterator_range< pred_iterator > predecessors()
MachineInstrBundleIterator< MachineInstr > iterator
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
const std::vector< MachineConstantPoolEntry > & getConstants() const
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool needsSplitStackProlog() const
Return true if this function requires a split stack prolog, even if it uses no stack space.
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool isReturnAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
bool hasCalls() const
Return true if the current function has any function calls.
bool isFrameAddressTaken() const
This method may be called any time after instruction selection is complete to determine if there is a...
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
int getStackProtectorIndex() const
Return the index for the stack protector object.
int getOffsetAdjustment() const
Return the correction for frame offsets.
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
void setOffsetAdjustment(int Adj)
Set the correction for frame offsets.
BitVector getPristineRegs(const MachineFunction &MF) const
Return a set of physical registers that are pristine.
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
unsigned getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
bool verify(Pass *p=nullptr, const char *Banner=nullptr, bool AbortOnError=true) const
Run the current MachineFunction through the machine code verifier, useful for debugger use.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & copyImplicitOps(const MachineInstr &OtherMI) const
Copy all the implicit operands from OtherMI onto this one.
Representation of each machine instruction.
Definition: MachineInstr.h:69
bool addRegisterKilled(Register IncomingReg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound=false)
We have determined MI kills a register.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
This class contains meta information specific to a module.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
bool isImplicit() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
void addScavengingFrameIndex(int FI)
Add a scavenging frame index.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool erase(PtrType Ptr)
erase - If the set contains the specified pointer, remove it and return true, otherwise return false.
Definition: SmallPtrSet.h:356
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:342
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:427
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
iterator erase(const_iterator CI)
Definition: SmallVector.h:750
typename SuperClass::iterator iterator
Definition: SmallVector.h:590
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
Information about stack frame layout on the target.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
virtual void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
virtual bool hasFP(const MachineFunction &MF) const =0
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void getCalleeSaves(const MachineFunction &MF, BitVector &SavedRegs) const
Returns the callee-saved registers as computed by determineCalleeSaves in the BitVector SavedRegs.
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
int alignSPAdjust(int SPAdj) const
alignSPAdjust - This method aligns the stack adjustment to the correct alignment.
virtual StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const
getFrameIndexReference - This method should return the base register and offset used to reference a f...
TargetInstrInfo - Interface to description of machine instruction set.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disab...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
bool hasStackRealignment(const MachineFunction &MF) const
True if stack realignment is required and still possible.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetFrameLowering * getFrameLowering() const
virtual const TargetInstrInfo * getInstrInfo() const
LLVM Value Representation.
Definition: Value.h:74
self_iterator getIterator()
Definition: ilist_node.h:109
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned getAM2Opc(AddrOpc Opc, unsigned Imm12, ShiftOpc SO, unsigned IdxMode=0)
unsigned getSORegOpc(ShiftOpc ShOp, unsigned Imm)
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Define
Register definition.
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1742
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
static bool isSplitFPArea1Register(unsigned Reg, bool SplitFramePushPop)
static bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop)
static std::array< MachineOperand, 2 > predOps(ARMCC::CondCodes Pred, unsigned PredReg=0)
Get the operands corresponding to the given Pred value.
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
static bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop)
bool tryFoldSPUpdateIntoPushPop(const ARMSubtarget &Subtarget, MachineFunction &MF, MachineInstr *MI, unsigned NumBytes)
Tries to add registers to the reglist of a given base-updating push/pop instruction to adjust the sta...
static bool isARMLowRegister(unsigned Reg)
isARMLowRegister - Returns true if the register is a low register (r0-r7).
Definition: ARMBaseInfo.h:160
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1914
static MachineOperand t1CondCodeOp(bool isDead=false)
Get the operand corresponding to the conditional code result for Thumb1.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1749
static bool isARMArea1Register(unsigned Reg, bool SplitFramePushPop)
isARMArea1Register - Returns true if the register is a low register (r0-r7) or a stack/pc register th...
static MachineOperand condCodeOp(unsigned CCReg=0)
Get the operand corresponding to the conditional code result.
unsigned Log2(Align A)
Returns the log2 of the alignment.
Definition: Alignment.h:208
void emitARMRegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
emitARMRegPlusImmediate / emitT2RegPlusImmediate - Emits a series of instructions to materialize a des...
static bool isSplitFPArea2Register(unsigned Reg, bool SplitFramePushPop)
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
void emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, Register BaseReg, int NumBytes, ARMCC::CondCodes Pred, Register PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags=0)
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85