LLVM 23.0.0git
HexagonFrameLowering.cpp
Go to the documentation of this file.
1//===- HexagonFrameLowering.cpp - Define frame lowering -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//
8//===----------------------------------------------------------------------===//
9
11#include "HexagonBlockRanges.h"
12#include "HexagonISelLowering.h"
13#include "HexagonInstrInfo.h"
15#include "HexagonRegisterInfo.h"
16#include "HexagonSubtarget.h"
19#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/Attributes.h"
44#include "llvm/IR/DebugLoc.h"
45#include "llvm/IR/Function.h"
46#include "llvm/MC/MCDwarf.h"
48#include "llvm/Pass.h"
52#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <cstdint>
61#include <iterator>
62#include <limits>
63#include <map>
64#include <optional>
65#include <utility>
66#include <vector>
67
68#define DEBUG_TYPE "hexagon-pei"
69
70// Hexagon stack frame layout as defined by the ABI:
71//
72// Incoming arguments
73// passed via stack
74// |
75// |
76// SP during function's FP during function's |
77// +-- runtime (top of stack) runtime (bottom) --+ |
78// | | |
79// --++---------------------+------------------+-----------------++-+-------
80// | parameter area for | variable-size | fixed-size |LR| arg
81// | called functions | local objects | local objects |FP|
82// --+----------------------+------------------+-----------------+--+-------
83// <- size known -> <- size unknown -> <- size known ->
84//
85// Low address High address
86//
87// <--- stack growth
88//
89//
90// - In any circumstances, the outgoing function arguments are always accessi-
91// ble using the SP, and the incoming arguments are accessible using the FP.
92// - If the local objects are not aligned, they can always be accessed using
93// the FP.
// - If there are no variable-sized objects, the local objects can always be
//   accessed using the SP, regardless of whether they are aligned or not.
//   (The alignment padding will be at the bottom of the stack (highest
//   address), and so the offset with respect to the SP will be known at
//   compile time.)
99//
100// The only complication occurs if there are both, local aligned objects, and
101// dynamically allocated (variable-sized) objects. The alignment pad will be
102// placed between the FP and the local objects, thus preventing the use of the
103// FP to access the local objects. At the same time, the variable-sized objects
104// will be between the SP and the local objects, thus introducing an unknown
105// distance from the SP to the locals.
106//
// To avoid this problem, a new register is created that holds the aligned
// address of the bottom of the stack, referred to in the sources as AP
// (aligned pointer). The AP will be equal to "FP-p", where "p" is the
// smallest pad that aligns AP to the required boundary (the maximum of the
// alignments of all stack objects, fixed- and variable-sized). All local
// objects[1] will then use AP as the base pointer.
113// [1] The exception is with "fixed" stack objects. "Fixed" stack objects get
114// their name from being allocated at fixed locations on the stack, relative
115// to the FP. In the presence of dynamic allocation and local alignment, such
116// objects can only be accessed through the FP.
117//
118// Illustration of the AP:
119// FP --+
120// |
121// ---------------+---------------------+-----+-----------------------++-+--
122// Rest of the | Local stack objects | Pad | Fixed stack objects |LR|
123// stack frame | (aligned) | | (CSR, spills, etc.) |FP|
124// ---------------+---------------------+-----+-----------------+-----+--+--
125// |<-- Multiple of the -->|
126// stack alignment +-- AP
127//
128// The AP is set up at the beginning of the function. Since it is not a dedi-
129// cated (reserved) register, it needs to be kept live throughout the function
130// to be available as the base register for local object accesses.
// Normally, the address of a stack object is obtained by a pseudo-instruction
132// PS_fi. To access local objects with the AP register present, a different
133// pseudo-instruction needs to be used: PS_fia. The PS_fia takes one extra
134// argument compared to PS_fi: the first input register is the AP register.
135// This keeps the register live between its definition and its uses.
136
137// The AP register is originally set up using pseudo-instruction PS_aligna:
138// AP = PS_aligna A
139// where
140// A - required stack alignment
141// The alignment value must be the maximum of all alignments required by
142// any stack object.
143
144// The dynamic allocation uses a pseudo-instruction PS_alloca:
145// Rd = PS_alloca Rs, A
146// where
147// Rd - address of the allocated space
148// Rs - minimum size (the actual allocated can be larger to accommodate
149// alignment)
150// A - required alignment
151
152using namespace llvm;
153
156 const DebugLoc &DL) {
157 if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack))
158 return;
159
160 const auto &HST = MF.getSubtarget<HexagonSubtarget>();
161 // Hexagon saves LR (R31) via allocframe. If there is no frame, LR is
162 // not on the regular stack and does not need shadow-stack protection.
163 if (!HST.getFrameLowering()->hasFP(MF))
164 return;
165
166 Register SCSPReg = Hexagon::R19;
167 if (!MF.getSubtarget().isRegisterReservedByUser(SCSPReg))
168 report_fatal_error("Must reserve r19 to use shadow call stack on Hexagon");
169
170 const auto &HII = *HST.getInstrInfo();
171
172 // r19 = add(r19, #4)
173 BuildMI(MBB, MI, DL, HII.get(Hexagon::A2_addi), SCSPReg)
174 .addReg(SCSPReg)
175 .addImm(4)
177 // memw(r19 + #-4) = r31
178 BuildMI(MBB, MI, DL, HII.get(Hexagon::S2_storeri_io))
179 .addReg(SCSPReg)
180 .addImm(-4)
181 .addReg(Hexagon::R31)
183
184 MBB.addLiveIn(SCSPReg);
185
186 if (!MF.needsFrameMoves())
187 return;
188
189 // CFI: DW_CFA_val_expression for the SCS register, DW_OP_bregN -4
190 // Tells the unwinder that the SCS register at entry = current value - 4.
191 const auto &TRI = *MF.getSubtarget().getRegisterInfo();
192 unsigned DwarfSCSReg = TRI.getDwarfRegNum(SCSPReg, /*IsEH=*/true);
193 // DW_OP_breg0..DW_OP_breg31 (0x70..0x8f) are 32 opcodes indexed by
194 // register number, so the register number must fit in [0, 31].
195 assert(DwarfSCSReg < 32 && "SCS register should be < 32");
196 const char CFIInst[] = {
197 (char)dwarf::DW_CFA_val_expression,
198 (char)DwarfSCSReg,
199 2, // expression length
200 (char)(unsigned)(dwarf::DW_OP_breg0 + DwarfSCSReg),
201 (char)(-4 & 0x7f), // SLEB128 -4
202 };
204 .buildEscape(StringRef(CFIInst, sizeof(CFIInst)));
205}
206
209 const DebugLoc &DL) {
210 if (!MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack))
211 return;
212
213 // hasFP() is true at both call sites: the non-vararg path in
214 // insertEpilogueInBlock returns early when !hasFP(), and the vararg+musl
215 // path is inside the hasFP() branch. Check defensively.
217 report_fatal_error("SCS epilogue requires a frame");
218
219 Register SCSPReg = Hexagon::R19;
220 const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
221
222 // r31 = memw(r19 + #-4)
223 BuildMI(MBB, MI, DL, HII.get(Hexagon::L2_loadri_io), Hexagon::R31)
224 .addReg(SCSPReg)
225 .addImm(-4)
227 // r19 = add(r19, #-4)
228 BuildMI(MBB, MI, DL, HII.get(Hexagon::A2_addi), SCSPReg)
229 .addReg(SCSPReg)
230 .addImm(-4)
232
233 if (MF.needsFrameMoves())
235}
236
237static cl::opt<bool> DisableDeallocRet("disable-hexagon-dealloc-ret",
238 cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"));
239
241 NumberScavengerSlots("number-scavenger-slots", cl::Hidden,
242 cl::desc("Set the number of scavenger slots"),
243 cl::init(2));
244
245static cl::opt<int>
246 SpillFuncThreshold("spill-func-threshold", cl::Hidden,
247 cl::desc("Specify O2(not Os) spill func threshold"),
248 cl::init(6));
249
250static cl::opt<int>
251 SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden,
252 cl::desc("Specify Os spill func threshold"),
253 cl::init(1));
254
256 "enable-stackovf-sanitizer", cl::Hidden,
257 cl::desc("Enable runtime checks for stack overflow."), cl::init(false));
258
259static cl::opt<bool>
260 EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden,
261 cl::desc("Enable stack frame shrink wrapping"));
262
264 ShrinkLimit("shrink-frame-limit",
265 cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden,
266 cl::desc("Max count of stack frame shrink-wraps"));
267
268static cl::opt<bool>
269 EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden,
270 cl::desc("Enable long calls for save-restore stubs."),
271 cl::init(false));
272
273static cl::opt<bool> EliminateFramePointer("hexagon-fp-elim", cl::init(true),
274 cl::Hidden, cl::desc("Refrain from using FP whenever possible"));
275
276static cl::opt<bool> OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden,
277 cl::init(true), cl::desc("Optimize spill slots"));
278
279#ifndef NDEBUG
281 cl::init(std::numeric_limits<unsigned>::max()));
282static unsigned SpillOptCount = 0;
283#endif
284
285namespace {
286
287 class HexagonCallFrameInformation : public MachineFunctionPass {
288 public:
289 static char ID;
290
291 HexagonCallFrameInformation() : MachineFunctionPass(ID) {}
292
293 bool runOnMachineFunction(MachineFunction &MF) override;
294
295 MachineFunctionProperties getRequiredProperties() const override {
296 return MachineFunctionProperties().setNoVRegs();
297 }
298 };
299
300 char HexagonCallFrameInformation::ID = 0;
301
302} // end anonymous namespace
303
304bool HexagonCallFrameInformation::runOnMachineFunction(MachineFunction &MF) {
305 auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
306 bool NeedCFI = MF.needsFrameMoves();
307
308 if (!NeedCFI)
309 return false;
310 HFI.insertCFIInstructions(MF);
311 return true;
312}
313
314INITIALIZE_PASS(HexagonCallFrameInformation, "hexagon-cfi",
315 "Hexagon call frame information", false, false)
316
318 return new HexagonCallFrameInformation();
319}
320
321/// Map a register pair Reg to the subregister that has the greater "number",
322/// i.e. D3 (aka R7:6) will be mapped to R7, etc.
324 const TargetRegisterInfo &TRI,
325 bool hireg = true) {
326 if (Reg < Hexagon::D0 || Reg > Hexagon::D15)
327 return Reg;
328
329 Register RegNo = 0;
330 for (MCPhysReg SubReg : TRI.subregs(Reg)) {
331 if (hireg) {
332 if (SubReg > RegNo)
333 RegNo = SubReg;
334 } else {
335 if (!RegNo || SubReg < RegNo)
336 RegNo = SubReg;
337 }
338 }
339 return RegNo;
340}
341
342/// Returns the callee saved register with the largest id in the vector.
344 const TargetRegisterInfo &TRI) {
345 static_assert(Hexagon::R1 > 0,
346 "Assume physical registers are encoded as positive integers");
347 if (CSI.empty())
348 return 0;
349
350 Register Max = getMax32BitSubRegister(CSI[0].getReg(), TRI);
351 for (unsigned I = 1, E = CSI.size(); I < E; ++I) {
353 if (Reg > Max)
354 Max = Reg;
355 }
356 return Max;
357}
358
359/// Checks if the basic block contains any instruction that needs a stack
360/// frame to be already in place.
361static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR,
362 const HexagonRegisterInfo &HRI) {
363 for (const MachineInstr &MI : MBB) {
364 if (MI.isCall())
365 return true;
366 unsigned Opc = MI.getOpcode();
367 switch (Opc) {
368 case Hexagon::PS_alloca:
369 case Hexagon::PS_aligna:
370 return true;
371 default:
372 break;
373 }
374 // Check individual operands.
375 for (const MachineOperand &MO : MI.operands()) {
376 // While the presence of a frame index does not prove that a stack
377 // frame will be required, all frame indexes should be within alloc-
378 // frame/deallocframe. Otherwise, the code that translates a frame
379 // index into an offset would have to be aware of the placement of
380 // the frame creation/destruction instructions.
381 if (MO.isFI())
382 return true;
383 if (MO.isReg()) {
384 Register R = MO.getReg();
385 // Debug instructions may refer to $noreg.
386 if (!R)
387 continue;
388 // Virtual registers will need scavenging, which then may require
389 // a stack slot.
390 if (R.isVirtual())
391 return true;
392 for (MCPhysReg S : HRI.subregs_inclusive(R))
393 if (CSR[S])
394 return true;
395 continue;
396 }
397 if (MO.isRegMask()) {
398 // A regmask would normally have all callee-saved registers marked
399 // as preserved, so this check would not be needed, but in case of
400 // ever having other regmasks (for other calling conventions),
401 // make sure they would be processed correctly.
402 const uint32_t *BM = MO.getRegMask();
403 for (int x = CSR.find_first(); x >= 0; x = CSR.find_next(x)) {
404 unsigned R = x;
405 // If this regmask does not preserve a CSR, a frame will be needed.
406 if (!(BM[R/32] & (1u << (R%32))))
407 return true;
408 }
409 }
410 }
411 }
412 return false;
413}
414
415 /// Returns true if MBB has a machine instructions that indicates a tail call
416 /// in the block.
417static bool hasTailCall(const MachineBasicBlock &MBB) {
418 MachineBasicBlock::const_iterator I = MBB.getLastNonDebugInstr();
419 if (I == MBB.end())
420 return false;
421 unsigned RetOpc = I->getOpcode();
422 return RetOpc == Hexagon::PS_tailcall_i || RetOpc == Hexagon::PS_tailcall_r;
423}
424
425/// Returns true if MBB contains an instruction that returns.
426static bool hasReturn(const MachineBasicBlock &MBB) {
427 for (const MachineInstr &MI : MBB.terminators())
428 if (MI.isReturn())
429 return true;
430 return false;
431}
432
433/// Returns the "return" instruction from this block, or nullptr if there
434/// isn't any.
436 for (auto &I : MBB)
437 if (I.isReturn())
438 return &I;
439 return nullptr;
440}
441
442static bool isRestoreCall(unsigned Opc) {
443 switch (Opc) {
444 case Hexagon::RESTORE_DEALLOC_RET_JMP_V4:
445 case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC:
446 case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT:
447 case Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC:
448 case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT:
449 case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC:
450 case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4:
451 case Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC:
452 return true;
453 }
454 return false;
455}
456
457static inline bool isOptNone(const MachineFunction &MF) {
458 return MF.getFunction().hasOptNone() ||
460}
461
462static inline bool isOptSize(const MachineFunction &MF) {
463 const Function &F = MF.getFunction();
464 return F.hasOptSize() && !F.hasMinSize();
465}
466
467static inline bool isMinSize(const MachineFunction &MF) {
468 return MF.getFunction().hasMinSize();
469}
470
471/// Implements shrink-wrapping of the stack frame. By default, stack frame
472/// is created in the function entry block, and is cleaned up in every block
473/// that returns. This function finds alternate blocks: one for the frame
474/// setup (prolog) and one for the cleanup (epilog).
475void HexagonFrameLowering::findShrunkPrologEpilog(MachineFunction &MF,
476 MachineBasicBlock *&PrologB, MachineBasicBlock *&EpilogB) const {
477 static unsigned ShrinkCounter = 0;
478
479 if (MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() &&
480 MF.getFunction().isVarArg())
481 return;
482 if (ShrinkLimit.getPosition()) {
483 if (ShrinkCounter >= ShrinkLimit)
484 return;
485 ShrinkCounter++;
486 }
487
488 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
489
490 MachineDominatorTree MDT;
491 MDT.recalculate(MF);
492 MachinePostDominatorTree MPT;
493 MPT.recalculate(MF);
494
495 using UnsignedMap = DenseMap<unsigned, unsigned>;
496 using RPOTType = ReversePostOrderTraversal<const MachineFunction *>;
497
498 UnsignedMap RPO;
499 RPOTType RPOT(&MF);
500 unsigned RPON = 0;
501 for (auto &I : RPOT)
502 RPO[I->getNumber()] = RPON++;
503
504 // Don't process functions that have loops, at least for now. Placement
505 // of prolog and epilog must take loop structure into account. For simpli-
506 // city don't do it right now.
507 for (auto &I : MF) {
508 unsigned BN = RPO[I.getNumber()];
509 for (MachineBasicBlock *Succ : I.successors())
510 // If found a back-edge, return.
511 if (RPO[Succ->getNumber()] <= BN)
512 return;
513 }
514
515 // Collect the set of blocks that need a stack frame to execute. Scan
516 // each block for uses/defs of callee-saved registers, calls, etc.
518 BitVector CSR(Hexagon::NUM_TARGET_REGS);
519 for (const MCPhysReg *P = HRI.getCalleeSavedRegs(&MF); *P; ++P)
520 for (MCPhysReg S : HRI.subregs_inclusive(*P))
521 CSR[S] = true;
522
523 for (auto &I : MF)
524 if (needsStackFrame(I, CSR, HRI))
525 SFBlocks.push_back(&I);
526
527 LLVM_DEBUG({
528 dbgs() << "Blocks needing SF: {";
529 for (auto &B : SFBlocks)
530 dbgs() << " " << printMBBReference(*B);
531 dbgs() << " }\n";
532 });
533 // No frame needed?
534 if (SFBlocks.empty())
535 return;
536
537 // Pick a common dominator and a common post-dominator.
538 MachineBasicBlock *DomB = SFBlocks[0];
539 for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
540 DomB = MDT.findNearestCommonDominator(DomB, SFBlocks[i]);
541 if (!DomB)
542 break;
543 }
544 MachineBasicBlock *PDomB = SFBlocks[0];
545 for (unsigned i = 1, n = SFBlocks.size(); i < n; ++i) {
546 PDomB = MPT.findNearestCommonDominator(PDomB, SFBlocks[i]);
547 if (!PDomB)
548 break;
549 }
550 LLVM_DEBUG({
551 dbgs() << "Computed dom block: ";
552 if (DomB)
553 dbgs() << printMBBReference(*DomB);
554 else
555 dbgs() << "<null>";
556 dbgs() << ", computed pdom block: ";
557 if (PDomB)
558 dbgs() << printMBBReference(*PDomB);
559 else
560 dbgs() << "<null>";
561 dbgs() << "\n";
562 });
563 if (!DomB || !PDomB)
564 return;
565
566 // Make sure that DomB dominates PDomB and PDomB post-dominates DomB.
567 if (!MDT.dominates(DomB, PDomB)) {
568 LLVM_DEBUG(dbgs() << "Dom block does not dominate pdom block\n");
569 return;
570 }
571 if (!MPT.dominates(PDomB, DomB)) {
572 LLVM_DEBUG(dbgs() << "PDom block does not post-dominate dom block\n");
573 return;
574 }
575
576 // Finally, everything seems right.
577 PrologB = DomB;
578 EpilogB = PDomB;
579}
580
581/// Perform most of the PEI work here:
582/// - saving/restoring of the callee-saved registers,
583/// - stack frame creation and destruction.
584/// Normally, this work is distributed among various functions, but doing it
585/// in one place allows shrink-wrapping of the stack frame.
587 MachineBasicBlock &MBB) const {
588 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
589
590 MachineFrameInfo &MFI = MF.getFrameInfo();
591 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
592
593 MachineBasicBlock *PrologB = &MF.front(), *EpilogB = nullptr;
595 findShrunkPrologEpilog(MF, PrologB, EpilogB);
596
597 bool PrologueStubs = false;
598 insertCSRSpillsInBlock(*PrologB, CSI, HRI, PrologueStubs);
599 insertPrologueInBlock(*PrologB, PrologueStubs);
600 // Insert the SCS prologue after all FrameSetup instructions so that it
601 // follows allocframe and any CSR spills in the instruction stream. The
602 // packetizer may still fuse the SCS store with the first call in the
603 // function, but because Hexagon packets use old-value reads the original
604 // R31 is always what is stored.
605 {
606 MachineBasicBlock::iterator AfterProlog = PrologB->begin();
607 while (AfterProlog != PrologB->end() &&
608 AfterProlog->getFlag(MachineInstr::FrameSetup))
609 ++AfterProlog;
610 DebugLoc PrologDL = PrologB->findDebugLoc(AfterProlog);
611 emitSCSPrologue(MF, *PrologB, AfterProlog, PrologDL);
612 }
613 updateEntryPaths(MF, *PrologB);
614
615 if (EpilogB) {
616 insertCSRRestoresInBlock(*EpilogB, CSI, HRI);
617 insertEpilogueInBlock(*EpilogB);
618 } else {
619 for (auto &B : MF)
620 if (B.isReturnBlock())
621 insertCSRRestoresInBlock(B, CSI, HRI);
622
623 for (auto &B : MF)
624 if (B.isReturnBlock())
625 insertEpilogueInBlock(B);
626
627 for (auto &B : MF) {
628 if (B.empty())
629 continue;
630 MachineInstr *RetI = getReturn(B);
631 if (!RetI || isRestoreCall(RetI->getOpcode()))
632 continue;
633 for (auto &R : CSI)
634 RetI->addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
635 }
636 }
637
638 if (EpilogB) {
639 // If there is an epilog block, it may not have a return instruction.
640 // In such case, we need to add the callee-saved registers as live-ins
641 // in all blocks on all paths from the epilog to any return block.
642 unsigned MaxBN = MF.getNumBlockIDs();
643 BitVector DoneT(MaxBN+1), DoneF(MaxBN+1), Path(MaxBN+1);
644 updateExitPaths(*EpilogB, *EpilogB, DoneT, DoneF, Path);
645 }
646}
647
648/// Returns true if the target can safely skip saving callee-saved registers
649/// for noreturn nounwind functions.
651 const MachineFunction &MF) const {
652 const auto &F = MF.getFunction();
653 assert(F.hasFnAttribute(Attribute::NoReturn) &&
654 F.getFunction().hasFnAttribute(Attribute::NoUnwind) &&
655 !F.getFunction().hasFnAttribute(Attribute::UWTable));
656 (void)F;
657
658 // No need to save callee saved registers if the function does not return.
659 return MF.getSubtarget<HexagonSubtarget>().noreturnStackElim();
660}
661
662// Helper function used to determine when to eliminate the stack frame for
663// functions marked as noreturn and when the noreturn-stack-elim options are
664// specified. When both these conditions are true, then a FP may not be needed
665// if the function makes a call. It is very similar to enableCalleeSaveSkip,
666// but it used to check if the allocframe can be eliminated as well.
667static bool enableAllocFrameElim(const MachineFunction &MF) {
668 const auto &F = MF.getFunction();
669 const auto &MFI = MF.getFrameInfo();
670 const auto &HST = MF.getSubtarget<HexagonSubtarget>();
671 assert(!MFI.hasVarSizedObjects() &&
672 !HST.getRegisterInfo()->hasStackRealignment(MF));
673 return F.hasFnAttribute(Attribute::NoReturn) &&
674 F.hasFnAttribute(Attribute::NoUnwind) &&
675 !F.hasFnAttribute(Attribute::UWTable) && HST.noreturnStackElim() &&
676 MFI.getStackSize() == 0;
677}
678
679void HexagonFrameLowering::insertPrologueInBlock(MachineBasicBlock &MBB,
680 bool PrologueStubs) const {
681 MachineFunction &MF = *MBB.getParent();
682 MachineFrameInfo &MFI = MF.getFrameInfo();
683 auto &HST = MF.getSubtarget<HexagonSubtarget>();
684 auto &HII = *HST.getInstrInfo();
685 auto &HRI = *HST.getRegisterInfo();
686
687 Align MaxAlign = std::max(MFI.getMaxAlign(), getStackAlign());
688
689 // Calculate the total stack frame size.
690 // Get the number of bytes to allocate from the FrameInfo.
691 unsigned FrameSize = MFI.getStackSize();
692 // Round up the max call frame size to the max alignment on the stack.
693 unsigned MaxCFA = alignTo(MFI.getMaxCallFrameSize(), MaxAlign);
694 MFI.setMaxCallFrameSize(MaxCFA);
695
696 FrameSize = MaxCFA + alignTo(FrameSize, MaxAlign);
697 MFI.setStackSize(FrameSize);
698
699 bool AlignStack = (MaxAlign > getStackAlign());
700
701 // Get the number of bytes to allocate from the FrameInfo.
702 unsigned NumBytes = MFI.getStackSize();
703 Register SP = HRI.getStackRegister();
704 unsigned MaxCF = MFI.getMaxCallFrameSize();
706
707 SmallVector<MachineInstr *, 4> AdjustRegs;
708 for (auto &MBB : MF)
709 for (auto &MI : MBB)
710 if (MI.getOpcode() == Hexagon::PS_alloca)
711 AdjustRegs.push_back(&MI);
712
713 for (auto *MI : AdjustRegs) {
714 assert((MI->getOpcode() == Hexagon::PS_alloca) && "Expected alloca");
715 expandAlloca(MI, MF, HII, SP, MaxCF);
716 MI->eraseFromParent();
717 }
718
719 DebugLoc dl = MBB.findDebugLoc(InsertPt);
720
721 if (MF.getFunction().isVarArg() &&
722 MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
723 // Calculate the size of register saved area.
724 int NumVarArgRegs = 6 - FirstVarArgSavedReg;
725 int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0)
726 ? NumVarArgRegs * 4
727 : NumVarArgRegs * 4 + 4;
728 if (RegisterSavedAreaSizePlusPadding > 0) {
729 // Decrement the stack pointer by size of register saved area plus
730 // padding if any.
731 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
732 .addReg(SP)
733 .addImm(-RegisterSavedAreaSizePlusPadding)
735
736 int NumBytes = 0;
737 // Copy all the named arguments below register saved area.
738 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
739 for (int i = HMFI.getFirstNamedArgFrameIndex(),
740 e = HMFI.getLastNamedArgFrameIndex(); i >= e; --i) {
741 uint64_t ObjSize = MFI.getObjectSize(i);
742 Align ObjAlign = MFI.getObjectAlign(i);
743
744 // Determine the kind of load/store that should be used.
745 unsigned LDOpc, STOpc;
746 uint64_t OpcodeChecker = ObjAlign.value();
747
748 // Handle cases where alignment of an object is > its size.
749 if (ObjAlign > ObjSize) {
750 if (ObjSize <= 1)
751 OpcodeChecker = 1;
752 else if (ObjSize <= 2)
753 OpcodeChecker = 2;
754 else if (ObjSize <= 4)
755 OpcodeChecker = 4;
756 else if (ObjSize > 4)
757 OpcodeChecker = 8;
758 }
759
760 switch (OpcodeChecker) {
761 case 1:
762 LDOpc = Hexagon::L2_loadrb_io;
763 STOpc = Hexagon::S2_storerb_io;
764 break;
765 case 2:
766 LDOpc = Hexagon::L2_loadrh_io;
767 STOpc = Hexagon::S2_storerh_io;
768 break;
769 case 4:
770 LDOpc = Hexagon::L2_loadri_io;
771 STOpc = Hexagon::S2_storeri_io;
772 break;
773 case 8:
774 default:
775 LDOpc = Hexagon::L2_loadrd_io;
776 STOpc = Hexagon::S2_storerd_io;
777 break;
778 }
779
780 Register RegUsed = LDOpc == Hexagon::L2_loadrd_io ? Hexagon::D3
781 : Hexagon::R6;
782 int LoadStoreCount = ObjSize / OpcodeChecker;
783
784 if (ObjSize % OpcodeChecker)
785 ++LoadStoreCount;
786
787 // Get the start location of the load. NumBytes is basically the
788 // offset from the stack pointer of previous function, which would be
789 // the caller in this case, as this function has variable argument
790 // list.
791 if (NumBytes != 0)
792 NumBytes = alignTo(NumBytes, ObjAlign);
793
794 int Count = 0;
795 while (Count < LoadStoreCount) {
796 // Load the value of the named argument on stack.
797 BuildMI(MBB, InsertPt, dl, HII.get(LDOpc), RegUsed)
798 .addReg(SP)
799 .addImm(RegisterSavedAreaSizePlusPadding +
800 ObjAlign.value() * Count + NumBytes)
802
803 // Store it below the register saved area plus padding.
804 BuildMI(MBB, InsertPt, dl, HII.get(STOpc))
805 .addReg(SP)
806 .addImm(ObjAlign.value() * Count + NumBytes)
807 .addReg(RegUsed)
809
810 Count++;
811 }
812 NumBytes += MFI.getObjectSize(i);
813 }
814
815 // Make NumBytes 8 byte aligned
816 NumBytes = alignTo(NumBytes, 8);
817
818 // If the number of registers having variable arguments is odd,
819 // leave 4 bytes of padding to get to the location where first
820 // variable argument which was passed through register was copied.
821 NumBytes = (NumVarArgRegs % 2 == 0) ? NumBytes : NumBytes + 4;
822
823 for (int j = FirstVarArgSavedReg, i = 0; j < 6; ++j, ++i) {
824 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_storeri_io))
825 .addReg(SP)
826 .addImm(NumBytes + 4 * i)
827 .addReg(Hexagon::R0 + j)
829 }
830 }
831 }
832
833 if (hasFP(MF)) {
834 insertAllocframe(MBB, InsertPt, NumBytes);
835 if (AlignStack) {
836 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_andir), SP)
837 .addReg(SP)
838 .addImm(-int64_t(MaxAlign.value()));
839 }
840 // If the stack-checking is enabled, and we spilled the callee-saved
841 // registers inline (i.e. did not use a spill function), then call
842 // the stack checker directly.
843 if (EnableStackOVFSanitizer && !PrologueStubs)
844 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_call_stk))
845 .addExternalSymbol("__runtime_stack_check");
846 } else if (NumBytes > 0) {
847 assert(alignTo(NumBytes, 8) == NumBytes);
848 auto *TLI = HST.getTargetLowering();
849 bool NeedsProbing = TLI->hasInlineStackProbe(MF);
850 unsigned ProbeSize = 0;
851 if (NeedsProbing) {
852 Align StackAlign = getStackAlign();
853 ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
854 }
855 if (NeedsProbing && NumBytes > ProbeSize) {
856 // Compute target SP in R28 (caller-saved scratch).
857 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), Hexagon::R28)
858 .addReg(SP)
859 .addImm(-int(NumBytes))
861 // Emit pseudo to be expanded by inlineStackProbe().
862 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_probed_stackalloc))
863 .addReg(Hexagon::R28)
865 } else {
866 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
867 .addReg(SP)
868 .addImm(-int(NumBytes))
870 }
871 }
872}
873
874void HexagonFrameLowering::insertEpilogueInBlock(MachineBasicBlock &MBB) const {
875 MachineFunction &MF = *MBB.getParent();
876 auto &HST = MF.getSubtarget<HexagonSubtarget>();
877 auto &HII = *HST.getInstrInfo();
878 auto &HRI = *HST.getRegisterInfo();
879 Register SP = HRI.getStackRegister();
880
882 DebugLoc dl = MBB.findDebugLoc(InsertPt);
883
884 if (!hasFP(MF)) {
885 MachineFrameInfo &MFI = MF.getFrameInfo();
886 unsigned NumBytes = MFI.getStackSize();
887 if (MF.getFunction().isVarArg() &&
888 MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl()) {
889 // On Hexagon Linux, deallocate the stack for the register saved area.
890 int NumVarArgRegs = 6 - FirstVarArgSavedReg;
891 int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
892 (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
893 NumBytes += RegisterSavedAreaSizePlusPadding;
894 }
895 if (NumBytes) {
896 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
897 .addReg(SP)
898 .addImm(NumBytes);
899 }
900 return;
901 }
902
903 MachineInstr *RetI = getReturn(MBB);
904 unsigned RetOpc = RetI ? RetI->getOpcode() : 0;
905
906 // Handle EH_RETURN.
907 if (RetOpc == Hexagon::EH_RETURN_JMPR) {
908 // EH paths overwrite R31 with a handler address; the shadow stack is
909 // not read on this path, so no SCS epilogue is needed.
910 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
911 .addDef(Hexagon::D15)
912 .addReg(Hexagon::R30);
913 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_add), SP)
914 .addReg(SP)
915 .addReg(Hexagon::R28);
916 return;
917 }
918
919 // Check for RESTORE_DEALLOC_RET* tail call. Don't emit an extra dealloc-
920 // frame instruction if we encounter it.
921 // These spill-stub tail calls include r19 in their save range, but SCS
922 // requires -ffixed-r19, which prevents the allocator from selecting stubs
923 // that cover r19. The two features are therefore mutually exclusive and no
924 // SCS epilogue is needed here.
925 if (RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4 ||
926 RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC ||
927 RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT ||
928 RetOpc == Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC) {
930 ++It;
931 // Delete all instructions after the RESTORE (except labels).
932 while (It != MBB.end()) {
933 if (!It->isLabel())
934 It = MBB.erase(It);
935 else
936 ++It;
937 }
938 return;
939 }
940
941 // It is possible that the restoring code is a call to a library function.
942 // All of the restore* functions include "deallocframe", so we need to make
943 // sure that we don't add an extra one.
944 bool NeedsSCS = MF.getFunction().hasFnAttribute(Attribute::ShadowCallStack);
945 bool NeedsDeallocframe = true;
946 unsigned PrevOpc = 0;
947 if (!MBB.empty() && InsertPt != MBB.begin()) {
948 MachineBasicBlock::iterator PrevIt = std::prev(InsertPt);
949 PrevOpc = PrevIt->getOpcode();
950 if (PrevOpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
951 PrevOpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC ||
952 PrevOpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
953 PrevOpc == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
954 PrevOpc == Hexagon::PS_call_nr || PrevOpc == Hexagon::PS_callr_nr)
955 NeedsDeallocframe = false;
956 }
957
958 if (!MF.getSubtarget<HexagonSubtarget>().isEnvironmentMusl() ||
959 !MF.getFunction().isVarArg()) {
960 if (!NeedsDeallocframe) {
961 // RESTORE_DEALLOC_BEFORE_TAILCALL stubs include r19 in their save range,
962 // but SCS requires -ffixed-r19 which prevents the allocator from
963 // selecting stubs that cover r19, so SCS and stubs are mutually
964 // exclusive. PS_call_nr/PS_callr_nr are noreturn calls so the shadow
965 // stack entry is never read - no SCS epilogue is needed on either path.
966 if (NeedsSCS && PrevOpc != Hexagon::PS_call_nr &&
967 PrevOpc != Hexagon::PS_callr_nr)
968 report_fatal_error("SCS with RESTORE_DEALLOC stub: "
969 "-ffixed-r19 should have prevented this");
970 return;
971 }
972 // If the returning instruction is PS_jmpret, replace it with
973 // dealloc_return, otherwise just add deallocframe. The function
974 // could be returning via a tail call.
975 if (RetOpc != Hexagon::PS_jmpret || DisableDeallocRet || NeedsSCS) {
976 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
977 .addDef(Hexagon::D15)
978 .addReg(Hexagon::R30);
979 // When shadow call stack is active, overwrite R31 restored by
980 // deallocframe with the shadow-stack copy, then retract the pointer.
981 if (NeedsSCS)
982 emitSCSEpilogue(MF, MBB, InsertPt, dl);
983 return;
984 }
985 unsigned NewOpc = Hexagon::L4_return;
986 MachineInstr *NewI = BuildMI(MBB, RetI, dl, HII.get(NewOpc))
987 .addDef(Hexagon::D15)
988 .addReg(Hexagon::R30);
989 // Transfer the function live-out registers.
990 NewI->copyImplicitOps(MF, *RetI);
991 MBB.erase(RetI);
992 } else {
993 // L2_deallocframe instruction after it.
994 // Calculate the size of register saved area.
995 int NumVarArgRegs = 6 - FirstVarArgSavedReg;
996 int RegisterSavedAreaSizePlusPadding = (NumVarArgRegs % 2 == 0) ?
997 (NumVarArgRegs * 4) : (NumVarArgRegs * 4 + 4);
998
1001 : std::prev(Term);
1002 bool HasRestoreStub =
1003 I != MBB.end() &&
1004 (I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT ||
1005 I->getOpcode() ==
1006 Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC ||
1007 I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4 ||
1008 I->getOpcode() == Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC);
1009 if (!HasRestoreStub)
1010 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::L2_deallocframe))
1011 .addDef(Hexagon::D15)
1012 .addReg(Hexagon::R30);
1013 if (RegisterSavedAreaSizePlusPadding != 0)
1014 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
1015 .addReg(SP)
1016 .addImm(RegisterSavedAreaSizePlusPadding);
1017 // RESTORE_DEALLOC stubs are mutually exclusive with SCS (-ffixed-r19
1018 // prevents stubs that cover r19), so only emit SCS epilogue when we
1019 // emitted our own deallocframe above.
1020 if (NeedsSCS && !HasRestoreStub)
1021 emitSCSEpilogue(MF, MBB, InsertPt, dl);
1022 }
1023}
1024
// Emits the allocframe sequence that sets up a stack frame of NumBytes bytes,
// inserting the new instructions before InsertPt in MBB. One of three forms
// is chosen:
//  - inline stack probing is requested and NumBytes exceeds the probe size:
//    allocframe(#0), then the target SP is computed into R28 and a
//    PS_probed_stackalloc pseudo is emitted for later expansion;
//  - NumBytes >= ALLOCFRAME_MAX: allocframe(#0) followed by an explicit
//    adjustment of SP by -NumBytes;
//  - otherwise: a single allocframe(#NumBytes).
// NOTE(review): the declaration of MMO and the tail of each BuildMI chain are
// not visible in this listing -- confirm against the complete file.
1025void HexagonFrameLowering::insertAllocframe(MachineBasicBlock &MBB,
1026 MachineBasicBlock::iterator InsertPt, unsigned NumBytes) const {
1027 MachineFunction &MF = *MBB.getParent();
1028 auto &HST = MF.getSubtarget<HexagonSubtarget>();
1029 auto &HII = *HST.getInstrInfo();
1030 auto &HRI = *HST.getRegisterInfo();
1031
1032 // Check for overflow.
1033 // Hexagon_TODO: Ugh! hardcoding. Is there an API that can be used?
1034 const unsigned int ALLOCFRAME_MAX = 16384;
1035
1036 // Create a dummy memory operand to avoid allocframe from being treated as
1037 // a volatile memory reference.
1040
1041 DebugLoc dl = MBB.findDebugLoc(InsertPt);
1042 Register SP = HRI.getStackRegister();
1043
// Probing is only considered when the target lowering asks for inline stack
// probes and the frame is non-empty; ProbeSize stays 0 otherwise.
1044 auto *TLI = HST.getTargetLowering();
1045 bool NeedsProbing = TLI->hasInlineStackProbe(MF) && NumBytes > 0;
1046 unsigned ProbeSize = 0;
1047 if (NeedsProbing) {
1048 Align StackAlign = getStackAlign();
1049 ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
1050 }
1051
// The probed path is taken only for frames larger than one probe interval;
// smaller frames fall through to the two non-probing forms below.
1052 if (NeedsProbing && NumBytes > ProbeSize) {
1053 // Emit allocframe(#0) to save FP/LR only.
1054 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
1055 .addDef(SP)
1056 .addReg(SP)
1057 .addImm(0)
1058 .addMemOperand(MMO)
1060
1061 // Compute target SP in R28 (caller-saved scratch).
1062 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), Hexagon::R28)
1063 .addReg(SP)
1064 .addImm(-int(NumBytes))
1066
1067 // Emit pseudo to be expanded by inlineStackProbe().
1068 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::PS_probed_stackalloc))
1069 .addReg(Hexagon::R28)
1071 } else if (NumBytes >= ALLOCFRAME_MAX) {
1072 // Emit allocframe(#0).
1073 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
1074 .addDef(SP)
1075 .addReg(SP)
1076 .addImm(0)
1077 .addMemOperand(MMO)
1079
1080 // Subtract the size from the stack pointer.
// This declaration shadows the SP defined above; both are obtained from
// HRI.getStackRegister().
1081 Register SP = HRI.getStackRegister();
1082 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::A2_addi), SP)
1083 .addReg(SP)
1084 .addImm(-int(NumBytes))
1086 } else {
// NumBytes is below ALLOCFRAME_MAX: allocframe allocates the whole frame.
1087 BuildMI(MBB, InsertPt, dl, HII.get(Hexagon::S2_allocframe))
1088 .addDef(SP)
1089 .addReg(SP)
1090 .addImm(NumBytes)
1091 .addMemOperand(MMO)
1093 }
1094}
1095
1097 MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
1098 // Collect PS_probed_stackalloc pseudos to expand. Collecting first avoids
1099 // issues with modifying the block while iterating.
1101 for (MachineInstr &MI : PrologueMBB)
1102 if (MI.getOpcode() == Hexagon::PS_probed_stackalloc)
1103 ToReplace.push_back(&MI);
1104
1105 auto &HST = MF.getSubtarget<HexagonSubtarget>();
1106 auto &HII = *HST.getInstrInfo();
1107 auto *TLI = HST.getTargetLowering();
1108 Align StackAlign = getStackAlign();
1109 unsigned ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
1111
1112 for (MachineInstr *MI : ToReplace) {
1113 MachineBasicBlock::iterator MBBI = MI->getIterator();
1114 DebugLoc DL = PrologueMBB.findDebugLoc(MBBI);
1115 Register TargetReg = MI->getOperand(0).getReg();
1116
1117 // Split the block: everything after the pseudo goes into ExitMBB.
1118 MachineBasicBlock *MBB = MI->getParent();
1119 MachineFunction::iterator InsertPt = std::next(MBB->getIterator());
1120 MachineBasicBlock *LoopMBB =
1121 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
1122 MF.insert(InsertPt, LoopMBB);
1123 MachineBasicBlock *ExitMBB =
1124 MF.CreateMachineBasicBlock(MBB->getBasicBlock());
1125 MF.insert(InsertPt, ExitMBB);
1126
1127 // Move everything after the pseudo into ExitMBB.
1128 ExitMBB->splice(ExitMBB->end(), MBB, std::next(MBBI), MBB->end());
1130
1131 // LoopMBB: probe each page by decrementing SP and storing zero.
1132 // When NumBytes is not an exact multiple of ProbeSize the loop
1133 // will overshoot by up to ProbeSize-1 bytes; the final r29 = r28
1134 // in ExitMBB corrects SP to the true target.
1135 //
1136 // The store is placed before the compare+branch so that the
1137 // packetizer can bundle them into a single VLIW packet. All
1138 // non-predicated instructions in a packet commit unconditionally,
1139 // so the probe store executes on every iteration including the
1140 // last (when the branch falls through).
1141 //
1142 // r29 = add(r29, #-ProbeSize)
1143 // memw(r29+#0) = #0
1144 // p0 = cmp.gtu(r29, r28)
1145 // if (p0) jump LoopMBB
1146 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::A2_addi),
1147 Hexagon::R29)
1148 .addReg(Hexagon::R29)
1149 .addImm(-int(ProbeSize))
1150 .setMIFlags(Flags);
1151
1152 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::S4_storeiri_io))
1153 .addReg(Hexagon::R29)
1154 .addImm(0)
1155 .addImm(0)
1156 .setMIFlags(Flags);
1157
1158 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::C2_cmpgtu),
1159 Hexagon::P0)
1160 .addReg(Hexagon::R29)
1161 .addReg(TargetReg)
1162 .setMIFlags(Flags);
1163
1164 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::J2_jumpt))
1165 .addReg(Hexagon::P0)
1166 .addMBB(LoopMBB)
1167 .setMIFlags(Flags);
1168
1169 // ExitMBB: set final SP.
1170 BuildMI(*ExitMBB, ExitMBB->begin(), DL, HII.get(Hexagon::A2_tfr),
1171 Hexagon::R29)
1172 .addReg(TargetReg)
1173 .setMIFlags(Flags);
1174
1175 // Set up CFG edges.
1176 MBB->addSuccessor(LoopMBB);
1177 LoopMBB->addSuccessor(LoopMBB);
1178 LoopMBB->addSuccessor(ExitMBB);
1179
1180 // Remove the pseudo.
1181 MI->eraseFromParent();
1182
1183 // Recompute live-ins for the new blocks.
1184 fullyRecomputeLiveIns({ExitMBB, LoopMBB});
1185 }
1186}
1187
// Marks every callee-saved register (from the frame info's callee-saved list)
// as live-in to each block visited by a forward walk of the CFG that starts
// at the entry block. The walk does not continue through the successors of
// SaveB, although SaveB itself still receives the live-ins when it is reached.
// NOTE(review): the declaration of MBB (presumably the block numbered BN) is
// not visible in this listing -- confirm against the complete file.
1188void HexagonFrameLowering::updateEntryPaths(MachineFunction &MF,
1189 MachineBasicBlock &SaveB) const {
// SetVector keeps insertion order and rejects duplicates, so each block
// number is processed at most once even when the CFG has cycles.
1190 SetVector<unsigned> Worklist;
1191
1192 MachineBasicBlock &EntryB = MF.front();
1193 Worklist.insert(EntryB.getNumber());
1194
1195 unsigned SaveN = SaveB.getNumber();
1196 auto &CSI = MF.getFrameInfo().getCalleeSavedInfo();
1197
// Index-based iteration on purpose: the worklist grows while it is traversed.
1198 for (unsigned i = 0; i < Worklist.size(); ++i) {
1199 unsigned BN = Worklist[i];
1201 for (auto &R : CSI)
1202 if (!MBB.isLiveIn(R.getReg()))
1203 MBB.addLiveIn(R.getReg());
// Stop the propagation at the save block: its successors are not enqueued.
1204 if (BN != SaveN)
1205 for (auto &SB : MBB.successors())
1206 Worklist.insert(SB->getNumber());
1207 }
1208}
1209
1210bool HexagonFrameLowering::updateExitPaths(MachineBasicBlock &MBB,
1211 MachineBasicBlock &RestoreB, BitVector &DoneT, BitVector &DoneF,
1212 BitVector &Path) const {
1213 assert(MBB.getNumber() >= 0);
1214 unsigned BN = MBB.getNumber();
1215 if (Path[BN] || DoneF[BN])
1216 return false;
1217 if (DoneT[BN])
1218 return true;
1219
1220 auto &CSI = MBB.getParent()->getFrameInfo().getCalleeSavedInfo();
1221
1222 Path[BN] = true;
1223 bool ReachedExit = false;
1224 for (auto &SB : MBB.successors())
1225 ReachedExit |= updateExitPaths(*SB, RestoreB, DoneT, DoneF, Path);
1226
1227 if (!MBB.empty() && MBB.back().isReturn()) {
1228 // Add implicit uses of all callee-saved registers to the reached
1229 // return instructions. This is to prevent the anti-dependency breaker
1230 // from renaming these registers.
1231 MachineInstr &RetI = MBB.back();
1232 if (!isRestoreCall(RetI.getOpcode()))
1233 for (auto &R : CSI)
1234 RetI.addOperand(MachineOperand::CreateReg(R.getReg(), false, true));
1235 ReachedExit = true;
1236 }
1237
1238 // We don't want to add unnecessary live-ins to the restore block: since
1239 // the callee-saved registers are being defined in it, the entry of the
1240 // restore block cannot be on the path from the definitions to any exit.
1241 if (ReachedExit && &MBB != &RestoreB) {
1242 for (auto &R : CSI)
1243 if (!MBB.isLiveIn(R.getReg()))
1244 MBB.addLiveIn(R.getReg());
1245 DoneT[BN] = true;
1246 }
1247 if (!ReachedExit)
1248 DoneF[BN] = true;
1249
1250 Path[BN] = false;
1251 return ReachedExit;
1252}
1253
1254static std::optional<MachineBasicBlock::iterator>
1256 // The CFI instructions need to be inserted right after allocframe.
1257 // An exception to this is a situation where allocframe is bundled
1258 // with a call: then the CFI instructions need to be inserted before
1259 // the packet with the allocframe+call (in case the call throws an
1260 // exception).
1261 auto End = B.instr_end();
1262
1263 for (MachineInstr &I : B) {
1264 MachineBasicBlock::iterator It = I.getIterator();
1265 if (!I.isBundle()) {
1266 if (I.getOpcode() == Hexagon::S2_allocframe)
1267 return std::next(It);
1268 continue;
1269 }
1270 // I is a bundle.
1271 bool HasCall = false, HasAllocFrame = false;
1272 auto T = It.getInstrIterator();
1273 while (++T != End && T->isBundled()) {
1274 if (T->getOpcode() == Hexagon::S2_allocframe)
1275 HasAllocFrame = true;
1276 else if (T->isCall())
1277 HasCall = true;
1278 }
1279 if (HasAllocFrame)
1280 return HasCall ? It : std::next(It);
1281 }
1282 return std::nullopt;
1283}
1284
1286 for (auto &B : MF)
1287 if (auto At = findCFILocation(B))
1288 insertCFIInstructionsAt(B, *At);
1289}
1290
1291void HexagonFrameLowering::insertCFIInstructionsAt(MachineBasicBlock &MBB,
1292 MachineBasicBlock::iterator At) const {
1293 MachineFunction &MF = *MBB.getParent();
1294 MachineFrameInfo &MFI = MF.getFrameInfo();
1295 auto &HST = MF.getSubtarget<HexagonSubtarget>();
1296 auto &HII = *HST.getInstrInfo();
1297 auto &HRI = *HST.getRegisterInfo();
1298
1299 // If CFI instructions have debug information attached, something goes
1300 // wrong with the final assembly generation: the prolog_end is placed
1301 // in a wrong location.
1302 DebugLoc DL;
1303 const MCInstrDesc &CFID = HII.get(TargetOpcode::CFI_INSTRUCTION);
1304
1305 MCSymbol *FrameLabel = MF.getContext().createTempSymbol();
1306 bool HasFP = hasFP(MF);
1307
1308 if (HasFP) {
1309 unsigned DwFPReg = HRI.getDwarfRegNum(HRI.getFrameRegister(), true);
1310 unsigned DwRAReg = HRI.getDwarfRegNum(HRI.getRARegister(), true);
1311
1312 // Define CFA via an offset from the value of FP.
1313 //
1314 // -8 -4 0 (SP)
1315 // --+----+----+---------------------
1316 // | FP | LR | increasing addresses -->
1317 // --+----+----+---------------------
1318 // | +-- Old SP (before allocframe)
1319 // +-- New FP (after allocframe)
1320 //
1321 // MCCFIInstruction::cfiDefCfa adds the offset from the register.
1322 // MCCFIInstruction::createOffset takes the offset without sign change.
1323 auto DefCfa = MCCFIInstruction::cfiDefCfa(FrameLabel, DwFPReg, 8);
1324 BuildMI(MBB, At, DL, CFID)
1325 .addCFIIndex(MF.addFrameInst(DefCfa));
1326 // R31 (return addr) = CFA - 4
1327 auto OffR31 = MCCFIInstruction::createOffset(FrameLabel, DwRAReg, -4);
1328 BuildMI(MBB, At, DL, CFID)
1329 .addCFIIndex(MF.addFrameInst(OffR31));
1330 // R30 (frame ptr) = CFA - 8
1331 auto OffR30 = MCCFIInstruction::createOffset(FrameLabel, DwFPReg, -8);
1332 BuildMI(MBB, At, DL, CFID)
1333 .addCFIIndex(MF.addFrameInst(OffR30));
1334 }
1335
1336 static const MCPhysReg RegsToMove[] = {
1337 Hexagon::R1, Hexagon::R0, Hexagon::R3, Hexagon::R2,
1338 Hexagon::R17, Hexagon::R16, Hexagon::R19, Hexagon::R18,
1339 Hexagon::R21, Hexagon::R20, Hexagon::R23, Hexagon::R22,
1340 Hexagon::R25, Hexagon::R24, Hexagon::R27, Hexagon::R26,
1341 Hexagon::D0, Hexagon::D1, Hexagon::D8, Hexagon::D9,
1342 Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13
1343 };
1344
1345 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1346
1347 for (MCPhysReg Reg : RegsToMove) {
1348 auto IfR = [Reg] (const CalleeSavedInfo &C) -> bool {
1349 return C.getReg() == Reg;
1350 };
1351 auto F = find_if(CSI, IfR);
1352 if (F == CSI.end())
1353 continue;
1354
1355 int64_t Offset;
1356 if (HasFP) {
1357 // If the function has a frame pointer (i.e. has an allocframe),
1358 // then the CFA has been defined in terms of FP. Any offsets in
1359 // the following CFI instructions have to be defined relative
1360 // to FP, which points to the bottom of the stack frame.
1361 // The function getFrameIndexReference can still choose to use SP
1362 // for the offset calculation, so we cannot simply call it here.
1363 // Instead, get the offset (relative to the FP) directly.
1364 Offset = MFI.getObjectOffset(F->getFrameIdx());
1365 } else {
1366 Register FrameReg;
1367 Offset =
1368 getFrameIndexReference(MF, F->getFrameIdx(), FrameReg).getFixed();
1369 }
1370 // Subtract 8 to make room for R30 and R31, which are added above.
1371 Offset -= 8;
1372
1373 if (Reg < Hexagon::D0 || Reg > Hexagon::D15) {
1374 unsigned DwarfReg = HRI.getDwarfRegNum(Reg, true);
1375 auto OffReg = MCCFIInstruction::createOffset(FrameLabel, DwarfReg,
1376 Offset);
1377 BuildMI(MBB, At, DL, CFID)
1378 .addCFIIndex(MF.addFrameInst(OffReg));
1379 } else {
1380 // Split the double regs into subregs, and generate appropriate
1381 // cfi_offsets.
1382 // The only reason, we are split double regs is, llvm-mc does not
1383 // understand paired registers for cfi_offset.
1384 // Eg .cfi_offset r1:0, -64
1385
1386 Register HiReg = HRI.getSubReg(Reg, Hexagon::isub_hi);
1387 Register LoReg = HRI.getSubReg(Reg, Hexagon::isub_lo);
1388 unsigned HiDwarfReg = HRI.getDwarfRegNum(HiReg, true);
1389 unsigned LoDwarfReg = HRI.getDwarfRegNum(LoReg, true);
1390 auto OffHi = MCCFIInstruction::createOffset(FrameLabel, HiDwarfReg,
1391 Offset+4);
1392 BuildMI(MBB, At, DL, CFID)
1393 .addCFIIndex(MF.addFrameInst(OffHi));
1394 auto OffLo = MCCFIInstruction::createOffset(FrameLabel, LoDwarfReg,
1395 Offset);
1396 BuildMI(MBB, At, DL, CFID)
1397 .addCFIIndex(MF.addFrameInst(OffLo));
1398 }
1399 }
1400}
1401
1403 auto &MFI = MF.getFrameInfo();
1404 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1405 bool HasExtraAlign = HRI.hasStackRealignment(MF);
1406 bool HasAlloca = MFI.hasVarSizedObjects();
1407
1408 // Insert ALLOCFRAME if we need to or at -O0 for the debugger. Think
1409 // that this shouldn't be required, but doing so now because gcc does and
1410 // gdb can't break at the start of the function without it. Will remove if
1411 // this turns out to be a gdb bug.
1412 //
1414 return true;
1415
1416 // By default we want to use SP (since it's always there). FP requires
1417 // some setup (i.e. ALLOCFRAME).
1418 // Both, alloca and stack alignment modify the stack pointer by an
1419 // undetermined value, so we need to save it at the entry to the function
1420 // (i.e. use allocframe).
1421 if (HasAlloca || HasExtraAlign)
1422 return true;
1423
1424 // If FP-elimination is disabled, we have to use FP. This must not be
1425 // gated on stack size: the user/ABI-requested frame pointer is needed
1426 // regardless of whether the function currently has a stack frame.
1427 // Every other target checks DisableFramePointerElim unconditionally.
1428 const TargetMachine &TM = MF.getTarget();
1430 return true;
1431
1432 if (MFI.getStackSize() > 0) {
1434 return true;
1435 }
1436
1437 const auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1438 if ((MFI.hasCalls() && !enableAllocFrameElim(MF)) || HMFI.hasClobberLR())
1439 return true;
1440
1441 return false;
1442}
1443
1449
1450static const char *getSpillFunctionFor(Register MaxReg, SpillKind SpillType,
1451 bool Stkchk = false) {
1452 const char * V4SpillToMemoryFunctions[] = {
1453 "__save_r16_through_r17",
1454 "__save_r16_through_r19",
1455 "__save_r16_through_r21",
1456 "__save_r16_through_r23",
1457 "__save_r16_through_r25",
1458 "__save_r16_through_r27" };
1459
1460 const char * V4SpillToMemoryStkchkFunctions[] = {
1461 "__save_r16_through_r17_stkchk",
1462 "__save_r16_through_r19_stkchk",
1463 "__save_r16_through_r21_stkchk",
1464 "__save_r16_through_r23_stkchk",
1465 "__save_r16_through_r25_stkchk",
1466 "__save_r16_through_r27_stkchk" };
1467
1468 const char * V4SpillFromMemoryFunctions[] = {
1469 "__restore_r16_through_r17_and_deallocframe",
1470 "__restore_r16_through_r19_and_deallocframe",
1471 "__restore_r16_through_r21_and_deallocframe",
1472 "__restore_r16_through_r23_and_deallocframe",
1473 "__restore_r16_through_r25_and_deallocframe",
1474 "__restore_r16_through_r27_and_deallocframe" };
1475
1476 const char * V4SpillFromMemoryTailcallFunctions[] = {
1477 "__restore_r16_through_r17_and_deallocframe_before_tailcall",
1478 "__restore_r16_through_r19_and_deallocframe_before_tailcall",
1479 "__restore_r16_through_r21_and_deallocframe_before_tailcall",
1480 "__restore_r16_through_r23_and_deallocframe_before_tailcall",
1481 "__restore_r16_through_r25_and_deallocframe_before_tailcall",
1482 "__restore_r16_through_r27_and_deallocframe_before_tailcall"
1483 };
1484
1485 const char **SpillFunc = nullptr;
1486
1487 switch(SpillType) {
1488 case SK_ToMem:
1489 SpillFunc = Stkchk ? V4SpillToMemoryStkchkFunctions
1490 : V4SpillToMemoryFunctions;
1491 break;
1492 case SK_FromMem:
1493 SpillFunc = V4SpillFromMemoryFunctions;
1494 break;
1495 case SK_FromMemTailcall:
1496 SpillFunc = V4SpillFromMemoryTailcallFunctions;
1497 break;
1498 }
1499 assert(SpillFunc && "Unknown spill kind");
1500
1501 // Spill all callee-saved registers up to the highest register used.
1502 switch (MaxReg) {
1503 case Hexagon::R17:
1504 return SpillFunc[0];
1505 case Hexagon::R19:
1506 return SpillFunc[1];
1507 case Hexagon::R21:
1508 return SpillFunc[2];
1509 case Hexagon::R23:
1510 return SpillFunc[3];
1511 case Hexagon::R25:
1512 return SpillFunc[4];
1513 case Hexagon::R27:
1514 return SpillFunc[5];
1515 default:
1516 llvm_unreachable("Unhandled maximum callee save register");
1517 }
1518 return nullptr;
1519}
1520
1521StackOffset
1523 Register &FrameReg) const {
1524 auto &MFI = MF.getFrameInfo();
1525 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
1526
1527 int Offset = MFI.getObjectOffset(FI);
1528 bool HasAlloca = MFI.hasVarSizedObjects();
1529 bool HasExtraAlign = HRI.hasStackRealignment(MF);
1530 bool NoOpt = MF.getTarget().getOptLevel() == CodeGenOptLevel::None;
1531
1532 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1533 unsigned FrameSize = MFI.getStackSize();
1534 Register SP = HRI.getStackRegister();
1535 Register FP = HRI.getFrameRegister();
1536 Register AP = HMFI.getStackAlignBaseReg();
1537 // It may happen that AP will be absent even HasAlloca && HasExtraAlign
1538 // is true. HasExtraAlign may be set because of vector spills, without
1539 // aligned locals or aligned outgoing function arguments. Since vector
1540 // spills will ultimately be "unaligned", it is safe to use FP as the
1541 // base register.
1542 // In fact, in such a scenario the stack is actually not required to be
1543 // aligned, although it may end up being aligned anyway, since this
1544 // particular case is not easily detectable. The alignment will be
1545 // unnecessary, but not incorrect.
1546 // Unfortunately there is no quick way to verify that the above is
1547 // indeed the case (and that it's not a result of an error), so just
1548 // assume that missing AP will be replaced by FP.
1549 // (A better fix would be to rematerialize AP from FP and always align
1550 // vector spills.)
1551 bool UseFP = false, UseAP = false; // Default: use SP (except at -O0).
1552 // Use FP at -O0, except when there are objects with extra alignment.
1553 // That additional alignment requirement may cause a pad to be inserted,
1554 // which will make it impossible to use FP to access objects located
1555 // past the pad.
1556 if (NoOpt && !HasExtraAlign)
1557 UseFP = true;
1558 if (MFI.isFixedObjectIndex(FI) || MFI.isObjectPreAllocated(FI)) {
1559 // Fixed and preallocated objects will be located before any padding
1560 // so FP must be used to access them.
1561 UseFP |= (HasAlloca || HasExtraAlign);
1562 } else {
1563 if (HasAlloca) {
1564 if (HasExtraAlign)
1565 UseAP = true;
1566 else
1567 UseFP = true;
1568 }
1569 }
1570
1571 // If FP was picked, then there had better be FP.
1572 bool HasFP = hasFP(MF);
1573 assert((HasFP || !UseFP) && "This function must have frame pointer");
1574
1575 // Having FP implies allocframe. Allocframe will store extra 8 bytes:
1576 // FP/LR. If the base register is used to access an object across these
1577 // 8 bytes, then the offset will need to be adjusted by 8.
1578 //
1579 // After allocframe:
1580 // HexagonISelLowering adds 8 to ---+
1581 // the offsets of all stack-based |
1582 // arguments (*) |
1583 // |
1584 // getObjectOffset < 0 0 8 getObjectOffset >= 8
1585 // ------------------------+-----+------------------------> increasing
1586 // <local objects> |FP/LR| <input arguments> addresses
1587 // -----------------+------+-----+------------------------>
1588 // | |
1589 // SP/AP point --+ +-- FP points here (**)
1590 // somewhere on
1591 // this side of FP/LR
1592 //
1593 // (*) See LowerFormalArguments. The FP/LR is assumed to be present.
1594 // (**) *FP == old-FP. FP+0..7 are the bytes of FP/LR.
1595
1596 // The lowering assumes that FP/LR is present, and so the offsets of
1597 // the formal arguments start at 8. If FP/LR is not there we need to
1598 // reduce the offset by 8.
1599 if (Offset > 0 && !HasFP)
1600 Offset -= 8;
1601
1602 if (UseFP)
1603 FrameReg = FP;
1604 else if (UseAP)
1605 FrameReg = AP;
1606 else
1607 FrameReg = SP;
1608
1609 // Calculate the actual offset in the instruction. If there is no FP
1610 // (in other words, no allocframe), then SP will not be adjusted (i.e.
1611 // there will be no SP -= FrameSize), so the frame size should not be
1612 // added to the calculated offset.
1613 int RealOffset = Offset;
1614 if (!UseFP && !UseAP)
1615 RealOffset = FrameSize+Offset;
1616 return StackOffset::getFixed(RealOffset);
1617}
1618
// Inserts the code that saves the callee-saved registers listed in CSI into
// MBB, using one of two strategies:
//  - when useSpillFunction(MF, CSI) holds, a single SAVE_REGISTERS_CALL_*
//    pseudo referencing a library save routine is emitted and PrologueStubs
//    is set to true;
//  - otherwise every register is stored to its own frame index.
// Afterwards PS_aligna, if the function has one, is moved to the insertion
// point MI. Always returns true; for an empty CSI the function returns before
// touching PrologueStubs.
// NOTE(review): the declaration of the insertion point MI is not visible in
// this listing -- confirm against the complete file.
1619bool HexagonFrameLowering::insertCSRSpillsInBlock(MachineBasicBlock &MBB,
1620 const CSIVect &CSI, const HexagonRegisterInfo &HRI,
1621 bool &PrologueStubs) const {
1622 if (CSI.empty())
1623 return true;
1624
1626 PrologueStubs = false;
1627 MachineFunction &MF = *MBB.getParent();
1628 auto &HST = MF.getSubtarget<HexagonSubtarget>();
1629 auto &HII = *HST.getInstrInfo();
1630
1631 if (useSpillFunction(MF, CSI)) {
1632 PrologueStubs = true;
// The save routine is selected by the highest callee-saved register and by
// whether the stack-overflow sanitizer is enabled.
1633 Register MaxReg = getMaxCalleeSavedReg(CSI, HRI);
1634 bool StkOvrFlowEnabled = EnableStackOVFSanitizer;
1635 const char *SpillFun = getSpillFunctionFor(MaxReg, SK_ToMem,
1636 StkOvrFlowEnabled);
1637 auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1638 bool IsPIC = HTM.isPositionIndependent();
1639 bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1640
1641 // Call spill function.
1642 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();
// The pseudo opcode depends on three switches: stack-overflow checking,
// long calls (the _EXT variants) and position-independent code (_PIC).
1643 unsigned SpillOpc;
1644 if (StkOvrFlowEnabled) {
1645 if (LongCalls)
1646 SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT_PIC
1647 : Hexagon::SAVE_REGISTERS_CALL_V4STK_EXT;
1648 else
1649 SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4STK_PIC
1650 : Hexagon::SAVE_REGISTERS_CALL_V4STK;
1651 } else {
1652 if (LongCalls)
1653 SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_EXT_PIC
1654 : Hexagon::SAVE_REGISTERS_CALL_V4_EXT;
1655 else
1656 SpillOpc = IsPIC ? Hexagon::SAVE_REGISTERS_CALL_V4_PIC
1657 : Hexagon::SAVE_REGISTERS_CALL_V4;
1658 }
1659
1660 MachineInstr *SaveRegsCall =
1661 BuildMI(MBB, MI, DL, HII.get(SpillOpc))
1662 .addExternalSymbol(SpillFun);
1663
1664 // Add callee-saved registers as use.
1665 addCalleeSaveRegistersAsImpOperand(SaveRegsCall, CSI, false, true);
1666 // Add live in registers.
1667 for (const CalleeSavedInfo &I : CSI)
1668 MBB.addLiveIn(I.getReg());
1669 } else {
// Inline path: one store per callee-saved register.
1670 for (const CalleeSavedInfo &I : CSI) {
1671 MCRegister Reg = I.getReg();
1672 // Add live in registers. We treat eh_return callee saved register r0 - r3
1673 // specially. They are not really callee saved registers as they are not
1674 // supposed to be killed.
1675 bool IsKill = !HRI.isEHReturnCalleeSaveReg(Reg);
1676 int FI = I.getFrameIdx();
1677 const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1678 HII.storeRegToStackSlot(MBB, MI, Reg, IsKill, FI, RC, Register());
// EH-return registers are stored without the kill flag and are not added to
// the block's live-in list here.
1679 if (IsKill)
1680 MBB.addLiveIn(Reg);
1681 }
1682 }
1683
1684 // Move PS_aligna to after all CSR spills (both inline and spill-function
1685 // paths). PS_aligna initializes the AP register (e.g. R16) with an aligned
1686 // value derived from FP. Since AP is a callee-saved register, its original
1687 // value must be saved before it is overwritten, and it must be defined
1688 // before any AP-relative stack accesses.
1689 // MI points to the first non-spill instruction; all spills are before it.
1690 auto &HFI = *MF.getSubtarget<HexagonSubtarget>().getFrameLowering();
1691 if (const MachineInstr *AlignaI = HFI.getAlignaInstr(MF)) {
// getAlignaInstr returns a pointer-to-const; the cast is needed because the
// instruction is about to be moved within the block.
1692 MachineInstr *AI = const_cast<MachineInstr *>(AlignaI);
1693 // PS_aligna is always created in EntryBB during ISEL. Since PS_aligna
1694 // causes needsStackFrame() to return true, EntryBB will be included in
1695 // the set of blocks needing a frame. Because EntryBB dominates all blocks,
1696 // shrink-wrapping will always place PrologB at EntryBB when PS_aligna
1697 // exists. Therefore, this assertion should always hold.
1698 assert(AI->getParent() == &MBB && "PS_aligna not in prologue block");
1699 MBB.splice(MI, AI->getParent(), AI->getIterator());
1700 }
1701
1702 return true;
1703}
1704
// Inserts the code that reloads the callee-saved registers listed in CSI into
// MBB. Returns false when CSI is empty (nothing is inserted) and true
// otherwise.
// When useRestoreFunction(MF, CSI) holds, a single pseudo referencing a
// library restore routine is emitted: a RESTORE_DEALLOC_BEFORE_TAILCALL_*
// variant if the block has a tail call or no return, and a
// RESTORE_DEALLOC_RET_JMP_* variant placed before the return otherwise.
// Without a restore function, every register is loaded from its own frame
// index.
// NOTE(review): the declarations of MI, Kind and It are not visible in this
// listing -- confirm against the complete file.
1705bool HexagonFrameLowering::insertCSRRestoresInBlock(MachineBasicBlock &MBB,
1706 const CSIVect &CSI, const HexagonRegisterInfo &HRI) const {
1707 if (CSI.empty())
1708 return false;
1709
1711 MachineFunction &MF = *MBB.getParent();
1712 auto &HST = MF.getSubtarget<HexagonSubtarget>();
1713 auto &HII = *HST.getInstrInfo();
1714
1715 if (useRestoreFunction(MF, CSI)) {
// A block without a return is handled the same way as one with a tail call.
1716 bool HasTC = hasTailCall(MBB) || !hasReturn(MBB);
1717 Register MaxR = getMaxCalleeSavedReg(CSI, HRI);
1719 const char *RestoreFn = getSpillFunctionFor(MaxR, Kind);
1720 auto &HTM = static_cast<const HexagonTargetMachine&>(MF.getTarget());
1721 bool IsPIC = HTM.isPositionIndependent();
1722 bool LongCalls = HST.useLongCalls() || EnableSaveRestoreLong;
1723
1724 // Call spill function.
1725 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc()
1726 : MBB.findDebugLoc(MBB.end());
1727 MachineInstr *DeallocCall = nullptr;
1728
1729 if (HasTC) {
// Long calls select the _EXT opcodes, position-independent code the _PIC ones.
1730 unsigned RetOpc;
1731 if (LongCalls)
1732 RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT_PIC
1733 : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_EXT;
1734 else
1735 RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4_PIC
1736 : Hexagon::RESTORE_DEALLOC_BEFORE_TAILCALL_V4;
1737 DeallocCall = BuildMI(MBB, MI, DL, HII.get(RetOpc))
1738 .addExternalSymbol(RestoreFn);
1739 } else {
1740 // The block has a return.
// The assertion requires the return to be the last instruction of the block.
1742 assert(It->isReturn() && std::next(It) == MBB.end());
1743 unsigned RetOpc;
1744 if (LongCalls)
1745 RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT_PIC
1746 : Hexagon::RESTORE_DEALLOC_RET_JMP_V4_EXT;
1747 else
1748 RetOpc = IsPIC ? Hexagon::RESTORE_DEALLOC_RET_JMP_V4_PIC
1749 : Hexagon::RESTORE_DEALLOC_RET_JMP_V4;
1750 DeallocCall = BuildMI(MBB, It, DL, HII.get(RetOpc))
1751 .addExternalSymbol(RestoreFn);
1752 // Transfer the function live-out registers.
1753 DeallocCall->copyImplicitOps(MF, *It);
1754 }
// The callee-saved registers are attached to the restore pseudo as implicit
// operands (flags: true, false).
1755 addCalleeSaveRegistersAsImpOperand(DeallocCall, CSI, true, false);
1756 return true;
1757 }
1758
// Inline path: one load per callee-saved register, inserted at MI.
1759 for (const CalleeSavedInfo &I : CSI) {
1760 MCRegister Reg = I.getReg();
1761 const TargetRegisterClass *RC = HRI.getMinimalPhysRegClass(Reg);
1762 int FI = I.getFrameIdx();
1763 HII.loadRegFromStackSlot(MBB, MI, Reg, FI, RC, Register());
1764 }
1765
1766 return true;
1767}
1768
1772 MachineInstr &MI = *I;
1773 unsigned Opc = MI.getOpcode();
1774 (void)Opc; // Silence compiler warning.
1775 assert((Opc == Hexagon::ADJCALLSTACKDOWN || Opc == Hexagon::ADJCALLSTACKUP) &&
1776 "Cannot handle this call frame pseudo instruction");
1777 return MBB.erase(I);
1778}
1779
1781 MachineFunction &MF, RegScavenger *RS) const {
1782 // If this function uses an aligned stack and also has variable-sized stack
1783 // objects, then we need to map all spill slots to fixed positions, so that
1784 // they can be accessed through FP. Otherwise they would have to be accessed
1785 // via AP, which may not be available at the particular place in the program.
1786 MachineFrameInfo &MFI = MF.getFrameInfo();
1787 bool HasAlloca = MFI.hasVarSizedObjects();
1788 bool NeedsAlign = (MFI.getMaxAlign() > getStackAlign());
1789
1790 if (!HasAlloca || !NeedsAlign)
1791 return;
1792
1793 // Set the physical aligned-stack base address register.
1794 MCRegister AP;
1795 if (const MachineInstr *AI = getAlignaInstr(MF))
1796 AP = AI->getOperand(0).getReg();
1797 auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
1798 assert(!AP.isValid() || AP.isPhysical());
1799 HMFI.setStackAlignBaseReg(AP);
1800}
1801
1802/// Returns true if there are no caller-saved registers available in class RC.
1804 const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC) {
1805 MachineRegisterInfo &MRI = MF.getRegInfo();
1806
1807 auto IsUsed = [&HRI,&MRI] (Register Reg) -> bool {
1808 for (MCRegAliasIterator AI(Reg, &HRI, true); AI.isValid(); ++AI)
1809 if (MRI.isPhysRegUsed(*AI))
1810 return true;
1811 return false;
1812 };
1813
1814 // Check for an unused caller-saved register. Callee-saved registers
1815 // have become pristine by now.
1816 for (const MCPhysReg *P = HRI.getCallerSavedRegs(&MF, RC); *P; ++P)
1817 if (!IsUsed(*P))
1818 return false;
1819
1820 // All caller-saved registers are used.
1821 return true;
1822}
1823
1824#ifndef NDEBUG
1826 dbgs() << '{';
1827 for (int x = Regs.find_first(); x >= 0; x = Regs.find_next(x)) {
1828 Register R = x;
1829 dbgs() << ' ' << printReg(R, &TRI);
1830 }
1831 dbgs() << " }";
1832}
1833#endif
1834
1836 const TargetRegisterInfo *TRI, std::vector<CalleeSavedInfo> &CSI) const {
1837 LLVM_DEBUG(dbgs() << __func__ << " on " << MF.getName() << '\n');
1838 MachineFrameInfo &MFI = MF.getFrameInfo();
1839 BitVector SRegs(Hexagon::NUM_TARGET_REGS);
1840
1841 // Generate a set of unique, callee-saved registers (SRegs), where each
1842 // register in the set is maximal in terms of sub-/super-register relation,
1843 // i.e. for each R in SRegs, no proper super-register of R is also in SRegs.
1844
1845 // (1) For each callee-saved register, add that register and all of its
1846 // sub-registers to SRegs.
1847 LLVM_DEBUG(dbgs() << "Initial CS registers: {");
1848 for (const CalleeSavedInfo &I : CSI) {
1849 Register R = I.getReg();
1850 LLVM_DEBUG(dbgs() << ' ' << printReg(R, TRI));
1851 for (MCPhysReg SR : TRI->subregs_inclusive(R))
1852 SRegs[SR] = true;
1853 }
1854 LLVM_DEBUG(dbgs() << " }\n");
1855 LLVM_DEBUG(dbgs() << "SRegs.1: "; dump_registers(SRegs, *TRI);
1856 dbgs() << "\n");
1857
1858 // (2) For each reserved register, remove that register and all of its
1859 // sub- and super-registers from SRegs.
1860 BitVector Reserved = TRI->getReservedRegs(MF);
1861 // Unreserve the stack align register: it is reserved for this function
1862 // only, it still needs to be saved/restored.
1863 Register AP =
1864 MF.getInfo<HexagonMachineFunctionInfo>()->getStackAlignBaseReg();
1865 if (AP.isValid()) {
1866 Reserved[AP] = false;
1867 // Unreserve super-regs if no other subregisters are reserved.
1868 for (MCPhysReg SP : TRI->superregs(AP)) {
1869 bool HasResSub = false;
1870 for (MCPhysReg SB : TRI->subregs(SP)) {
1871 if (!Reserved[SB])
1872 continue;
1873 HasResSub = true;
1874 break;
1875 }
1876 if (!HasResSub)
1877 Reserved[SP] = false;
1878 }
1879 }
1880
1881 for (int x = Reserved.find_first(); x >= 0; x = Reserved.find_next(x)) {
1882 Register R = x;
1883 for (MCPhysReg SR : TRI->superregs_inclusive(R))
1884 SRegs[SR] = false;
1885 }
1886 LLVM_DEBUG(dbgs() << "Res: "; dump_registers(Reserved, *TRI);
1887 dbgs() << "\n");
1888 LLVM_DEBUG(dbgs() << "SRegs.2: "; dump_registers(SRegs, *TRI);
1889 dbgs() << "\n");
1890
1891 // (3) Collect all registers that have at least one sub-register in SRegs,
1892 // and also have no sub-registers that are reserved. These will be the can-
1893 // didates for saving as a whole instead of their individual sub-registers.
1894 // (Saving R17:16 instead of R16 is fine, but only if R17 was not reserved.)
1895 BitVector TmpSup(Hexagon::NUM_TARGET_REGS);
1896 for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1897 Register R = x;
1898 for (MCPhysReg SR : TRI->superregs(R))
1899 TmpSup[SR] = true;
1900 }
1901 for (int x = TmpSup.find_first(); x >= 0; x = TmpSup.find_next(x)) {
1902 Register R = x;
1903 for (MCPhysReg SR : TRI->subregs_inclusive(R)) {
1904 if (!Reserved[SR])
1905 continue;
1906 TmpSup[R] = false;
1907 break;
1908 }
1909 }
1910 LLVM_DEBUG(dbgs() << "TmpSup: "; dump_registers(TmpSup, *TRI);
1911 dbgs() << "\n");
1912
1913 // (4) Include all super-registers found in (3) into SRegs.
1914 SRegs |= TmpSup;
1915 LLVM_DEBUG(dbgs() << "SRegs.4: "; dump_registers(SRegs, *TRI);
1916 dbgs() << "\n");
1917
1918 // (5) For each register R in SRegs, if any super-register of R is in SRegs,
1919 // remove R from SRegs.
1920 for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1921 Register R = x;
1922 for (MCPhysReg SR : TRI->superregs(R)) {
1923 if (!SRegs[SR])
1924 continue;
1925 SRegs[R] = false;
1926 break;
1927 }
1928 }
1929 LLVM_DEBUG(dbgs() << "SRegs.5: "; dump_registers(SRegs, *TRI);
1930 dbgs() << "\n");
1931
1932 // Now, for each register that has a fixed stack slot, create the stack
1933 // object for it.
1934 CSI.clear();
1935
1937
1938 unsigned NumFixed;
1939 int64_t MinOffset = 0; // CS offsets are negative.
1940 const SpillSlot *FixedSlots = getCalleeSavedSpillSlots(NumFixed);
1941 for (const SpillSlot *S = FixedSlots; S != FixedSlots+NumFixed; ++S) {
1942 if (!SRegs[S->Reg])
1943 continue;
1944 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(S->Reg);
1945 int FI = MFI.CreateFixedSpillStackObject(TRI->getSpillSize(*RC), S->Offset);
1946 MinOffset = std::min(MinOffset, S->Offset);
1947 CSI.push_back(CalleeSavedInfo(S->Reg, FI));
1948 SRegs[S->Reg] = false;
1949 }
1950
1951 // There can be some registers that don't have fixed slots. For example,
1952 // we need to store R0-R3 in functions with exception handling. For each
1953 // such register, create a non-fixed stack object.
1954 for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1955 Register R = x;
1956 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(R);
1957 unsigned Size = TRI->getSpillSize(*RC);
1958 int64_t Off = MinOffset - Size;
1959 Align Alignment = std::min(TRI->getSpillAlign(*RC), getStackAlign());
1960 Off &= -Alignment.value();
1961 int FI = MFI.CreateFixedSpillStackObject(Size, Off);
1962 MinOffset = std::min(MinOffset, Off);
1963 CSI.push_back(CalleeSavedInfo(R, FI));
1964 SRegs[R] = false;
1965 }
1966
1967 LLVM_DEBUG({
1968 dbgs() << "CS information: {";
1969 for (const CalleeSavedInfo &I : CSI) {
1970 int FI = I.getFrameIdx();
1971 int Off = MFI.getObjectOffset(FI);
1972 dbgs() << ' ' << printReg(I.getReg(), TRI) << ":fi#" << FI << ":sp";
1973 if (Off >= 0)
1974 dbgs() << '+';
1975 dbgs() << Off;
1976 }
1977 dbgs() << " }\n";
1978 });
1979
1980#ifndef NDEBUG
1981 // Verify that all registers were handled.
1982 bool MissedReg = false;
1983 for (int x = SRegs.find_first(); x >= 0; x = SRegs.find_next(x)) {
1984 Register R = x;
1985 dbgs() << printReg(R, TRI) << ' ';
1986 MissedReg = true;
1987 }
1988 if (MissedReg)
1989 llvm_unreachable("...there are unhandled callee-saved registers!");
1990#endif
1991
1992 return true;
1993}
1994
1995bool HexagonFrameLowering::expandCopy(MachineBasicBlock &B,
1997 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
1998 MachineInstr *MI = &*It;
1999 DebugLoc DL = MI->getDebugLoc();
2000 Register DstR = MI->getOperand(0).getReg();
2001 Register SrcR = MI->getOperand(1).getReg();
2002 if (!Hexagon::ModRegsRegClass.contains(DstR) ||
2003 !Hexagon::ModRegsRegClass.contains(SrcR))
2004 return false;
2005
2006 Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2007 BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), TmpR).add(MI->getOperand(1));
2008 BuildMI(B, It, DL, HII.get(TargetOpcode::COPY), DstR)
2009 .addReg(TmpR, RegState::Kill);
2010
2011 NewRegs.push_back(TmpR);
2012 B.erase(It);
2013 return true;
2014}
2015
2016bool HexagonFrameLowering::expandStoreInt(MachineBasicBlock &B,
2018 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2019 MachineInstr *MI = &*It;
2020 if (!MI->getOperand(0).isFI())
2021 return false;
2022
2023 DebugLoc DL = MI->getDebugLoc();
2024 unsigned Opc = MI->getOpcode();
2025 Register SrcR = MI->getOperand(2).getReg();
2026 bool IsKill = MI->getOperand(2).isKill();
2027 int FI = MI->getOperand(0).getIndex();
2028
2029 // TmpR = C2_tfrpr SrcR if SrcR is a predicate register
2030 // TmpR = A2_tfrcrr SrcR if SrcR is a modifier register
2031 Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2032 unsigned TfrOpc = (Opc == Hexagon::STriw_pred) ? Hexagon::C2_tfrpr
2033 : Hexagon::A2_tfrcrr;
2034 BuildMI(B, It, DL, HII.get(TfrOpc), TmpR)
2035 .addReg(SrcR, getKillRegState(IsKill));
2036
2037 // S2_storeri_io FI, 0, TmpR
2038 BuildMI(B, It, DL, HII.get(Hexagon::S2_storeri_io))
2039 .addFrameIndex(FI)
2040 .addImm(0)
2041 .addReg(TmpR, RegState::Kill)
2042 .cloneMemRefs(*MI);
2043
2044 NewRegs.push_back(TmpR);
2045 B.erase(It);
2046 return true;
2047}
2048
2049bool HexagonFrameLowering::expandLoadInt(MachineBasicBlock &B,
2050 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2051 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2052 MachineInstr *MI = &*It;
2053 if (!MI->getOperand(1).isFI())
2054 return false;
2055
2056 DebugLoc DL = MI->getDebugLoc();
2057 unsigned Opc = MI->getOpcode();
2058 Register DstR = MI->getOperand(0).getReg();
2059 int FI = MI->getOperand(1).getIndex();
2060
2061 // TmpR = L2_loadri_io FI, 0
2062 Register TmpR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2063 BuildMI(B, It, DL, HII.get(Hexagon::L2_loadri_io), TmpR)
2064 .addFrameIndex(FI)
2065 .addImm(0)
2066 .cloneMemRefs(*MI);
2067
2068 // DstR = C2_tfrrp TmpR if DstR is a predicate register
2069 // DstR = A2_tfrrcr TmpR if DstR is a modifier register
2070 unsigned TfrOpc = (Opc == Hexagon::LDriw_pred) ? Hexagon::C2_tfrrp
2071 : Hexagon::A2_tfrrcr;
2072 BuildMI(B, It, DL, HII.get(TfrOpc), DstR)
2073 .addReg(TmpR, RegState::Kill);
2074
2075 NewRegs.push_back(TmpR);
2076 B.erase(It);
2077 return true;
2078}
2079
2080bool HexagonFrameLowering::expandStoreVecPred(MachineBasicBlock &B,
2081 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2082 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2083 MachineInstr *MI = &*It;
2084 if (!MI->getOperand(0).isFI())
2085 return false;
2086
2087 DebugLoc DL = MI->getDebugLoc();
2088 Register SrcR = MI->getOperand(2).getReg();
2089 bool IsKill = MI->getOperand(2).isKill();
2090 int FI = MI->getOperand(0).getIndex();
2091 auto *RC = &Hexagon::HvxVRRegClass;
2092
2093 // Insert transfer to general vector register.
2094 // TmpR0 = A2_tfrsi 0x01010101
2095 // TmpR1 = V6_vandqrt Qx, TmpR0
2096 // store FI, 0, TmpR1
2097 Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2098 Register TmpR1 = MRI.createVirtualRegister(RC);
2099
2100 BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
2101 .addImm(0x01010101);
2102
2103 BuildMI(B, It, DL, HII.get(Hexagon::V6_vandqrt), TmpR1)
2104 .addReg(SrcR, getKillRegState(IsKill))
2105 .addReg(TmpR0, RegState::Kill);
2106
2107 HII.storeRegToStackSlot(B, It, TmpR1, true, FI, RC, Register());
2108 expandStoreVec(B, std::prev(It), MRI, HII, NewRegs);
2109
2110 NewRegs.push_back(TmpR0);
2111 NewRegs.push_back(TmpR1);
2112 B.erase(It);
2113 return true;
2114}
2115
2116bool HexagonFrameLowering::expandLoadVecPred(MachineBasicBlock &B,
2117 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2118 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2119 MachineInstr *MI = &*It;
2120 if (!MI->getOperand(1).isFI())
2121 return false;
2122
2123 DebugLoc DL = MI->getDebugLoc();
2124 Register DstR = MI->getOperand(0).getReg();
2125 int FI = MI->getOperand(1).getIndex();
2126 auto *RC = &Hexagon::HvxVRRegClass;
2127
2128 // TmpR0 = A2_tfrsi 0x01010101
2129 // TmpR1 = load FI, 0
2130 // DstR = V6_vandvrt TmpR1, TmpR0
2131 Register TmpR0 = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
2132 Register TmpR1 = MRI.createVirtualRegister(RC);
2133
2134 BuildMI(B, It, DL, HII.get(Hexagon::A2_tfrsi), TmpR0)
2135 .addImm(0x01010101);
2136 HII.loadRegFromStackSlot(B, It, TmpR1, FI, RC, Register());
2137 expandLoadVec(B, std::prev(It), MRI, HII, NewRegs);
2138
2139 BuildMI(B, It, DL, HII.get(Hexagon::V6_vandvrt), DstR)
2140 .addReg(TmpR1, RegState::Kill)
2141 .addReg(TmpR0, RegState::Kill);
2142
2143 NewRegs.push_back(TmpR0);
2144 NewRegs.push_back(TmpR1);
2145 B.erase(It);
2146 return true;
2147}
2148
2149bool HexagonFrameLowering::expandStoreVec2(MachineBasicBlock &B,
2150 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2151 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2152 MachineFunction &MF = *B.getParent();
2153 auto &MFI = MF.getFrameInfo();
2154 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2155 MachineInstr *MI = &*It;
2156 if (!MI->getOperand(0).isFI())
2157 return false;
2158
2159 // It is possible that the double vector being stored is only partially
2160 // defined. From the point of view of the liveness tracking, it is ok to
2161 // store it as a whole, but if we break it up we may end up storing a
2162 // register that is entirely undefined.
2163 LivePhysRegs LPR(HRI);
2164 LPR.addLiveIns(B);
2166 for (auto R = B.begin(); R != It; ++R) {
2167 Clobbers.clear();
2168 LPR.stepForward(*R, Clobbers);
2169 }
2170
2171 DebugLoc DL = MI->getDebugLoc();
2172 Register SrcR = MI->getOperand(2).getReg();
2173 Register SrcLo = HRI.getSubReg(SrcR, Hexagon::vsub_lo);
2174 Register SrcHi = HRI.getSubReg(SrcR, Hexagon::vsub_hi);
2175 bool IsKill = MI->getOperand(2).isKill();
2176 int FI = MI->getOperand(0).getIndex();
2177
2178 unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
2179 Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
2180 Align HasAlign = MFI.getObjectAlign(FI);
2181 unsigned StoreOpc;
2182
2183 // Store low part.
2184 if (LPR.contains(SrcLo)) {
2185 StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
2186 : Hexagon::V6_vS32Ub_ai;
2187 BuildMI(B, It, DL, HII.get(StoreOpc))
2188 .addFrameIndex(FI)
2189 .addImm(0)
2190 .addReg(SrcLo, getKillRegState(IsKill))
2191 .cloneMemRefs(*MI);
2192 }
2193
2194 // Store high part.
2195 if (LPR.contains(SrcHi)) {
2196 StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
2197 : Hexagon::V6_vS32Ub_ai;
2198 BuildMI(B, It, DL, HII.get(StoreOpc))
2199 .addFrameIndex(FI)
2200 .addImm(Size)
2201 .addReg(SrcHi, getKillRegState(IsKill))
2202 .cloneMemRefs(*MI);
2203 }
2204
2205 B.erase(It);
2206 return true;
2207}
2208
2209bool HexagonFrameLowering::expandLoadVec2(MachineBasicBlock &B,
2210 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2211 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2212 MachineFunction &MF = *B.getParent();
2213 auto &MFI = MF.getFrameInfo();
2214 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2215 MachineInstr *MI = &*It;
2216 if (!MI->getOperand(1).isFI())
2217 return false;
2218
2219 DebugLoc DL = MI->getDebugLoc();
2220 Register DstR = MI->getOperand(0).getReg();
2221 Register DstHi = HRI.getSubReg(DstR, Hexagon::vsub_hi);
2222 Register DstLo = HRI.getSubReg(DstR, Hexagon::vsub_lo);
2223 int FI = MI->getOperand(1).getIndex();
2224
2225 unsigned Size = HRI.getSpillSize(Hexagon::HvxVRRegClass);
2226 Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
2227 Align HasAlign = MFI.getObjectAlign(FI);
2228 unsigned LoadOpc;
2229
2230 // Load low part.
2231 LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
2232 : Hexagon::V6_vL32Ub_ai;
2233 BuildMI(B, It, DL, HII.get(LoadOpc), DstLo)
2234 .addFrameIndex(FI)
2235 .addImm(0)
2236 .cloneMemRefs(*MI);
2237
2238 // Load high part.
2239 LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
2240 : Hexagon::V6_vL32Ub_ai;
2241 BuildMI(B, It, DL, HII.get(LoadOpc), DstHi)
2242 .addFrameIndex(FI)
2243 .addImm(Size)
2244 .cloneMemRefs(*MI);
2245
2246 B.erase(It);
2247 return true;
2248}
2249
2250bool HexagonFrameLowering::expandStoreVec(MachineBasicBlock &B,
2251 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2252 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2253 MachineFunction &MF = *B.getParent();
2254 auto &MFI = MF.getFrameInfo();
2255 MachineInstr *MI = &*It;
2256 if (!MI->getOperand(0).isFI())
2257 return false;
2258
2259 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2260 DebugLoc DL = MI->getDebugLoc();
2261 Register SrcR = MI->getOperand(2).getReg();
2262 bool IsKill = MI->getOperand(2).isKill();
2263 int FI = MI->getOperand(0).getIndex();
2264
2265 Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
2266 Align HasAlign = MFI.getObjectAlign(FI);
2267 unsigned StoreOpc = NeedAlign <= HasAlign ? Hexagon::V6_vS32b_ai
2268 : Hexagon::V6_vS32Ub_ai;
2269 BuildMI(B, It, DL, HII.get(StoreOpc))
2270 .addFrameIndex(FI)
2271 .addImm(0)
2272 .addReg(SrcR, getKillRegState(IsKill))
2273 .cloneMemRefs(*MI);
2274
2275 B.erase(It);
2276 return true;
2277}
2278
2279bool HexagonFrameLowering::expandLoadVec(MachineBasicBlock &B,
2280 MachineBasicBlock::iterator It, MachineRegisterInfo &MRI,
2281 const HexagonInstrInfo &HII, SmallVectorImpl<Register> &NewRegs) const {
2282 MachineFunction &MF = *B.getParent();
2283 auto &MFI = MF.getFrameInfo();
2284 MachineInstr *MI = &*It;
2285 if (!MI->getOperand(1).isFI())
2286 return false;
2287
2288 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2289 DebugLoc DL = MI->getDebugLoc();
2290 Register DstR = MI->getOperand(0).getReg();
2291 int FI = MI->getOperand(1).getIndex();
2292
2293 Align NeedAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
2294 Align HasAlign = MFI.getObjectAlign(FI);
2295 unsigned LoadOpc = NeedAlign <= HasAlign ? Hexagon::V6_vL32b_ai
2296 : Hexagon::V6_vL32Ub_ai;
2297 BuildMI(B, It, DL, HII.get(LoadOpc), DstR)
2298 .addFrameIndex(FI)
2299 .addImm(0)
2300 .cloneMemRefs(*MI);
2301
2302 B.erase(It);
2303 return true;
2304}
2305
2306bool HexagonFrameLowering::expandSpillMacros(MachineFunction &MF,
2307 SmallVectorImpl<Register> &NewRegs) const {
2308 auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
2309 MachineRegisterInfo &MRI = MF.getRegInfo();
2310 bool Changed = false;
2311
2312 for (auto &B : MF) {
2313 // Traverse the basic block.
2315 for (auto I = B.begin(), E = B.end(); I != E; I = NextI) {
2316 MachineInstr *MI = &*I;
2317 NextI = std::next(I);
2318 unsigned Opc = MI->getOpcode();
2319
2320 switch (Opc) {
2321 case TargetOpcode::COPY:
2322 Changed |= expandCopy(B, I, MRI, HII, NewRegs);
2323 break;
2324 case Hexagon::STriw_pred:
2325 case Hexagon::STriw_ctr:
2326 Changed |= expandStoreInt(B, I, MRI, HII, NewRegs);
2327 break;
2328 case Hexagon::LDriw_pred:
2329 case Hexagon::LDriw_ctr:
2330 Changed |= expandLoadInt(B, I, MRI, HII, NewRegs);
2331 break;
2332 case Hexagon::PS_vstorerq_ai:
2333 Changed |= expandStoreVecPred(B, I, MRI, HII, NewRegs);
2334 break;
2335 case Hexagon::PS_vloadrq_ai:
2336 Changed |= expandLoadVecPred(B, I, MRI, HII, NewRegs);
2337 break;
2338 case Hexagon::PS_vloadrw_ai:
2339 Changed |= expandLoadVec2(B, I, MRI, HII, NewRegs);
2340 break;
2341 case Hexagon::PS_vstorerw_ai:
2342 Changed |= expandStoreVec2(B, I, MRI, HII, NewRegs);
2343 break;
2344 }
2345 }
2346 }
2347
2348 return Changed;
2349}
2350
2352 BitVector &SavedRegs,
2353 RegScavenger *RS) const {
2354 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2355
2356 SavedRegs.resize(HRI.getNumRegs());
2357
2358 // If we have a function containing __builtin_eh_return we want to spill and
2359 // restore all callee saved registers. Pretend that they are used.
2361 for (const MCPhysReg *R = HRI.getCalleeSavedRegs(&MF); *R; ++R)
2362 SavedRegs.set(*R);
2363
2364 // Replace predicate register pseudo spill code.
2366 expandSpillMacros(MF, NewRegs);
2367 if (OptimizeSpillSlots && !isOptNone(MF))
2368 optimizeSpillSlots(MF, NewRegs);
2369
2370 // We need to reserve a spill slot if scavenging could potentially require
2371 // spilling a scavenged register.
2372 if (!NewRegs.empty() || mayOverflowFrameOffset(MF)) {
2373 MachineFrameInfo &MFI = MF.getFrameInfo();
2374 MachineRegisterInfo &MRI = MF.getRegInfo();
2376 // Reserve an int register in any case, because it could be used to hold
2377 // the stack offset in case it does not fit into a spill instruction.
2378 SpillRCs.insert(&Hexagon::IntRegsRegClass);
2379
2380 for (Register VR : NewRegs)
2381 SpillRCs.insert(MRI.getRegClass(VR));
2382
2383 for (const auto *RC : SpillRCs) {
2384 if (!needToReserveScavengingSpillSlots(MF, HRI, RC))
2385 continue;
2386 unsigned Num = 1;
2387 switch (RC->getID()) {
2388 case Hexagon::IntRegsRegClassID:
2390 break;
2391 case Hexagon::HvxQRRegClassID:
2392 Num = 2; // Vector predicate spills also need a vector register.
2393 break;
2394 }
2395 unsigned S = HRI.getSpillSize(*RC);
2396 Align A = HRI.getSpillAlign(*RC);
2397 for (unsigned i = 0; i < Num; i++) {
2398 int NewFI = MFI.CreateSpillStackObject(S, A);
2399 RS->addScavengingFrameIndex(NewFI);
2400 }
2401 }
2402 }
2403
2405}
2406
2407Register HexagonFrameLowering::findPhysReg(MachineFunction &MF,
2411 const TargetRegisterClass *RC) const {
2412 auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
2413 auto &MRI = MF.getRegInfo();
2414
2415 auto isDead = [&FIR,&DeadMap] (Register Reg) -> bool {
2416 auto F = DeadMap.find({Reg,0});
2417 if (F == DeadMap.end())
2418 return false;
2419 for (auto &DR : F->second)
2420 if (DR.contains(FIR))
2421 return true;
2422 return false;
2423 };
2424
2425 for (Register Reg : RC->getRawAllocationOrder(MF)) {
2426 bool Dead = true;
2427 for (auto R : HexagonBlockRanges::expandToSubRegs({Reg,0}, MRI, HRI)) {
2428 if (isDead(R.Reg))
2429 continue;
2430 Dead = false;
2431 break;
2432 }
2433 if (Dead)
2434 return Reg;
2435 }
2436 return 0;
2437}
2438
2439void HexagonFrameLowering::optimizeSpillSlots(MachineFunction &MF,
2440 SmallVectorImpl<Register> &VRegs) const {
2441 auto &HST = MF.getSubtarget<HexagonSubtarget>();
2442 auto &HII = *HST.getInstrInfo();
2443 auto &HRI = *HST.getRegisterInfo();
2444 auto &MRI = MF.getRegInfo();
2445 HexagonBlockRanges HBR(MF);
2446
2447 using BlockIndexMap =
2448 std::map<MachineBasicBlock *, HexagonBlockRanges::InstrIndexMap>;
2449 using BlockRangeMap =
2450 std::map<MachineBasicBlock *, HexagonBlockRanges::RangeList>;
2451 using IndexType = HexagonBlockRanges::IndexType;
2452
2453 struct SlotInfo {
2454 BlockRangeMap Map;
2455 unsigned Size = 0;
2456 const TargetRegisterClass *RC = nullptr;
2457
2458 SlotInfo() = default;
2459 };
2460
2461 BlockIndexMap BlockIndexes;
2462 SmallSet<int,4> BadFIs;
2463 std::map<int,SlotInfo> FIRangeMap;
2464
2465 // Accumulate register classes: get a common class for a pre-existing
2466 // class HaveRC and a new class NewRC. Return nullptr if a common class
2467 // cannot be found, otherwise return the resulting class. If HaveRC is
2468 // nullptr, assume that it is still unset.
2469 auto getCommonRC =
2470 [](const TargetRegisterClass *HaveRC,
2471 const TargetRegisterClass *NewRC) -> const TargetRegisterClass * {
2472 if (HaveRC == nullptr || HaveRC == NewRC)
2473 return NewRC;
2474 // Different classes, both non-null. Pick the more general one.
2475 if (HaveRC->hasSubClassEq(NewRC))
2476 return HaveRC;
2477 if (NewRC->hasSubClassEq(HaveRC))
2478 return NewRC;
2479 return nullptr;
2480 };
2481
2482 // Scan all blocks in the function. Check all occurrences of frame indexes,
2483 // and collect relevant information.
2484 for (auto &B : MF) {
2485 std::map<int,IndexType> LastStore, LastLoad;
2486 auto P = BlockIndexes.emplace(&B, HexagonBlockRanges::InstrIndexMap(B));
2487 auto &IndexMap = P.first->second;
2488 LLVM_DEBUG(dbgs() << "Index map for " << printMBBReference(B) << "\n"
2489 << IndexMap << '\n');
2490
2491 for (auto &In : B) {
2492 int LFI, SFI;
2493 bool Load = HII.isLoadFromStackSlot(In, LFI) && !HII.isPredicated(In);
2494 bool Store = HII.isStoreToStackSlot(In, SFI) && !HII.isPredicated(In);
2495 if (Load && Store) {
2496 // If it's both a load and a store, then we won't handle it.
2497 BadFIs.insert(LFI);
2498 BadFIs.insert(SFI);
2499 continue;
2500 }
2501 // Check for register classes of the register used as the source for
2502 // the store, and the register used as the destination for the load.
2503 // Also, only accept base+imm_offset addressing modes. Other addressing
2504 // modes can have side-effects (post-increments, etc.). For stack
2505 // slots they are very unlikely, so there is not much loss due to
2506 // this restriction.
2507 if (Load || Store) {
2508 int TFI = Load ? LFI : SFI;
2509 unsigned AM = HII.getAddrMode(In);
2510 SlotInfo &SI = FIRangeMap[TFI];
2511 bool Bad = (AM != HexagonII::BaseImmOffset);
2512 if (!Bad) {
2513 // If the addressing mode is ok, check the register class.
2514 unsigned OpNum = Load ? 0 : 2;
2515 auto *RC = HII.getRegClass(In.getDesc(), OpNum);
2516 RC = getCommonRC(SI.RC, RC);
2517 if (RC == nullptr)
2518 Bad = true;
2519 else
2520 SI.RC = RC;
2521 }
2522 if (!Bad) {
2523 // Check sizes.
2524 unsigned S = HII.getMemAccessSize(In);
2525 if (SI.Size != 0 && SI.Size != S)
2526 Bad = true;
2527 else
2528 SI.Size = S;
2529 }
2530 if (!Bad) {
2531 for (auto *Mo : In.memoperands()) {
2532 if (!Mo->isVolatile() && !Mo->isAtomic())
2533 continue;
2534 Bad = true;
2535 break;
2536 }
2537 }
2538 if (Bad)
2539 BadFIs.insert(TFI);
2540 }
2541
2542 // Locate uses of frame indices.
2543 for (unsigned i = 0, n = In.getNumOperands(); i < n; ++i) {
2544 const MachineOperand &Op = In.getOperand(i);
2545 if (!Op.isFI())
2546 continue;
2547 int FI = Op.getIndex();
2548 // Make sure that the following operand is an immediate and that
2549 // it is 0. This is the offset in the stack object.
2550 if (i+1 >= n || !In.getOperand(i+1).isImm() ||
2551 In.getOperand(i+1).getImm() != 0)
2552 BadFIs.insert(FI);
2553 if (BadFIs.count(FI))
2554 continue;
2555
2556 IndexType Index = IndexMap.getIndex(&In);
2557 auto &LS = LastStore[FI];
2558 auto &LL = LastLoad[FI];
2559 if (Load) {
2560 if (LS == IndexType::None)
2561 LS = IndexType::Entry;
2562 LL = Index;
2563 } else if (Store) {
2564 HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2565 if (LS != IndexType::None)
2566 RL.add(LS, LL, false, false);
2567 else if (LL != IndexType::None)
2568 RL.add(IndexType::Entry, LL, false, false);
2569 LL = IndexType::None;
2570 LS = Index;
2571 } else {
2572 BadFIs.insert(FI);
2573 }
2574 }
2575 }
2576
2577 for (auto &I : LastLoad) {
2578 IndexType LL = I.second;
2579 if (LL == IndexType::None)
2580 continue;
2581 auto &RL = FIRangeMap[I.first].Map[&B];
2582 IndexType &LS = LastStore[I.first];
2583 if (LS != IndexType::None)
2584 RL.add(LS, LL, false, false);
2585 else
2586 RL.add(IndexType::Entry, LL, false, false);
2587 LS = IndexType::None;
2588 }
2589 for (auto &I : LastStore) {
2590 IndexType LS = I.second;
2591 if (LS == IndexType::None)
2592 continue;
2593 auto &RL = FIRangeMap[I.first].Map[&B];
2594 RL.add(LS, IndexType::None, false, false);
2595 }
2596 }
2597
2598 LLVM_DEBUG({
2599 for (auto &P : FIRangeMap) {
2600 dbgs() << "fi#" << P.first;
2601 if (BadFIs.count(P.first))
2602 dbgs() << " (bad)";
2603 dbgs() << " RC: ";
2604 if (P.second.RC != nullptr)
2605 dbgs() << HRI.getRegClassName(P.second.RC) << '\n';
2606 else
2607 dbgs() << "<null>\n";
2608 for (auto &R : P.second.Map)
2609 dbgs() << " " << printMBBReference(*R.first) << " { " << R.second
2610 << "}\n";
2611 }
2612 });
2613
2614 // When a slot is loaded from in a block without being stored to in the
2615 // same block, it is live-on-entry to this block. To avoid CFG analysis,
2616 // consider this slot to be live-on-exit from all blocks.
2617 SmallSet<int,4> LoxFIs;
2618
2619 std::map<MachineBasicBlock*,std::vector<int>> BlockFIMap;
2620
2621 for (auto &P : FIRangeMap) {
2622 // P = pair(FI, map: BB->RangeList)
2623 if (BadFIs.count(P.first))
2624 continue;
2625 for (auto &B : MF) {
2626 auto F = P.second.Map.find(&B);
2627 // F = pair(BB, RangeList)
2628 if (F == P.second.Map.end() || F->second.empty())
2629 continue;
2630 HexagonBlockRanges::IndexRange &IR = F->second.front();
2631 if (IR.start() == IndexType::Entry)
2632 LoxFIs.insert(P.first);
2633 BlockFIMap[&B].push_back(P.first);
2634 }
2635 }
2636
2637 LLVM_DEBUG({
2638 dbgs() << "Block-to-FI map (* -- live-on-exit):\n";
2639 for (auto &P : BlockFIMap) {
2640 auto &FIs = P.second;
2641 if (FIs.empty())
2642 continue;
2643 dbgs() << " " << printMBBReference(*P.first) << ": {";
2644 for (auto I : FIs) {
2645 dbgs() << " fi#" << I;
2646 if (LoxFIs.count(I))
2647 dbgs() << '*';
2648 }
2649 dbgs() << " }\n";
2650 }
2651 });
2652
2653#ifndef NDEBUG
2654 bool HasOptLimit = SpillOptMax.getPosition();
2655#endif
2656
2657 // eliminate loads, when all loads eliminated, eliminate all stores.
2658 for (auto &B : MF) {
2659 auto F = BlockIndexes.find(&B);
2660 assert(F != BlockIndexes.end());
2661 HexagonBlockRanges::InstrIndexMap &IM = F->second;
2662 HexagonBlockRanges::RegToRangeMap LM = HBR.computeLiveMap(IM);
2663 HexagonBlockRanges::RegToRangeMap DM = HBR.computeDeadMap(IM, LM);
2664 LLVM_DEBUG(dbgs() << printMBBReference(B) << " dead map\n"
2665 << HexagonBlockRanges::PrintRangeMap(DM, HRI));
2666
2667 for (auto FI : BlockFIMap[&B]) {
2668 if (BadFIs.count(FI))
2669 continue;
2670 LLVM_DEBUG(dbgs() << "Working on fi#" << FI << '\n');
2671 HexagonBlockRanges::RangeList &RL = FIRangeMap[FI].Map[&B];
2672 for (auto &Range : RL) {
2673 LLVM_DEBUG(dbgs() << "--Examining range:" << RL << '\n');
2674 if (!IndexType::isInstr(Range.start()) ||
2675 !IndexType::isInstr(Range.end()))
2676 continue;
2677 MachineInstr &SI = *IM.getInstr(Range.start());
2678 MachineInstr &EI = *IM.getInstr(Range.end());
2679 assert(SI.mayStore() && "Unexpected start instruction");
2680 assert(EI.mayLoad() && "Unexpected end instruction");
2681 MachineOperand &SrcOp = SI.getOperand(2);
2682
2683 HexagonBlockRanges::RegisterRef SrcRR = { SrcOp.getReg(),
2684 SrcOp.getSubReg() };
2685 auto *RC = HII.getRegClass(SI.getDesc(), 2);
2686 // The this-> is needed to unconfuse MSVC.
2687 Register FoundR = this->findPhysReg(MF, Range, IM, DM, RC);
2688 LLVM_DEBUG(dbgs() << "Replacement reg:" << printReg(FoundR, &HRI)
2689 << '\n');
2690 if (FoundR == 0)
2691 continue;
2692#ifndef NDEBUG
2693 if (HasOptLimit) {
2695 return;
2696 SpillOptCount++;
2697 }
2698#endif
2699
2700 // Generate the copy-in: "FoundR = COPY SrcR" at the store location.
2701 MachineBasicBlock::iterator StartIt = SI.getIterator(), NextIt;
2702 MachineInstr *CopyIn = nullptr;
2703 if (SrcRR.Reg != FoundR || SrcRR.Sub != 0) {
2704 const DebugLoc &DL = SI.getDebugLoc();
2705 CopyIn = BuildMI(B, StartIt, DL, HII.get(TargetOpcode::COPY), FoundR)
2706 .add(SrcOp);
2707 }
2708
2709 ++StartIt;
2710 // Check if this is a last store and the FI is live-on-exit.
2711 if (LoxFIs.count(FI) && (&Range == &RL.back())) {
2712 // Update store's source register.
2713 if (unsigned SR = SrcOp.getSubReg())
2714 SrcOp.setReg(HRI.getSubReg(FoundR, SR));
2715 else
2716 SrcOp.setReg(FoundR);
2717 SrcOp.setSubReg(0);
2718 // We are keeping this register live.
2719 SrcOp.setIsKill(false);
2720 } else {
2721 B.erase(&SI);
2722 IM.replaceInstr(&SI, CopyIn);
2723 }
2724
2725 auto EndIt = std::next(EI.getIterator());
2726 for (auto It = StartIt; It != EndIt; It = NextIt) {
2727 MachineInstr &MI = *It;
2728 NextIt = std::next(It);
2729 int TFI;
2730 if (!HII.isLoadFromStackSlot(MI, TFI) || TFI != FI)
2731 continue;
2732 Register DstR = MI.getOperand(0).getReg();
2733 assert(MI.getOperand(0).getSubReg() == 0);
2734 MachineInstr *CopyOut = nullptr;
2735 if (DstR != FoundR) {
2736 DebugLoc DL = MI.getDebugLoc();
2737 unsigned MemSize = HII.getMemAccessSize(MI);
2738 assert(HII.getAddrMode(MI) == HexagonII::BaseImmOffset);
2739 unsigned CopyOpc = TargetOpcode::COPY;
2740 if (HII.isSignExtendingLoad(MI))
2741 CopyOpc = (MemSize == 1) ? Hexagon::A2_sxtb : Hexagon::A2_sxth;
2742 else if (HII.isZeroExtendingLoad(MI))
2743 CopyOpc = (MemSize == 1) ? Hexagon::A2_zxtb : Hexagon::A2_zxth;
2744 CopyOut = BuildMI(B, It, DL, HII.get(CopyOpc), DstR)
2745 .addReg(FoundR, getKillRegState(&MI == &EI));
2746 }
2747 IM.replaceInstr(&MI, CopyOut);
2748 B.erase(It);
2749 }
2750
2751 // Update the dead map.
2752 HexagonBlockRanges::RegisterRef FoundRR = { FoundR, 0 };
2753 for (auto RR : HexagonBlockRanges::expandToSubRegs(FoundRR, MRI, HRI))
2754 DM[RR].subtract(Range);
2755 } // for Range in range list
2756 }
2757 }
2758}
2759
/// Expands the alloca pseudo-instruction AI ("Rd = alloca Rs, #A") into real
/// instructions. All new code is inserted in front of AI, or placed in newly
/// created blocks; AI itself is left in its block.
///
/// \param AI  The pseudo-instruction: operand 0 is the result register Rd,
///            operand 1 is the size register Rs, operand 2 is the alignment
///            immediate A.
/// \param MF  Function containing AI; new blocks are created in it when
///            stack probing is required.
/// \param HII Instruction info used to build the new instructions.
/// \param SP  Stack pointer register that is adjusted by the expansion.
/// \param CF  Constant added to Rd after the allocation (skipped when zero).
2760void HexagonFrameLowering::expandAlloca(MachineInstr *AI, MachineFunction &MF,
2761 const HexagonInstrInfo &HII,
2762 Register SP, unsigned CF) const {
2763 MachineBasicBlock &MB = *AI->getParent();
2764 DebugLoc DL = AI->getDebugLoc();
2765 unsigned A = AI->getOperand(2).getImm();
2766
2767 MachineOperand &RdOp = AI->getOperand(0);
2768 MachineOperand &RsOp = AI->getOperand(1);
2769 Register Rd = RdOp.getReg(), Rs = RsOp.getReg();
2770
// The target lowering decides whether the allocation must be probed.
2771 auto &HST = MF.getSubtarget<HexagonSubtarget>();
2772 auto *TLI = HST.getTargetLowering();
2773 bool NeedsProbing = TLI->hasInlineStackProbe(MF);
2774
// Without probing, the whole expansion is a straight-line sequence placed
// in front of AI.
2775 if (!NeedsProbing) {
2776 // Have
2777 // Rd = alloca Rs, #A
2778 //
2779 // If Rs and Rd are different registers, use this sequence:
2780 // Rd = sub(r29, Rs)
2781 // r29 = sub(r29, Rs)
2782 // Rd = and(Rd, #-A) ; if necessary
2783 // r29 = and(r29, #-A) ; if necessary
2784 // Rd = add(Rd, #CF) ; CF size aligned to at most A
2785 // otherwise, do
2786 // Rd = sub(r29, Rs)
2787 // Rd = and(Rd, #-A) ; if necessary
2788 // r29 = Rd
2789 // Rd = add(Rd, #CF) ; CF size aligned to at most A
2790
2791 // Rd = sub(r29, Rs)
2792 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd).addReg(SP).addReg(Rs);
// When Rs == Rd, the first sub has already overwritten Rs, so SP cannot be
// recomputed from Rs; it is copied from Rd further down instead.
2793 if (Rs != Rd) {
2794 // r29 = sub(r29, Rs)
2795 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), SP).addReg(SP).addReg(Rs);
2796 }
// Alignment masks are only emitted when A exceeds 8 (presumably because SP
// is already 8-byte aligned -- TODO confirm).
2797 if (A > 8) {
2798 // Rd = and(Rd, #-A)
2799 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2800 .addReg(Rd)
2801 .addImm(-int64_t(A));
2802 if (Rs != Rd)
2803 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), SP)
2804 .addReg(SP)
2805 .addImm(-int64_t(A));
2806 }
2807 if (Rs == Rd) {
2808 // r29 = Rd
2809 BuildMI(MB, AI, DL, HII.get(TargetOpcode::COPY), SP).addReg(Rd);
2810 }
2811 if (CF > 0) {
2812 // Rd = add(Rd, #CF)
2813 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_addi), Rd).addReg(Rd).addImm(CF);
2814 }
2815 return;
2816 }
2817
2818 // Stack probing for dynamic allocation. The size Rs is a runtime value
2819 // so the probe loop is always emitted. MB's only successor is LoopMBB, so
// the loop body runs at least once even when Rs is small: one probe store
// is made below the current SP before SP is snapped to Rd in ExitMBB.
2820 //
2821 // Compute the target SP into Rd (with optional alignment), then probe
2822 // each page on the way down:
2823 //
2824 // Rd = sub(r29, Rs)
2825 // [Rd = and(Rd, #-A)] ; if alignment > 8
2826 // LoopMBB:
2827 // r29 = add(r29, #-ProbeSize)
2828 // memw(r29+#0) = #0
2829 // p0 = cmp.gtu(r29, Rd)
2830 // if (p0.new) jump:t LoopMBB
2831 // ExitMBB:
2832 // r29 = Rd
2833 // [Rd = add(Rd, #CF)] ; if CF > 0
2834 // <rest of original block>
2835 //
2836 // Rd holds the exact (aligned) target SP throughout the loop, so the
2837 // final "r29 = Rd" snaps SP to the correct value even when Rs is not
2838 // a multiple of ProbeSize.
2839
2840 Align StackAlign = getStackAlign();
2841 unsigned ProbeSize = TLI->getStackProbeSize(MF, StackAlign);
// NOTE(review): `Flags`, used by every setMIFlags() call below, is not
// declared anywhere in the visible part of this function; its declaration
// appears to have been dropped from this listing (inner line 2842).
2843
2844 // Emit target-SP computation into Rd before splitting the block.
2845 // Rd = sub(r29, Rs)
2846 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_sub), Rd)
2847 .addReg(SP)
2848 .addReg(Rs)
2849 .setMIFlags(Flags);
2850 if (A > 8) {
2851 // Rd = and(Rd, #-A)
2852 BuildMI(MB, AI, DL, HII.get(Hexagon::A2_andir), Rd)
2853 .addReg(Rd)
2854 .addImm(-int64_t(A))
2855 .setMIFlags(Flags);
2856 }
2857
2858 // Split the block: everything after AI goes into ExitMBB.
// Both new blocks are inserted right after MB in layout order, LoopMBB
// first and ExitMBB second.
2859 MachineFunction::iterator InsertPt = std::next(MB.getIterator());
2860 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(MB.getBasicBlock());
2861 MF.insert(InsertPt, LoopMBB);
2862 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MB.getBasicBlock());
2863 MF.insert(InsertPt, ExitMBB);
2864
2865 // Move instructions after AI (exclusive) into ExitMBB.
// ExitMBB also takes over all of MB's former successors.
2866 ExitMBB->splice(ExitMBB->end(), &MB, std::next(AI->getIterator()), MB.end());
2867 ExitMBB->transferSuccessorsAndUpdatePHIs(&MB);
2868
2869 // LoopMBB: probe each page.
2870 // r29 = add(r29, #-ProbeSize)
2871 // memw(r29+#0) = #0
2872 // p0 = cmp.gtu(r29, Rd)
2873 // if (p0.new) jump:t LoopMBB
// NOTE(review): from here on the code names Hexagon::R29 directly, whereas
// the instructions above use the SP parameter; presumably both denote the
// same register -- confirm against the caller.
2874 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::A2_addi), Hexagon::R29)
2875 .addReg(Hexagon::R29)
2876 .addImm(-int(ProbeSize))
2877 .setMIFlags(Flags);
2878
// Store the immediate 0 at offset 0 from the new SP to touch the page.
2879 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::S4_storeiri_io))
2880 .addReg(Hexagon::R29)
2881 .addImm(0)
2882 .addImm(0)
2883 .setMIFlags(Flags);
2884
// Keep looping while SP is still (unsigned) above the target held in Rd.
2885 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::C2_cmpgtu),
2886 Hexagon::P0)
2887 .addReg(Hexagon::R29)
2888 .addReg(Rd)
2889 .setMIFlags(Flags);
2890
2891 BuildMI(*LoopMBB, LoopMBB->end(), DL, HII.get(Hexagon::J2_jumpt))
2892 .addReg(Hexagon::P0)
2893 .addMBB(LoopMBB)
2894 .setMIFlags(Flags);
2895
2896 // ExitMBB: snap SP to exact target, then apply CF offset to Rd.
2897 // r29 = Rd
2898 // [Rd = add(Rd, #CF)]
// Both instructions are inserted in front of the code spliced over from MB.
2899 MachineBasicBlock::iterator ExitIt = ExitMBB->begin();
2900 BuildMI(*ExitMBB, ExitIt, DL, HII.get(Hexagon::A2_tfr), Hexagon::R29)
2901 .addReg(Rd)
2902 .setMIFlags(Flags);
2903 if (CF > 0) {
2904 BuildMI(*ExitMBB, ExitIt, DL, HII.get(Hexagon::A2_addi), Rd)
2905 .addReg(Rd)
2906 .addImm(CF)
2907 .setMIFlags(Flags);
2908 }
2909
2910 // Wire up CFG edges.
// MB -> LoopMBB, LoopMBB -> LoopMBB (back edge), LoopMBB -> ExitMBB.
2911 MB.addSuccessor(LoopMBB);
2912 LoopMBB->addSuccessor(LoopMBB);
2913 LoopMBB->addSuccessor(ExitMBB);
2914
2915 // Recompute live-ins for the new blocks. AI is still in MB at this
2916 // point; the caller erases it after expandAlloca returns.
2917 fullyRecomputeLiveIns({ExitMBB, LoopMBB});
2918}
2919
2921 const MachineFrameInfo &MFI = MF.getFrameInfo();
2922 if (!MFI.hasVarSizedObjects())
2923 return false;
2924 // Do not check for max stack object alignment here, because the stack
2925 // may not be complete yet. Assume that we will need PS_aligna if there
2926 // are variable-sized objects.
2927 return true;
2928}
2929
2931 const MachineFunction &MF) const {
2932 for (auto &B : MF)
2933 for (auto &I : B)
2934 if (I.getOpcode() == Hexagon::PS_aligna)
2935 return &I;
2936 return nullptr;
2937}
2938
2939/// Adds all callee-saved registers as implicit uses or defs to the
2940/// instruction.
2941void HexagonFrameLowering::addCalleeSaveRegistersAsImpOperand(MachineInstr *MI,
2942 const CSIVect &CSI, bool IsDef, bool IsKill) const {
2943 // Add the callee-saved registers as implicit uses.
2944 for (auto &R : CSI)
2945 MI->addOperand(MachineOperand::CreateReg(R.getReg(), IsDef, true, IsKill));
2946}
2947
2948/// Determine whether the callee-saved register saves and restores should
2949/// be generated via inline code. If this function returns "true", inline
2950/// code will be generated. If this function returns "false", additional
2951/// checks are performed, which may still lead to the inline code.
///
/// \param MF  Function being lowered.
/// \param CSI Callee-saved registers that have to be saved/restored.
/// \returns false only when every early-out below is passed and CSI consists
///          solely of double registers forming a gap-free run that starts
///          at D8.
2952bool HexagonFrameLowering::shouldInlineCSR(const MachineFunction &MF,
2953 const CSIVect &CSI) const {
// NOTE(review): the conditions guarding the next two returns (inner lines
// 2954 and 2956) are missing from this listing; as shown, the first return
// would be unconditional. Restore them from the original source.
2955 return true;
2957 return true;
// Functions without a frame pointer always use inline save/restore code.
2958 if (!hasFP(MF))
2959 return true;
// NOTE(review): inner line 2961 is missing between the next two lines, so
// the full condition for this return is not visible here.
2960 if (!isOptSize(MF) && !isMinSize(MF))
2962 return true;
2963
2964 // Check if CSI only has double registers, and if the registers form
2965 // a contiguous block starting from D8.
// Regs gets one bit per target register; the bit is set for each CSI entry.
2966 BitVector Regs(Hexagon::NUM_TARGET_REGS);
2967 for (const CalleeSavedInfo &I : CSI) {
2968 MCRegister R = I.getReg();
// Any register outside DoubleRegs forces inline code.
2969 if (!Hexagon::DoubleRegsRegClass.contains(R))
2970 return true;
2971 Regs[R] = true;
2972 }
// find_first() yields -1 for an empty set, which also fails this test.
2973 int F = Regs.find_first();
2974 if (F != Hexagon::D8)
2975 return true;
// Walk the set bits in increasing order; a jump of more than one between
// consecutive set bits means the registers are not contiguous.
2976 while (F >= 0) {
2977 int N = Regs.find_next(F);
2978 if (N >= 0 && N != F+1)
2979 return true;
2980 F = N;
2981 }
2982
2983 return false;
2984}
2985
2986bool HexagonFrameLowering::useSpillFunction(const MachineFunction &MF,
2987 const CSIVect &CSI) const {
2988 if (shouldInlineCSR(MF, CSI))
2989 return false;
2990 unsigned NumCSI = CSI.size();
2991 if (NumCSI <= 1)
2992 return false;
2993
2994 unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs
2996 return Threshold < NumCSI;
2997}
2998
2999bool HexagonFrameLowering::useRestoreFunction(const MachineFunction &MF,
3000 const CSIVect &CSI) const {
3001 if (shouldInlineCSR(MF, CSI))
3002 return false;
3003 // The restore functions do a bit more than just restoring registers.
3004 // The non-returning versions will go back directly to the caller's
3005 // caller, others will clean up the stack frame in preparation for
3006 // a tail call. Using them can still save code size even if only one
3007 // register is getting restores. Make the decision based on -Oz:
3008 // using -Os will use inline restore for a single register.
3009 if (isMinSize(MF))
3010 return true;
3011 unsigned NumCSI = CSI.size();
3012 if (NumCSI <= 1)
3013 return false;
3014
3015 unsigned Threshold = isOptSize(MF) ? SpillFuncThresholdOs-1
3017 return Threshold < NumCSI;
3018}
3019
3020bool HexagonFrameLowering::mayOverflowFrameOffset(MachineFunction &MF) const {
3021 unsigned StackSize = MF.getFrameInfo().estimateStackSize(MF);
3022 auto &HST = MF.getSubtarget<HexagonSubtarget>();
3023 // A fairly simplistic guess as to whether a potential load/store to a
3024 // stack location could require an extra register.
3025 if (HST.useHVXOps() && StackSize > 256)
3026 return true;
3027
3028 // Check if the function has store-immediate instructions that access
3029 // the stack. Since the offset field is not extendable, if the stack
3030 // size exceeds the offset limit (6 bits, shifted), the stores will
3031 // require a new base register.
3032 bool HasImmStack = false;
3033 unsigned MinLS = ~0u; // Log_2 of the memory access size.
3034
3035 for (const MachineBasicBlock &B : MF) {
3036 for (const MachineInstr &MI : B) {
3037 unsigned LS = 0;
3038 switch (MI.getOpcode()) {
3039 case Hexagon::S4_storeirit_io:
3040 case Hexagon::S4_storeirif_io:
3041 case Hexagon::S4_storeiri_io:
3042 ++LS;
3043 [[fallthrough]];
3044 case Hexagon::S4_storeirht_io:
3045 case Hexagon::S4_storeirhf_io:
3046 case Hexagon::S4_storeirh_io:
3047 ++LS;
3048 [[fallthrough]];
3049 case Hexagon::S4_storeirbt_io:
3050 case Hexagon::S4_storeirbf_io:
3051 case Hexagon::S4_storeirb_io:
3052 if (MI.getOperand(0).isFI())
3053 HasImmStack = true;
3054 MinLS = std::min(MinLS, LS);
3055 break;
3056 }
3057 }
3058 }
3059
3060 if (HasImmStack)
3061 return !isUInt<6>(StackSize >> MinLS);
3062
3063 return false;
3064}
3065
3066namespace {
3067// Struct used by orderFrameObjects to help sort the stack objects.
3068struct HexagonFrameSortingObject {
3069 bool IsValid = false;
3070 unsigned Index = 0; // Index of Object into MFI list.
3071 unsigned Size = 0;
3072 Align ObjectAlignment = Align(1); // Alignment of Object in bytes.
3073};
3074
3075struct HexagonFrameSortingComparator {
3076 inline bool operator()(const HexagonFrameSortingObject &A,
3077 const HexagonFrameSortingObject &B) const {
3078 return std::make_tuple(!A.IsValid, A.ObjectAlignment, A.Size) <
3079 std::make_tuple(!B.IsValid, B.ObjectAlignment, B.Size);
3080 }
3081};
3082} // namespace
3083
3084// Sort objects on the stack by alignment value and then by size to minimize
3085// padding.
//
// ObjectsToAllocate is rewritten in place. Because the sorted records are
// written back from the last slot towards the first, the final list runs
// from the largest alignment/size down to the smallest.
//
// NOTE(review): the first line of the signature (inner line 3086) is missing
// from this listing; only its parameter list is visible below.
3087 const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const {
3088
3089 if (ObjectsToAllocate.empty())
3090 return;
3091
3092 const MachineFrameInfo &MFI = MF.getFrameInfo();
3093 int NObjects = ObjectsToAllocate.size();
3094
3095 // Create an array of all MFI objects.
// NOTE(review): the declaration of SortingObjects (inner line 3096) is
// missing from this listing; only its constructor argument is visible. It
// appears to hold one record per frame index up to getObjectIndexEnd().
3097 MFI.getObjectIndexEnd());
3098
// Walk the frame indices in increasing order and fill in the record of each
// index that matches the next entry of ObjectsToAllocate. This presumably
// relies on ObjectsToAllocate being sorted in increasing order -- confirm
// against the caller.
3099 for (int i = 0, j = 0, e = MFI.getObjectIndexEnd(); i < e && j != NObjects;
3100 ++i) {
3101 if (i != ObjectsToAllocate[j])
3102 continue;
3103 j++;
3104
3105 // A variable size object has size equal to 0. Since Hexagon sets
3106 // getUseLocalStackAllocationBlock() to true, a local block is allocated
3107 // earlier. This case is not handled here for now.
// Bailing out here leaves ObjectsToAllocate in its original order.
3108 int Size = MFI.getObjectSize(i);
3109 if (Size == 0)
3110 return;
3111
3112 SortingObjects[i].IsValid = true;
3113 SortingObjects[i].Index = i;
3114 SortingObjects[i].Size = Size;
3115 SortingObjects[i].ObjectAlignment = MFI.getObjectAlign(i);
3116 }
3117
3118 // Sort objects by alignment and then by size.
// The comparator puts valid records ahead of the unfilled ones.
3119 llvm::stable_sort(SortingObjects, HexagonFrameSortingComparator());
3120
3121 // Modify the original list to represent the final order.
// Only the first NObjects sorted records are consumed; they are stored from
// the back of ObjectsToAllocate to the front, reversing the sorted order.
3122 int i = NObjects;
3123 for (auto &Obj : SortingObjects) {
3124 if (i == 0)
3125 break;
3126 ObjectsToAllocate[--i] = Obj.Index;
3127 }
3128}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
This file contains the simple types necessary to represent the attributes associated with functions a...
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
This file contains constants used for implementing Dwarf debug support.
static MachineInstr * getReturn(MachineBasicBlock &MBB)
Returns the "return" instruction from this block, or nullptr if there isn't any.
static cl::opt< unsigned > ShrinkLimit("shrink-frame-limit", cl::init(std::numeric_limits< unsigned >::max()), cl::Hidden, cl::desc("Max count of stack frame shrink-wraps"))
static bool isOptNone(const MachineFunction &MF)
static cl::opt< int > SpillFuncThreshold("spill-func-threshold", cl::Hidden, cl::desc("Specify O2(not Os) spill func threshold"), cl::init(6))
static std::optional< MachineBasicBlock::iterator > findCFILocation(MachineBasicBlock &B)
static cl::opt< bool > EliminateFramePointer("hexagon-fp-elim", cl::init(true), cl::Hidden, cl::desc("Refrain from using FP whenever possible"))
static bool enableAllocFrameElim(const MachineFunction &MF)
static const char * getSpillFunctionFor(Register MaxReg, SpillKind SpillType, bool Stkchk=false)
static bool hasReturn(const MachineBasicBlock &MBB)
Returns true if MBB contains an instruction that returns.
static cl::opt< bool > EnableSaveRestoreLong("enable-save-restore-long", cl::Hidden, cl::desc("Enable long calls for save-restore stubs."), cl::init(false))
static bool needToReserveScavengingSpillSlots(MachineFunction &MF, const HexagonRegisterInfo &HRI, const TargetRegisterClass *RC)
Returns true if there are no caller-saved registers available in class RC.
static bool isOptSize(const MachineFunction &MF)
static Register getMax32BitSubRegister(Register Reg, const TargetRegisterInfo &TRI, bool hireg=true)
Map a register pair Reg to the subregister that has the greater "number", i.e.
static cl::opt< int > SpillFuncThresholdOs("spill-func-threshold-Os", cl::Hidden, cl::desc("Specify Os spill func threshold"), cl::init(1))
static bool needsStackFrame(const MachineBasicBlock &MBB, const BitVector &CSR, const HexagonRegisterInfo &HRI)
Checks if the basic block contains any instruction that needs a stack frame to be already in place.
static cl::opt< bool > DisableDeallocRet("disable-hexagon-dealloc-ret", cl::Hidden, cl::desc("Disable Dealloc Return for Hexagon target"))
static cl::opt< bool > EnableShrinkWrapping("hexagon-shrink-frame", cl::init(true), cl::Hidden, cl::desc("Enable stack frame shrink wrapping"))
static bool hasTailCall(const MachineBasicBlock &MBB)
Returns true if MBB has a machine instructions that indicates a tail call in the block.
static cl::opt< unsigned > NumberScavengerSlots("number-scavenger-slots", cl::Hidden, cl::desc("Set the number of scavenger slots"), cl::init(2))
static Register getMaxCalleeSavedReg(ArrayRef< CalleeSavedInfo > CSI, const TargetRegisterInfo &TRI)
Returns the callee saved register with the largest id in the vector.
static bool isMinSize(const MachineFunction &MF)
static cl::opt< unsigned > SpillOptMax("spill-opt-max", cl::Hidden, cl::init(std::numeric_limits< unsigned >::max()))
static unsigned SpillOptCount
static void dump_registers(BitVector &Regs, const TargetRegisterInfo &TRI)
static void emitSCSPrologue(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL)
static bool isRestoreCall(unsigned Opc)
static cl::opt< bool > OptimizeSpillSlots("hexagon-opt-spill", cl::Hidden, cl::init(true), cl::desc("Optimize spill slots"))
static void emitSCSEpilogue(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL)
static cl::opt< bool > EnableStackOVFSanitizer("enable-stackovf-sanitizer", cl::Hidden, cl::desc("Enable runtime checks for stack overflow."), cl::init(false))
IRTranslator LLVM IR MI
Legalize the Machine IR a function s Machine IR
Definition Legalizer.cpp:81
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
This file declares the machine register scavenger class.
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallSet class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:119
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
Get the array size.
Definition ArrayRef.h:141
bool empty() const
Check if the array is empty.
Definition ArrayRef.h:136
int find_first() const
Returns the index of the first set bit, -1 if none of the bits are set.
Definition BitVector.h:317
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
Definition BitVector.h:355
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
int find_next(unsigned Prev) const
Returns the index of the next set bit following the "Prev" bit.
Definition BitVector.h:324
Helper class for creating CFI instructions and inserting them into MIR.
void buildEscape(StringRef Bytes, StringRef Comment="") const
void buildRestore(MCRegister Reg) const
The CalleeSavedInfo class tracks the information need to locate where a callee saved register is in t...
A debug info location.
Definition DebugLoc.h:126
NodeT * findNearestCommonDominator(NodeT *A, NodeT *B) const
Find nearest common dominator basic block for basic block A and B.
bool dominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
dominates - Returns true iff A dominates B.
void recalculate(ParentType &Func)
recalculate - compute a dominator tree for the given function
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:685
bool hasOptNone() const
Do not optimize this function (-O0).
Definition Function.h:682
bool isVarArg() const
isVarArg - Return true if this function takes a variable number of arguments.
Definition Function.h:229
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:723
void replaceInstr(MachineInstr *OldMI, MachineInstr *NewMI)
IndexType getIndex(MachineInstr *MI) const
MachineInstr * getInstr(IndexType Idx) const
void add(IndexType Start, IndexType End, bool Fixed, bool TiedEnd)
const MachineInstr * getAlignaInstr(const MachineFunction &MF) const
void insertCFIInstructions(MachineFunction &MF) const
bool hasFPImpl(const MachineFunction &MF) const override
bool enableCalleeSaveSkip(const MachineFunction &MF) const override
Returns true if the target can safely skip saving callee-saved registers for noreturn nounwind functi...
MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, MachineBasicBlock::iterator I) const override
This method is called during prolog/epilog code insertion to eliminate call frame setup and destroy p...
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
getFrameIndexReference - This method should return the base register and offset used to reference a f...
void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override
Perform most of the PEI work here:
void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS) const override
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() sh...
void orderFrameObjects(const MachineFunction &MF, SmallVectorImpl< int > &ObjectsToAllocate) const override
Order the symbols in the local stack frame.
void processFunctionBeforeFrameFinalized(MachineFunction &MF, RegScavenger *RS=nullptr) const override
processFunctionBeforeFrameFinalized - This method is called immediately before the specified function...
void inlineStackProbe(MachineFunction &MF, MachineBasicBlock &PrologueMBB) const override
Replace a StackProbe stub (if any) with the actual probe code inline.
const SpillSlot * getCalleeSavedSpillSlots(unsigned &NumEntries) const override
getCalleeSavedSpillSlots - This method returns a pointer to an array of pairs, that contains an entry...
bool needsAligna(const MachineFunction &MF) const
bool assignCalleeSavedSpillSlots(MachineFunction &MF, const TargetRegisterInfo *TRI, std::vector< CalleeSavedInfo > &CSI) const override
assignCalleeSavedSpillSlots - Allows target to override spill slot assignment logic.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Store the specified register of the given register class to the specified stack frame index.
const HexagonRegisterInfo & getRegisterInfo() const
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, unsigned SubReg=0, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
Load the specified register of the given register class from the specified stack frame index.
Hexagon target-specific information for each MachineFunction.
bool isEHReturnCalleeSaveReg(Register Reg) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
Code Generation virtual methods...
const MCPhysReg * getCallerSavedRegs(const MachineFunction *MF, const TargetRegisterClass *RC) const
const HexagonInstrInfo * getInstrInfo() const override
const HexagonFrameLowering * getFrameLowering() const override
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:615
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:657
LLVM_ABI MCSymbol * createTempSymbol()
Create a temporary symbol with a unique name.
Describe properties that are true of each instruction in the target description file.
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition MCRegister.h:72
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition MCSymbol.h:42
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
MachineInstrBundleIterator< const MachineInstr > const_iterator
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
bool dominates(const MachineInstr *A, const MachineInstr *B) const
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
void setMaxCallFrameSize(uint64_t S)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
bool isObjectPreAllocated(int ObjectIdx) const
Return true if the object was pre-allocated into the local block.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
bool hasCalls() const
Return true if the current function has any function calls.
Align getMaxAlign() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
uint64_t getMaxCallFrameSize() const
Return the maximum size of a call frame that must be allocated for an outgoing function call.
LLVM_ABI uint64_t estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
LLVM_ABI int CreateSpillStackObject(uint64_t Size, Align Alignment, TargetStackID::Value StackID=TargetStackID::Default)
Create a new statically sized stack object that represents a spill slot, returning a nonnegative iden...
const std::vector< CalleeSavedInfo > & getCalleeSavedInfo() const
Returns a reference to call saved info vector for the current function.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
LLVM_ABI int CreateFixedSpillStackObject(uint64_t Size, int64_t SPOffset, bool IsImmutable=false)
Create a spill slot at a fixed location on the stack.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
void setStackSize(uint64_t Size)
Set the size of the stack.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Properties which a MachineFunction may have at a given point in time.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
bool needsFrameMoves() const
True if this function needs frame moves for debug or exceptions.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MCContext & getContext() const
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
MachineBasicBlock * getBlockNumbered(unsigned N) const
getBlockNumbered - MachineBasicBlocks are automatically numbered when they are inserted into the mach...
Function & getFunction()
Return the LLVM function that this machine code represents.
unsigned getNumBlockIDs() const
getNumBlockIDs - Return the number of MBB ID's allocated.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool isReturn(QueryType Type=AnyInBundle) const
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI void copyImplicitOps(MachineFunction &MF, const MachineInstr &MI)
Copy implicit register operands from specified instruction to this instruction.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
const MachineOperand & getOperand(unsigned i) const
@ MOStore
The memory access writes data.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
int64_t getImm() const
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
void setIsKill(bool Val=true)
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
LLVM_ABI MachineBasicBlock * findNearestCommonDominator(ArrayRef< MachineBasicBlock * > Blocks) const
Returns the nearest common dominator of the given blocks.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified register class.
LLVM_ABI bool isPhysRegUsed(MCRegister PhysReg, bool SkipRegMaskTest=false) const
Return true if the specified register is modified or read in this function.
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isValid() const
Definition Register.h:112
A vector that has set insertion semantics.
Definition SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition SmallSet.h:176
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition SmallSet.h:184
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:56
bool hasFP(const MachineFunction &MF) const
hasFP - Return true if the specified function should have a dedicated frame pointer register.
virtual void determineCalleeSaves(MachineFunction &MF, BitVector &SavedRegs, RegScavenger *RS=nullptr) const
This method determines which of the registers reported by TargetRegisterInfo::getCalleeSavedRegs() should actually get saved.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligned on entry to a function.
const TargetRegisterInfo & getRegisterInfo() const
Primary interface to the complete machine description for the target machine.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
bool isPositionIndependent() const
TargetOptions Options
LLVM_ABI bool DisableFramePointerElim(const MachineFunction &MF) const
DisableFramePointerElim - This returns true if frame pointer elimination optimization should be disabled for the given machine function.
unsigned getID() const
Return the register class ID number.
ArrayRef< MCPhysReg > getRawAllocationOrder(const MachineFunction &MF, bool Rev=false) const
Returns the preferred order for allocating registers from this register class in MF.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
virtual bool isRegisterReservedByUser(Register R) const
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
virtual const TargetLowering * getTargetLowering() const
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
void stable_sort(R &&Range)
Definition STLExtras.h:2116
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
@ Dead
Unused definition.
@ Kill
The last use of a register.
constexpr RegState getKillRegState(bool B)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionPass * createHexagonCallFrameInformation()
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
@ Default
-O2, -Os, -Oz
Definition CodeGen.h:85
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition MCRegister.h:21
RelativeUniformCounterPtr ValuesPtrExpr VTableAddr Count
Definition InstrProf.h:145
MaybeAlign getStackAlign(const CallBase &I, unsigned Index)
DWARFExpression::Operation Op
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static RegisterSet expandToSubRegs(RegisterRef R, const MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI)
std::map< RegisterRef, RangeList > RegToRangeMap
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.