AArch64InstrInfo.cpp (LLVM 22.0.0git)
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69    CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70                       cl::desc("Restrict range of CB instructions (DEBUG)"));
71
72static cl::opt<unsigned> TBZDisplacementBits(
73    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74    cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
76static cl::opt<unsigned> CBZDisplacementBits(
77    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78    cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82                        cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86                      cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94    : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95                          AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96      RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the maximum number of bytes of code the specified
99/// instruction may occupy.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101  const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134  // The size should preferably be set in
135  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
136  // The specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194  unsigned Size = 0;
195  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197  while (++I != E && I->isInsideBundle()) {
198    assert(!I->isBundle() && "No nested bundle!");
199    Size += getInstSizeInBytes(*I);
200  }
201  return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205                            SmallVectorImpl<MachineOperand> &Cond) {
206  // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
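// Summary of the Cond encodings produced above (derived from the cases in
// parseCondBranch, kept here as a quick reference):
//   Bcc:            Cond = { CC }
//   CB[N]Z[WX]:     Cond = { -1, Opcode, Reg }
//   TB[N]Z[WX]:     Cond = { -1, Opcode, Reg, BitImm }
//   CB[WX]P(ri|rr): Cond = { -1, Opcode, CC, Op0, Op1 }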
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274                                             int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump"
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
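// Worked example (illustrative): Bcc has 19 displacement bits by default and
// displacements are encoded in 4-byte units, so isBranchOffsetInRange accepts
// roughly BrOffset in [-2^20, 2^20) bytes, i.e. about +/-1MiB; an unconditional
// B with 26 bits covers about +/-128MiB.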
280
281MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
282    const MachineInstr &MI) const {
283  switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308                                            MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323    if (!isInt<33>(BrOffset))
324      report_fatal_error(
325          "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359  if (!AFI || AFI->hasRedZone().value_or(true))
360    report_fatal_error(
361        "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372  BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373      .addReg(AArch64::SP, RegState::Define)
374      .addReg(Reg, RegState::Define)
375      .addReg(AArch64::SP)
376      .addImm(16);
377}
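// Rough shape of the three expansions above (illustrative sketch, not verbatim
// compiler output):
//   X16 free:                b NewDestBB           ; linker may insert a thunk
//   cold section + free GPR: adrp xN, NewDestBB
//                            add  xN, xN, :lo12:NewDestBB
//                            br   xN
//   otherwise:               str  x16, [sp, #-16]! ; spill, no red zone allowed
//                            b    RestoreBB
//     RestoreBB:             ldr  x16, [sp], #16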
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381                                     MachineBasicBlock *&TBB,
382                                     MachineBasicBlock *&FBB,
383                                     SmallVectorImpl<MachineOperand> &Cond,
384                                     bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429        // Return now that the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438  // If we're allowed to modify and the block ends in an unconditional branch
439  // which could simply fall through, remove the branch. (Note: This case only
440  // matters when we can't understand the whole sequence, otherwise it's also
441  // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
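// Example of the contract implemented above (illustrative): for a block that
// ends in "cbz w0, L1; b L2", analyzeBranch returns false with TBB = L1,
// FBB = L2 and Cond = { -1, CBZW, w0 }; for a lone fall-through "b.eq L1" it
// returns TBB = L1, no FBB and Cond = { EQ }.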
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497                                              MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 };
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
547bool AArch64InstrInfo::reverseBranchCondition(
548    SmallVectorImpl<MachineOperand> &Cond) const {
549  if (Cond[0].getImm() != -1) {
550    // Regular Bcc
551    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553  } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583    // Cond is { -1, Opcode, CC, Op0, Op1 }
584    case AArch64::CBWPri:
585    case AArch64::CBXPri:
586    case AArch64::CBWPrr:
587    case AArch64::CBXPrr: {
588      // Pseudos using standard 4-bit Arm condition codes
589      AArch64CC::CondCode CC =
590          static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591      Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592    }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600                                        int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636    ArrayRef<MachineOperand> Cond) const {
637  if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewReg = nullptr) {
704  VReg = removeCopies(MRI, VReg);
705  if (!Register::isVirtualRegister(VReg))
706    return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcReg = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::SUBREG_TO_REG:
714 // Check for the following way to define an 64-bit immediate:
715 // %0:gpr32 = MOVi32imm 1
716 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
717 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
718 return 0;
719 if (!DefMI->getOperand(2).isReg())
720 return 0;
721 if (!DefMI->getOperand(3).isImm() ||
722 DefMI->getOperand(3).getImm() != AArch64::sub_32)
723 return 0;
724 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
725 if (DefMI->getOpcode() != AArch64::MOVi32imm)
726 return 0;
727 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
728 return 0;
729 assert(Is64Bit);
730 SrcReg = AArch64::XZR;
731 Opc = AArch64::CSINCXr;
732 break;
733
734 case AArch64::MOVi32imm:
735 case AArch64::MOVi64imm:
736 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
737 return 0;
738 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
739 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
740 break;
741
742 case AArch64::ADDSXri:
743 case AArch64::ADDSWri:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to ADDXri and ADDWri.
749 [[fallthrough]];
750 case AArch64::ADDXri:
751 case AArch64::ADDWri:
752 // add x, 1 -> csinc.
753 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
754 DefMI->getOperand(3).getImm() != 0)
755 return 0;
756 SrcReg = DefMI->getOperand(1).getReg();
757 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
758 break;
759
760 case AArch64::ORNXrr:
761 case AArch64::ORNWrr: {
762 // not x -> csinv, represented as orn dst, xzr, src.
763 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
764 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
765 return 0;
766 SrcReg = DefMI->getOperand(2).getReg();
767 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
768 break;
769 }
770
771 case AArch64::SUBSXrr:
772 case AArch64::SUBSWrr:
773 // if NZCV is used, do not fold.
774 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
775 true) == -1)
776 return 0;
777 // fall-through to SUBXrr and SUBWrr.
778 [[fallthrough]];
779 case AArch64::SUBXrr:
780 case AArch64::SUBWrr: {
781 // neg x -> csneg, represented as sub dst, xzr, src.
782 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
783 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
784 return 0;
785 SrcReg = DefMI->getOperand(2).getReg();
786 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
787 break;
788 }
789 default:
790 return 0;
791 }
792 assert(Opc && SrcReg && "Missing parameters");
793
794 if (NewReg)
795 *NewReg = SrcReg;
796 return Opc;
797}
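// Patterns recognized above, summarized for reference (illustrative):
//   mov  w/x, #1                -> CSINC, with WZR/XZR as the replacement reg
//   add  x, y, #1 (shift 0)     -> CSINC on y
//   orn  x, xzr, y  ("not y")   -> CSINV on y
//   sub  x, xzr, y  ("neg y")   -> CSNEG on y
// The flag-setting ADDS/SUBS forms only qualify when their NZCV def is dead.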
798
799bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
800                                       ArrayRef<MachineOperand> Cond,
801                                       Register DstReg, Register TrueReg,
802 Register FalseReg, int &CondCycles,
803 int &TrueCycles,
804 int &FalseCycles) const {
805 // Check register classes.
806 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
807 const TargetRegisterClass *RC =
808 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
809 if (!RC)
810 return false;
811
812 // Also need to check the dest regclass, in case we're trying to optimize
813 // something like:
814 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
815 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
816 return false;
817
818 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
819 unsigned ExtraCondLat = Cond.size() != 1;
820
821 // GPRs are handled by csel.
822 // FIXME: Fold in x+1, -x, and ~x when applicable.
823 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
824 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
825 // Single-cycle csel, csinc, csinv, and csneg.
826 CondCycles = 1 + ExtraCondLat;
827 TrueCycles = FalseCycles = 1;
828 if (canFoldIntoCSel(MRI, TrueReg))
829 TrueCycles = 0;
830 else if (canFoldIntoCSel(MRI, FalseReg))
831 FalseCycles = 0;
832 return true;
833 }
834
835 // Scalar floating point is handled by fcsel.
836 // FIXME: Form fabs, fmin, and fmax when applicable.
837 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
838 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
839 CondCycles = 5 + ExtraCondLat;
840 TrueCycles = FalseCycles = 2;
841 return true;
842 }
843
844 // Can't do vectors.
845 return false;
846}
847
848void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
849                                    MachineBasicBlock::iterator I,
850                                    const DebugLoc &DL, Register DstReg,
851                                    ArrayRef<MachineOperand> Cond,
852                                    Register TrueReg, Register FalseReg) const {
853 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
854
855  // Parse the condition code, see parseCondBranch() above.
856  AArch64CC::CondCode CC;
857  switch (Cond.size()) {
858  default:
859    llvm_unreachable("Unknown condition opcode in Cond");
860  case 1: // b.cc
861    CC = AArch64CC::CondCode(Cond[0].getImm());
862    break;
863 case 3: { // cbz/cbnz
864 // We must insert a compare against 0.
865 bool Is64Bit;
866 switch (Cond[1].getImm()) {
867 default:
868 llvm_unreachable("Unknown branch opcode in Cond");
869 case AArch64::CBZW:
870 Is64Bit = false;
871 CC = AArch64CC::EQ;
872 break;
873 case AArch64::CBZX:
874 Is64Bit = true;
875 CC = AArch64CC::EQ;
876 break;
877 case AArch64::CBNZW:
878 Is64Bit = false;
879 CC = AArch64CC::NE;
880 break;
881 case AArch64::CBNZX:
882 Is64Bit = true;
883 CC = AArch64CC::NE;
884 break;
885 }
886 Register SrcReg = Cond[2].getReg();
887 if (Is64Bit) {
888 // cmp reg, #0 is actually subs xzr, reg, #0.
889 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
890 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
891 .addReg(SrcReg)
892 .addImm(0)
893 .addImm(0);
894 } else {
895 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
896 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
897 .addReg(SrcReg)
898 .addImm(0)
899 .addImm(0);
900 }
901 break;
902 }
903 case 4: { // tbz/tbnz
904 // We must insert a tst instruction.
905 switch (Cond[1].getImm()) {
906 default:
907 llvm_unreachable("Unknown branch opcode in Cond");
908 case AArch64::TBZW:
909 case AArch64::TBZX:
910 CC = AArch64CC::EQ;
911 break;
912 case AArch64::TBNZW:
913 case AArch64::TBNZX:
914 CC = AArch64CC::NE;
915 break;
916 }
917 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
918 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
919 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
920 .addReg(Cond[2].getReg())
921          .addImm(
922              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
923    else
924      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
925          .addReg(Cond[2].getReg())
926          .addImm(
927              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
928    break;
929 }
930 case 5: { // cb
931 // We must insert a cmp, that is a subs
932 // 0 1 2 3 4
933 // Cond is { -1, Opcode, CC, Op0, Op1 }
934 unsigned SUBSOpC, SUBSDestReg;
935 bool IsImm = false;
936 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
937 switch (Cond[1].getImm()) {
938 default:
939 llvm_unreachable("Unknown branch opcode in Cond");
940 case AArch64::CBWPri:
941 SUBSOpC = AArch64::SUBSWri;
942 SUBSDestReg = AArch64::WZR;
943 IsImm = true;
944 break;
945 case AArch64::CBXPri:
946 SUBSOpC = AArch64::SUBSXri;
947 SUBSDestReg = AArch64::XZR;
948 IsImm = true;
949 break;
950 case AArch64::CBWPrr:
951 SUBSOpC = AArch64::SUBSWrr;
952 SUBSDestReg = AArch64::WZR;
953 IsImm = false;
954 break;
955 case AArch64::CBXPrr:
956 SUBSOpC = AArch64::SUBSXrr;
957 SUBSDestReg = AArch64::XZR;
958 IsImm = false;
959 break;
960 }
961
962 if (IsImm)
963 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
964 .addReg(Cond[3].getReg())
965 .addImm(Cond[4].getImm())
966 .addImm(0);
967 else
968 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
969 .addReg(Cond[3].getReg())
970 .addReg(Cond[4].getReg());
971 }
972 }
973
974 unsigned Opc = 0;
975 const TargetRegisterClass *RC = nullptr;
976 bool TryFold = false;
977 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
978 RC = &AArch64::GPR64RegClass;
979 Opc = AArch64::CSELXr;
980 TryFold = true;
981 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
982 RC = &AArch64::GPR32RegClass;
983 Opc = AArch64::CSELWr;
984 TryFold = true;
985 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
986 RC = &AArch64::FPR64RegClass;
987 Opc = AArch64::FCSELDrrr;
988 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
989 RC = &AArch64::FPR32RegClass;
990 Opc = AArch64::FCSELSrrr;
991 }
992 assert(RC && "Unsupported regclass");
993
994 // Try folding simple instructions into the csel.
995 if (TryFold) {
996 unsigned NewReg = 0;
997 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
998 if (FoldedOpc) {
999      // The folded opcodes csinc, csinv and csneg apply the operation to
1000      // FalseReg, so we need to invert the condition.
1001      CC = AArch64CC::getInvertedCondCode(CC);
1002      TrueReg = FalseReg;
1003    } else
1004 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1005
1006 // Fold the operation. Leave any dead instructions for DCE to clean up.
1007 if (FoldedOpc) {
1008 FalseReg = NewReg;
1009 Opc = FoldedOpc;
1010 // Extend the live range of NewReg.
1011 MRI.clearKillFlags(NewReg);
1012 }
1013 }
1014
1015  // Pull all virtual registers into the appropriate class.
1016 MRI.constrainRegClass(TrueReg, RC);
1017 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1018 assert(
1019 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1020 FalseReg == AArch64::XZR) &&
1021 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1022 if (FalseReg.isVirtual())
1023 MRI.constrainRegClass(FalseReg, RC);
1024
1025 // Insert the csel.
1026 BuildMI(MBB, I, DL, get(Opc), DstReg)
1027 .addReg(TrueReg)
1028 .addReg(FalseReg)
1029 .addImm(CC);
1030}
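// Illustrative result (sketch): selecting between %t and %f on a Cond of
// size 3 such as { -1, CBNZW, %c } first materializes "subs wzr, %c, #0" and
// then emits "csel %dst, %t, %f, ne" (or the folded csinc/csinv/csneg variant
// when one input matched canFoldIntoCSel above).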
1031
1032// Return true if Imm can be loaded into a register by a "cheap" sequence of
1033// instructions. For now, "cheap" means at most two instructions.
1034static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1035 if (BitSize == 32)
1036 return true;
1037
1038 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1039  uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1040  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1041  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1042
1043 return Is.size() <= 2;
1044}
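// Example (illustrative): with BitSize == 64, an immediate such as 0x12345678
// expands to MOVZ+MOVK (two instructions) and is considered cheap, whereas a
// value like 0x123456789ABCDEF0 needs a longer MOVZ/MOVK chain and is not.
// The exact expansions come from AArch64_IMM::expandMOVImm.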
1045
1046// FIXME: this implementation should be micro-architecture dependent, so a
1047// micro-architecture target hook should be introduced here in future.
1048bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1049  if (Subtarget.hasExynosCheapAsMoveHandling()) {
1050 if (isExynosCheapAsMove(MI))
1051 return true;
1052 return MI.isAsCheapAsAMove();
1053 }
1054
1055 switch (MI.getOpcode()) {
1056 default:
1057 return MI.isAsCheapAsAMove();
1058
1059 case AArch64::ADDWrs:
1060 case AArch64::ADDXrs:
1061 case AArch64::SUBWrs:
1062 case AArch64::SUBXrs:
1063 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1064
1065 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1066 // ORRXri, it is as cheap as MOV.
1067 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1068 case AArch64::MOVi32imm:
1069 return isCheapImmediate(MI, 32);
1070 case AArch64::MOVi64imm:
1071 return isCheapImmediate(MI, 64);
1072 }
1073}
1074
1075bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1076 switch (MI.getOpcode()) {
1077 default:
1078 return false;
1079
1080 case AArch64::ADDWrs:
1081 case AArch64::ADDXrs:
1082 case AArch64::ADDSWrs:
1083 case AArch64::ADDSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 if (ShiftVal == 0)
1087 return true;
1088 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1089 }
1090
1091 case AArch64::ADDWrx:
1092 case AArch64::ADDXrx:
1093 case AArch64::ADDXrx64:
1094 case AArch64::ADDSWrx:
1095 case AArch64::ADDSXrx:
1096 case AArch64::ADDSXrx64: {
1097 unsigned Imm = MI.getOperand(3).getImm();
1098 switch (AArch64_AM::getArithExtendType(Imm)) {
1099 default:
1100 return false;
1101 case AArch64_AM::UXTB:
1102 case AArch64_AM::UXTH:
1103 case AArch64_AM::UXTW:
1104 case AArch64_AM::UXTX:
1105 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1106 }
1107 }
1108
1109 case AArch64::SUBWrs:
1110 case AArch64::SUBSWrs: {
1111 unsigned Imm = MI.getOperand(3).getImm();
1112 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1113 return ShiftVal == 0 ||
1114 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1115 }
1116
1117 case AArch64::SUBXrs:
1118 case AArch64::SUBSXrs: {
1119 unsigned Imm = MI.getOperand(3).getImm();
1120 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1121 return ShiftVal == 0 ||
1122 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1123 }
1124
1125 case AArch64::SUBWrx:
1126 case AArch64::SUBXrx:
1127 case AArch64::SUBXrx64:
1128 case AArch64::SUBSWrx:
1129 case AArch64::SUBSXrx:
1130 case AArch64::SUBSXrx64: {
1131 unsigned Imm = MI.getOperand(3).getImm();
1132 switch (AArch64_AM::getArithExtendType(Imm)) {
1133 default:
1134 return false;
1135 case AArch64_AM::UXTB:
1136 case AArch64_AM::UXTH:
1137 case AArch64_AM::UXTW:
1138 case AArch64_AM::UXTX:
1139 return AArch64_AM::getArithShiftValue(Imm) == 0;
1140 }
1141 }
1142
1143 case AArch64::LDRBBroW:
1144 case AArch64::LDRBBroX:
1145 case AArch64::LDRBroW:
1146 case AArch64::LDRBroX:
1147 case AArch64::LDRDroW:
1148 case AArch64::LDRDroX:
1149 case AArch64::LDRHHroW:
1150 case AArch64::LDRHHroX:
1151 case AArch64::LDRHroW:
1152 case AArch64::LDRHroX:
1153 case AArch64::LDRQroW:
1154 case AArch64::LDRQroX:
1155 case AArch64::LDRSBWroW:
1156 case AArch64::LDRSBWroX:
1157 case AArch64::LDRSBXroW:
1158 case AArch64::LDRSBXroX:
1159 case AArch64::LDRSHWroW:
1160 case AArch64::LDRSHWroX:
1161 case AArch64::LDRSHXroW:
1162 case AArch64::LDRSHXroX:
1163 case AArch64::LDRSWroW:
1164 case AArch64::LDRSWroX:
1165 case AArch64::LDRSroW:
1166 case AArch64::LDRSroX:
1167 case AArch64::LDRWroW:
1168 case AArch64::LDRWroX:
1169 case AArch64::LDRXroW:
1170 case AArch64::LDRXroX:
1171 case AArch64::PRFMroW:
1172 case AArch64::PRFMroX:
1173 case AArch64::STRBBroW:
1174 case AArch64::STRBBroX:
1175 case AArch64::STRBroW:
1176 case AArch64::STRBroX:
1177 case AArch64::STRDroW:
1178 case AArch64::STRDroX:
1179 case AArch64::STRHHroW:
1180 case AArch64::STRHHroX:
1181 case AArch64::STRHroW:
1182 case AArch64::STRHroX:
1183 case AArch64::STRQroW:
1184 case AArch64::STRQroX:
1185 case AArch64::STRSroW:
1186 case AArch64::STRSroX:
1187 case AArch64::STRWroW:
1188 case AArch64::STRWroX:
1189 case AArch64::STRXroW:
1190 case AArch64::STRXroX: {
1191 unsigned IsSigned = MI.getOperand(3).getImm();
1192 return !IsSigned;
1193 }
1194 }
1195}
1196
1197bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1198 unsigned Opc = MI.getOpcode();
1199 switch (Opc) {
1200 default:
1201 return false;
1202 case AArch64::SEH_StackAlloc:
1203 case AArch64::SEH_SaveFPLR:
1204 case AArch64::SEH_SaveFPLR_X:
1205 case AArch64::SEH_SaveReg:
1206 case AArch64::SEH_SaveReg_X:
1207 case AArch64::SEH_SaveRegP:
1208 case AArch64::SEH_SaveRegP_X:
1209 case AArch64::SEH_SaveFReg:
1210 case AArch64::SEH_SaveFReg_X:
1211 case AArch64::SEH_SaveFRegP:
1212 case AArch64::SEH_SaveFRegP_X:
1213 case AArch64::SEH_SetFP:
1214 case AArch64::SEH_AddFP:
1215 case AArch64::SEH_Nop:
1216 case AArch64::SEH_PrologEnd:
1217 case AArch64::SEH_EpilogStart:
1218 case AArch64::SEH_EpilogEnd:
1219 case AArch64::SEH_PACSignLR:
1220 case AArch64::SEH_SaveAnyRegQP:
1221 case AArch64::SEH_SaveAnyRegQPX:
1222 case AArch64::SEH_AllocZ:
1223 case AArch64::SEH_SaveZReg:
1224 case AArch64::SEH_SavePReg:
1225 return true;
1226 }
1227}
1228
1229bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1230                                             Register &SrcReg, Register &DstReg,
1231 unsigned &SubIdx) const {
1232 switch (MI.getOpcode()) {
1233 default:
1234 return false;
1235 case AArch64::SBFMXri: // aka sxtw
1236 case AArch64::UBFMXri: // aka uxtw
1237 // Check for the 32 -> 64 bit extension case, these instructions can do
1238 // much more.
1239 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1240 return false;
1241 // This is a signed or unsigned 32 -> 64 bit extension.
1242 SrcReg = MI.getOperand(1).getReg();
1243 DstReg = MI.getOperand(0).getReg();
1244 SubIdx = AArch64::sub_32;
1245 return true;
1246 }
1247}
1248
1249bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1250    const MachineInstr &MIa, const MachineInstr &MIb) const {
1251  const TargetRegisterInfo *TRI = &getRegisterInfo();
1252  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1253 int64_t OffsetA = 0, OffsetB = 0;
1254 TypeSize WidthA(0, false), WidthB(0, false);
1255 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1256
1257 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1258 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1259
1260  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1261      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1262    return false;
1263
1264  // Retrieve the base register, the offset from that base, and the width.
1265  // The width is the size of memory that is being loaded/stored (e.g. 1, 2,
1266  // 4, 8). If the bases are identical, and the offset of the lower memory
1267  // access plus its width does not overlap the offset of the higher memory
1268  // access, then the memory accesses are disjoint.
1269 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1270 // are assumed to have the same scale (vscale).
1271 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1272 WidthA, TRI) &&
1273 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1274 WidthB, TRI)) {
1275 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1276 OffsetAIsScalable == OffsetBIsScalable) {
1277 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1278 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1279 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1280 if (LowWidth.isScalable() == OffsetAIsScalable &&
1281 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1282 return true;
1283 }
1284 }
1285 return false;
1286}
1287
1288bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1289                                            const MachineBasicBlock *MBB,
1290                                            const MachineFunction &MF) const {
1291  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1292    return true;
1293
1294 // Do not move an instruction that can be recognized as a branch target.
1295 if (hasBTISemantics(MI))
1296 return true;
1297
1298 switch (MI.getOpcode()) {
1299 case AArch64::HINT:
1300 // CSDB hints are scheduling barriers.
1301 if (MI.getOperand(0).getImm() == 0x14)
1302 return true;
1303 break;
1304 case AArch64::DSB:
1305 case AArch64::ISB:
1306 // DSB and ISB also are scheduling barriers.
1307 return true;
1308 case AArch64::MSRpstatesvcrImm1:
1309 // SMSTART and SMSTOP are also scheduling barriers.
1310 return true;
1311 default:;
1312 }
1313 if (isSEHInstruction(MI))
1314 return true;
1315 auto Next = std::next(MI.getIterator());
1316 return Next != MBB->end() && Next->isCFIInstruction();
1317}
1318
1319/// analyzeCompare - For a comparison instruction, return the source registers
1320/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1321/// Return true if the comparison instruction can be analyzed.
1322bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1323                                      Register &SrcReg2, int64_t &CmpMask,
1324 int64_t &CmpValue) const {
1325 // The first operand can be a frame index where we'd normally expect a
1326 // register.
1327 // FIXME: Pass subregisters out of analyzeCompare
1328 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1329 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1330 return false;
1331
1332 switch (MI.getOpcode()) {
1333 default:
1334 break;
1335 case AArch64::PTEST_PP:
1336 case AArch64::PTEST_PP_ANY:
1337 case AArch64::PTEST_PP_FIRST:
1338 SrcReg = MI.getOperand(0).getReg();
1339 SrcReg2 = MI.getOperand(1).getReg();
1340 if (MI.getOperand(2).getSubReg())
1341 return false;
1342
1343 // Not sure about the mask and value for now...
1344 CmpMask = ~0;
1345 CmpValue = 0;
1346 return true;
1347 case AArch64::SUBSWrr:
1348 case AArch64::SUBSWrs:
1349 case AArch64::SUBSWrx:
1350 case AArch64::SUBSXrr:
1351 case AArch64::SUBSXrs:
1352 case AArch64::SUBSXrx:
1353 case AArch64::ADDSWrr:
1354 case AArch64::ADDSWrs:
1355 case AArch64::ADDSWrx:
1356 case AArch64::ADDSXrr:
1357 case AArch64::ADDSXrs:
1358 case AArch64::ADDSXrx:
1359 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1360 SrcReg = MI.getOperand(1).getReg();
1361 SrcReg2 = MI.getOperand(2).getReg();
1362
1363 // FIXME: Pass subregisters out of analyzeCompare
1364 if (MI.getOperand(2).getSubReg())
1365 return false;
1366
1367 CmpMask = ~0;
1368 CmpValue = 0;
1369 return true;
1370 case AArch64::SUBSWri:
1371 case AArch64::ADDSWri:
1372 case AArch64::SUBSXri:
1373 case AArch64::ADDSXri:
1374 SrcReg = MI.getOperand(1).getReg();
1375 SrcReg2 = 0;
1376 CmpMask = ~0;
1377 CmpValue = MI.getOperand(2).getImm();
1378 return true;
1379 case AArch64::ANDSWri:
1380 case AArch64::ANDSXri:
1381    // ANDS does not use the same encoding scheme as the other xxxS
1382    // instructions.
1383    SrcReg = MI.getOperand(1).getReg();
1384    SrcReg2 = 0;
1385    CmpMask = ~0;
1386    CmpValue = AArch64_AM::decodeLogicalImmediate(
1387        MI.getOperand(2).getImm(),
1388        MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1389 return true;
1390 }
1391
1392 return false;
1393}
1394
1395static bool UpdateOperandRegClass(MachineInstr &Instr) {
1396  MachineBasicBlock *MBB = Instr.getParent();
1397  assert(MBB && "Can't get MachineBasicBlock here");
1398  MachineFunction *MF = MBB->getParent();
1399  assert(MF && "Can't get MachineFunction here");
1400  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1401  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1402  MachineRegisterInfo *MRI = &MF->getRegInfo();
1403
1404 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1405 ++OpIdx) {
1406 MachineOperand &MO = Instr.getOperand(OpIdx);
1407 const TargetRegisterClass *OpRegCstraints =
1408 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1409
1410 // If there's no constraint, there's nothing to do.
1411 if (!OpRegCstraints)
1412 continue;
1413 // If the operand is a frame index, there's nothing to do here.
1414 // A frame index operand will resolve correctly during PEI.
1415 if (MO.isFI())
1416 continue;
1417
1418 assert(MO.isReg() &&
1419 "Operand has register constraints without being a register!");
1420
1421 Register Reg = MO.getReg();
1422 if (Reg.isPhysical()) {
1423 if (!OpRegCstraints->contains(Reg))
1424 return false;
1425 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1426 !MRI->constrainRegClass(Reg, OpRegCstraints))
1427 return false;
1428 }
1429
1430 return true;
1431}
1432
1433/// Return the opcode that does not set flags when possible - otherwise
1434/// return the original opcode. The caller is responsible to do the actual
1435/// substitution and legality checking.
1436unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1437  // Don't convert all compare instructions, because for some the zero register
1438 // encoding becomes the sp register.
1439 bool MIDefinesZeroReg = false;
1440 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1441 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1442 MIDefinesZeroReg = true;
1443
1444 switch (MI.getOpcode()) {
1445 default:
1446 return MI.getOpcode();
1447 case AArch64::ADDSWrr:
1448 return AArch64::ADDWrr;
1449 case AArch64::ADDSWri:
1450 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1451 case AArch64::ADDSWrs:
1452 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1453 case AArch64::ADDSWrx:
1454 return AArch64::ADDWrx;
1455 case AArch64::ADDSXrr:
1456 return AArch64::ADDXrr;
1457 case AArch64::ADDSXri:
1458 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1459 case AArch64::ADDSXrs:
1460 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1461 case AArch64::ADDSXrx:
1462 return AArch64::ADDXrx;
1463 case AArch64::SUBSWrr:
1464 return AArch64::SUBWrr;
1465 case AArch64::SUBSWri:
1466 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1467 case AArch64::SUBSWrs:
1468 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1469 case AArch64::SUBSWrx:
1470 return AArch64::SUBWrx;
1471 case AArch64::SUBSXrr:
1472 return AArch64::SUBXrr;
1473 case AArch64::SUBSXri:
1474 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1475 case AArch64::SUBSXrs:
1476 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1477 case AArch64::SUBSXrx:
1478 return AArch64::SUBXrx;
1479 }
1480}
1481
1482enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1483
1484/// True when condition flags are accessed (either by writing or reading)
1485/// on the instruction trace starting at From and ending at To.
1486///
1487/// Note: If From and To are from different blocks it's assumed CC are accessed
1488/// on the path.
1489static bool areCFlagsAccessedBetweenInstrs(
1490    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1491    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1492 // Early exit if To is at the beginning of the BB.
1493 if (To == To->getParent()->begin())
1494 return true;
1495
1496 // Check whether the instructions are in the same basic block
1497 // If not, assume the condition flags might get modified somewhere.
1498 if (To->getParent() != From->getParent())
1499 return true;
1500
1501 // From must be above To.
1502 assert(std::any_of(
1503 ++To.getReverse(), To->getParent()->rend(),
1504 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1505
1506 // We iterate backward starting at \p To until we hit \p From.
1507  for (const MachineInstr &Instr :
1508       instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1509 if (((AccessToCheck & AK_Write) &&
1510 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1511 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1512 return true;
1513 }
1514 return false;
1515}
1516
1517std::optional<unsigned>
1518AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1519 MachineInstr *Pred,
1520 const MachineRegisterInfo *MRI) const {
1521 unsigned MaskOpcode = Mask->getOpcode();
1522 unsigned PredOpcode = Pred->getOpcode();
1523 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1524 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1525
1526 if (PredIsWhileLike) {
1527 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1528    // instruction and the condition is "any" since WHILEcc does an implicit
1529 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1530 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1531 return PredOpcode;
1532
1533 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1534 // redundant since WHILE performs an implicit PTEST with an all active
1535 // mask.
1536 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1537 getElementSizeForOpcode(MaskOpcode) ==
1538 getElementSizeForOpcode(PredOpcode))
1539 return PredOpcode;
1540
1541 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1542 // WHILEcc performs an implicit PTEST with an all active mask, setting
1543 // the N flag as the PTEST_FIRST would.
1544 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1545 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1546 return PredOpcode;
1547
1548 return {};
1549 }
1550
1551 if (PredIsPTestLike) {
1552 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1553 // instruction that sets the flags as PTEST would and the condition is
1554 // "any" since PG is always a subset of the governing predicate of the
1555 // ptest-like instruction.
1556 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1557 return PredOpcode;
1558
1559 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1560
1561 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1562 // to look through a copy and try again. This is because some instructions
1563 // take a predicate whose register class is a subset of its result class.
1564 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1565 PTestLikeMask->getOperand(1).getReg().isVirtual())
1566 PTestLikeMask =
1567 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1568
1569    // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1570    // element size matches and either the PTEST_LIKE instruction uses
1571 // the same all active mask or the condition is "any".
1572 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1573 getElementSizeForOpcode(MaskOpcode) ==
1574 getElementSizeForOpcode(PredOpcode)) {
1575 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1576 return PredOpcode;
1577 }
1578
1579 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1580 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1581 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1582 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1583 // performed by the compare could consider fewer lanes for these element
1584 // sizes.
1585 //
1586 // For example, consider
1587 //
1588 // ptrue p0.b ; P0=1111-1111-1111-1111
1589 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1590 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1591 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1592 // ; ^ last active
1593 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1594 // ; ^ last active
1595 //
1596 // where the compare generates a canonical all active 32-bit predicate
1597 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1598 // active flag, whereas the PTEST instruction with the same mask doesn't.
1599 // For PTEST_ANY this doesn't apply as the flags in this case would be
1600 // identical regardless of element size.
1601 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1602 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1603 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1604 return PredOpcode;
1605
1606 return {};
1607 }
1608
1609 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1610 // opcode so the PTEST becomes redundant.
1611 switch (PredOpcode) {
1612 case AArch64::AND_PPzPP:
1613 case AArch64::BIC_PPzPP:
1614 case AArch64::EOR_PPzPP:
1615 case AArch64::NAND_PPzPP:
1616 case AArch64::NOR_PPzPP:
1617 case AArch64::ORN_PPzPP:
1618 case AArch64::ORR_PPzPP:
1619 case AArch64::BRKA_PPzP:
1620 case AArch64::BRKPA_PPzPP:
1621 case AArch64::BRKB_PPzP:
1622 case AArch64::BRKPB_PPzPP:
1623 case AArch64::RDFFR_PPz: {
1624 // Check to see if our mask is the same. If not the resulting flag bits
1625 // may be different and we can't remove the ptest.
1626 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1627 if (Mask != PredMask)
1628 return {};
1629 break;
1630 }
1631 case AArch64::BRKN_PPzP: {
1632 // BRKN uses an all active implicit mask to set flags unlike the other
1633 // flag-setting instructions.
1634 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1635 if ((MaskOpcode != AArch64::PTRUE_B) ||
1636 (Mask->getOperand(1).getImm() != 31))
1637 return {};
1638 break;
1639 }
1640 case AArch64::PTRUE_B:
1641 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1642 break;
1643 default:
1644 // Bail out if we don't recognize the input
1645 return {};
1646 }
1647
1648 return convertToFlagSettingOpc(PredOpcode);
1649}
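// Concrete instance of the last case above (illustrative): for
// PTEST(PTRUE_B(31), BRKN(PG, A, B)) the BRKN is rewritten to its flag-setting
// twin via convertToFlagSettingOpc, effectively giving BRKNS(PG, A, B), and
// the PTEST itself becomes removable.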
1650
1651/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1652/// operation which could set the flags in an identical manner
1653bool AArch64InstrInfo::optimizePTestInstr(
1654 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1655 const MachineRegisterInfo *MRI) const {
1656 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1657 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1658
1659 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1660 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1661 // before the branch to extract each subregister.
1662 auto Op = Pred->getOperand(1);
1663 if (Op.isReg() && Op.getReg().isVirtual() &&
1664 Op.getSubReg() == AArch64::psub0)
1665 Pred = MRI->getUniqueVRegDef(Op.getReg());
1666 }
1667
1668 unsigned PredOpcode = Pred->getOpcode();
1669 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1670 if (!NewOp)
1671 return false;
1672
1673 const TargetRegisterInfo *TRI = &getRegisterInfo();
1674
1675 // If another instruction between Pred and PTest accesses flags, don't remove
1676 // the ptest or update the earlier instruction to modify them.
1677 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1678 return false;
1679
1680 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1681 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1682 // operand to be replaced with an equivalent instruction that also sets the
1683 // flags.
1684 PTest->eraseFromParent();
1685 if (*NewOp != PredOpcode) {
1686 Pred->setDesc(get(*NewOp));
1687 bool succeeded = UpdateOperandRegClass(*Pred);
1688 (void)succeeded;
1689 assert(succeeded && "Operands have incompatible register classes!");
1690 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1691 }
1692
1693 // Ensure that the flags def is live.
1694 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1695 unsigned i = 0, e = Pred->getNumOperands();
1696 for (; i != e; ++i) {
1697 MachineOperand &MO = Pred->getOperand(i);
1698 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1699 MO.setIsDead(false);
1700 break;
1701 }
1702 }
1703 }
1704 return true;
1705}
1706
1707/// Try to optimize a compare instruction. A compare instruction is an
1708/// instruction which produces AArch64::NZCV. It is only truly a compare
1709/// instruction when there are no uses of its destination register.
1710///
1711/// The following steps are tried in order:
1712/// 1. Convert CmpInstr into an unconditional (non-flag-setting) version.
1713/// 2. Remove CmpInstr if there is an earlier instruction producing the needed
1714///    condition code, or an instruction that can be converted into such an
1715///    instruction.
1716/// Only comparison with zero is supported.
1718bool AArch64InstrInfo::optimizeCompareInstr(
1719    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1720 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1721 assert(CmpInstr.getParent());
1722 assert(MRI);
1723
1724 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1725 int DeadNZCVIdx =
1726 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1727 if (DeadNZCVIdx != -1) {
1728 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1729 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1730 CmpInstr.eraseFromParent();
1731 return true;
1732 }
1733 unsigned Opc = CmpInstr.getOpcode();
1734 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1735 if (NewOpc == Opc)
1736 return false;
1737 const MCInstrDesc &MCID = get(NewOpc);
1738 CmpInstr.setDesc(MCID);
1739 CmpInstr.removeOperand(DeadNZCVIdx);
1740 bool succeeded = UpdateOperandRegClass(CmpInstr);
1741 (void)succeeded;
1742 assert(succeeded && "Some operands reg class are incompatible!");
1743 return true;
1744 }
1745
1746 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1747 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1748 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1749 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1750
1751 if (SrcReg2 != 0)
1752 return false;
1753
1754 // CmpInstr is a Compare instruction if destination register is not used.
1755 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1756 return false;
1757
1758 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1759 return true;
1760 return (CmpValue == 0 || CmpValue == 1) &&
1761 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1762}
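// Example of the first transformation above (illustrative): for
// "%0 = SUBSWrr %a, %b" whose NZCV def is dead, the opcode is rewritten to
// SUBWrr and the dead NZCV operand is dropped; a compare that writes WZR/XZR
// and has a dead NZCV def is deleted outright.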
1763
1764/// Get opcode of S version of Instr.
1765/// If Instr is S version its opcode is returned.
1766/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1767/// or we are not interested in it.
1768static unsigned sForm(MachineInstr &Instr) {
1769 switch (Instr.getOpcode()) {
1770 default:
1771 return AArch64::INSTRUCTION_LIST_END;
1772
1773 case AArch64::ADDSWrr:
1774 case AArch64::ADDSWri:
1775 case AArch64::ADDSXrr:
1776 case AArch64::ADDSXri:
1777 case AArch64::SUBSWrr:
1778 case AArch64::SUBSWri:
1779 case AArch64::SUBSXrr:
1780 case AArch64::SUBSXri:
1781 return Instr.getOpcode();
1782
1783 case AArch64::ADDWrr:
1784 return AArch64::ADDSWrr;
1785 case AArch64::ADDWri:
1786 return AArch64::ADDSWri;
1787 case AArch64::ADDXrr:
1788 return AArch64::ADDSXrr;
1789 case AArch64::ADDXri:
1790 return AArch64::ADDSXri;
1791 case AArch64::ADCWr:
1792 return AArch64::ADCSWr;
1793 case AArch64::ADCXr:
1794 return AArch64::ADCSXr;
1795 case AArch64::SUBWrr:
1796 return AArch64::SUBSWrr;
1797 case AArch64::SUBWri:
1798 return AArch64::SUBSWri;
1799 case AArch64::SUBXrr:
1800 return AArch64::SUBSXrr;
1801 case AArch64::SUBXri:
1802 return AArch64::SUBSXri;
1803 case AArch64::SBCWr:
1804 return AArch64::SBCSWr;
1805 case AArch64::SBCXr:
1806 return AArch64::SBCSXr;
1807 case AArch64::ANDWri:
1808 return AArch64::ANDSWri;
1809 case AArch64::ANDXri:
1810 return AArch64::ANDSXri;
1811 }
1812}
1813
1814/// Check if AArch64::NZCV should be alive in successors of MBB.
1815static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1816 for (auto *BB : MBB->successors())
1817 if (BB->isLiveIn(AArch64::NZCV))
1818 return true;
1819 return false;
1820}
1821
1822/// \returns The condition code operand index for \p Instr if it is a branch
1823/// or select and -1 otherwise.
1824static int
1825findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1826 switch (Instr.getOpcode()) {
1827 default:
1828 return -1;
1829
1830 case AArch64::Bcc: {
1831 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1832 assert(Idx >= 2);
1833 return Idx - 2;
1834 }
1835
1836 case AArch64::CSINVWr:
1837 case AArch64::CSINVXr:
1838 case AArch64::CSINCWr:
1839 case AArch64::CSINCXr:
1840 case AArch64::CSELWr:
1841 case AArch64::CSELXr:
1842 case AArch64::CSNEGWr:
1843 case AArch64::CSNEGXr:
1844 case AArch64::FCSELSrrr:
1845 case AArch64::FCSELDrrr: {
1846 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1847 assert(Idx >= 1);
1848 return Idx - 1;
1849 }
1850 }
1851}
1852
1853/// Find a condition code used by the instruction.
1854/// Returns AArch64CC::Invalid if either the instruction does not use condition
1855/// codes or we don't optimize CmpInstr in the presence of such instructions.
1856static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1857 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1858 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1859 Instr.getOperand(CCIdx).getImm())
1860 : AArch64CC::Invalid;
1861}
1862
1865 UsedNZCV UsedFlags;
1866 switch (CC) {
1867 default:
1868 break;
1869
1870 case AArch64CC::EQ: // Z set
1871 case AArch64CC::NE: // Z clear
1872 UsedFlags.Z = true;
1873 break;
1874
1875 case AArch64CC::HI: // Z clear and C set
1876 case AArch64CC::LS: // Z set or C clear
1877 UsedFlags.Z = true;
1878 [[fallthrough]];
1879 case AArch64CC::HS: // C set
1880 case AArch64CC::LO: // C clear
1881 UsedFlags.C = true;
1882 break;
1883
1884 case AArch64CC::MI: // N set
1885 case AArch64CC::PL: // N clear
1886 UsedFlags.N = true;
1887 break;
1888
1889 case AArch64CC::VS: // V set
1890 case AArch64CC::VC: // V clear
1891 UsedFlags.V = true;
1892 break;
1893
1894 case AArch64CC::GT: // Z clear, N and V the same
1895 case AArch64CC::LE: // Z set, N and V differ
1896 UsedFlags.Z = true;
1897 [[fallthrough]];
1898 case AArch64CC::GE: // N and V the same
1899 case AArch64CC::LT: // N and V differ
1900 UsedFlags.N = true;
1901 UsedFlags.V = true;
1902 break;
1903 }
1904 return UsedFlags;
1905}
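// As a concrete reading of the mapping above: a `b.hi` branch tests "Z clear
// and C set", so getUsedNZCV(AArch64CC::HI) reports both Z and C as used, while
// a `csel ..., ge` only needs N and V. (Illustrative summary, not an exhaustive
// list.)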
1906
1907/// \returns The condition flags used after \p CmpInstr in its MachineBB if the
1908/// NZCV flags are not alive in the successors of the common \p CmpInstr and
1909/// \p MI parent block. \returns std::nullopt otherwise.
1910///
1911/// Collects the instructions using those flags in \p CCUseInstrs if provided.
1912std::optional<UsedNZCV>
1913llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1914 const TargetRegisterInfo &TRI,
1915 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1916 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1917 if (MI.getParent() != CmpParent)
1918 return std::nullopt;
1919
1920 if (areCFlagsAliveInSuccessors(CmpParent))
1921 return std::nullopt;
1922
1923 UsedNZCV NZCVUsedAfterCmp;
1924 for (MachineInstr &Instr : instructionsWithoutDebug(
1925 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1926 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1927 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1928 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1929 return std::nullopt;
1930 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1931 if (CCUseInstrs)
1932 CCUseInstrs->push_back(&Instr);
1933 }
1934 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1935 break;
1936 }
1937 return NZCVUsedAfterCmp;
1938}
1939
1940static bool isADDSRegImm(unsigned Opcode) {
1941 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1942}
1943
1944static bool isSUBSRegImm(unsigned Opcode) {
1945 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1946}
1947
1948/// Check if CmpInstr can be substituted by MI.
1949///
1950/// CmpInstr can be substituted:
1951/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1952/// - and, MI and CmpInstr are from the same MachineBB
1953/// - and, condition flags are not alive in successors of the CmpInstr parent
1954/// - and, if MI opcode is the S form there must be no defs of flags between
1955/// MI and CmpInstr
1956/// or if MI opcode is not the S form there must be neither defs of flags
1957/// nor uses of flags between MI and CmpInstr.
1958/// - and, the C flag is not used after CmpInstr, and either the V flag is not
1959///        used after CmpInstr or MI produces a poison value if signed overflow
1960///        occurs (no-signed-wrap), so the V-flag check can be dropped.
1961static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1962 const TargetRegisterInfo &TRI) {
1963 // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
1964 // subtraction that may or may not set flags.
1965 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1966
1967 const unsigned CmpOpcode = CmpInstr.getOpcode();
1968 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1969 return false;
1970
1971 assert((CmpInstr.getOperand(2).isImm() &&
1972 CmpInstr.getOperand(2).getImm() == 0) &&
1973 "Caller guarantees that CmpInstr compares with constant 0");
1974
1975 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1976 if (!NZVCUsed || NZVCUsed->C)
1977 return false;
1978
1979 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1980 // '%vreg = add ...' or '%vreg = sub ...'.
1981 // Condition flag V is used to indicate signed overflow.
1982 // 1) MI and CmpInstr set N and V to the same value.
1983 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1984 // signed overflow occurs, so CmpInstr could still be simplified away.
1985 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1986 return false;
1987
1988 AccessKind AccessToCheck = AK_Write;
1989 if (sForm(MI) != MI.getOpcode())
1990 AccessToCheck = AK_All;
1991 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1992}
1993
1994/// Substitute an instruction comparing to zero with another instruction
1995/// which produces needed condition flags.
1996///
1997/// Return true on success.
1998bool AArch64InstrInfo::substituteCmpToZero(
1999 MachineInstr &CmpInstr, unsigned SrcReg,
2000 const MachineRegisterInfo &MRI) const {
2001 // Get the unique definition of SrcReg.
2002 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2003 if (!MI)
2004 return false;
2005
2006 const TargetRegisterInfo &TRI = getRegisterInfo();
2007
2008 unsigned NewOpc = sForm(*MI);
2009 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2010 return false;
2011
2012 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2013 return false;
2014
2015 // Update the instruction to set NZCV.
2016 MI->setDesc(get(NewOpc));
2017 CmpInstr.eraseFromParent();
2018 bool succeeded = UpdateOperandRegClass(*MI);
2019 (void)succeeded;
2020 assert(succeeded && "Some operands reg class are incompatible!");
2021 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2022 return true;
2023}
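// A small before/after sketch of the substitution, assuming the flags are only
// consumed by the branch and NZCV is not live into any successor block:
// \code
//   add  w8, w0, w1
//   cmp  w8, #0
//   b.eq <bb>
// \endcode
// becomes
// \code
//   adds w8, w0, w1
//   b.eq <bb>
// \endcode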
2024
2025/// \returns True if \p CmpInstr can be removed.
2026///
2027/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2028/// codes used in \p CCUseInstrs must be inverted.
2029static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
2030 int CmpValue, const TargetRegisterInfo &TRI,
2031 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
2032 bool &IsInvertCC) {
2033 assert((CmpValue == 0 || CmpValue == 1) &&
2034 "Only comparisons to 0 or 1 considered for removal!");
2035
2036 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2037 unsigned MIOpc = MI.getOpcode();
2038 if (MIOpc == AArch64::CSINCWr) {
2039 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2040 MI.getOperand(2).getReg() != AArch64::WZR)
2041 return false;
2042 } else if (MIOpc == AArch64::CSINCXr) {
2043 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2044 MI.getOperand(2).getReg() != AArch64::XZR)
2045 return false;
2046 } else {
2047 return false;
2048 }
2049 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2050 if (MICC == AArch64CC::Invalid)
2051 return false;
2052
2053 // NZCV needs to be defined
2054 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2055 return false;
2056
2057 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2058 const unsigned CmpOpcode = CmpInstr.getOpcode();
2059 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2060 if (CmpValue && !IsSubsRegImm)
2061 return false;
2062 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2063 return false;
2064
2065 // MI conditions allowed: eq, ne, mi, pl
2066 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2067 if (MIUsedNZCV.C || MIUsedNZCV.V)
2068 return false;
2069
2070 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2071 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2072 // Condition flags are not used in the successors of CmpInstr's basic block,
2073 // and only the Z or N flags may be used after CmpInstr within its basic block
2074 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2075 return false;
2076 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2077 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2078 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2079 return false;
2080 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
2081 if (MIUsedNZCV.N && !CmpValue)
2082 return false;
2083
2084 // There must be no defs of flags between MI and CmpInstr
2085 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2086 return false;
2087
2088 // Condition code is inverted in the following cases:
2089 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2090 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2091 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2092 (!CmpValue && MICC == AArch64CC::NE);
2093 return true;
2094}
2095
2096/// Remove comparison in csinc-cmp sequence
2097///
2098/// Examples:
2099/// 1. \code
2100/// csinc w9, wzr, wzr, ne
2101/// cmp w9, #0
2102/// b.eq
2103/// \endcode
2104/// to
2105/// \code
2106/// csinc w9, wzr, wzr, ne
2107/// b.ne
2108/// \endcode
2109///
2110/// 2. \code
2111/// csinc x2, xzr, xzr, mi
2112/// cmp x2, #1
2113/// b.pl
2114/// \endcode
2115/// to
2116/// \code
2117/// csinc x2, xzr, xzr, mi
2118/// b.pl
2119/// \endcode
2120///
2121/// \param CmpInstr comparison instruction
2122/// \return True when comparison removed
2123bool AArch64InstrInfo::removeCmpToZeroOrOne(
2124 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2125 const MachineRegisterInfo &MRI) const {
2126 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2127 if (!MI)
2128 return false;
2129 const TargetRegisterInfo &TRI = getRegisterInfo();
2130 SmallVector<MachineInstr *, 4> CCUseInstrs;
2131 bool IsInvertCC = false;
2132 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2133 IsInvertCC))
2134 return false;
2135 // Make transformation
2136 CmpInstr.eraseFromParent();
2137 if (IsInvertCC) {
2138 // Invert condition codes in CmpInstr CC users
2139 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2140 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2141 assert(Idx >= 0 && "Unexpected instruction using CC.");
2142 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2143 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2144 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2145 CCOperand.setImm(CCUse);
2146 }
2147 }
2148 return true;
2149}
2150
2151bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2152 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2153 MI.getOpcode() != AArch64::CATCHRET)
2154 return false;
2155
2156 MachineBasicBlock &MBB = *MI.getParent();
2157 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2158 auto TRI = Subtarget.getRegisterInfo();
2159 DebugLoc DL = MI.getDebugLoc();
2160
2161 if (MI.getOpcode() == AArch64::CATCHRET) {
2162 // Skip to the first instruction before the epilog.
2163 const TargetInstrInfo *TII =
2164 MBB.getParent()->getSubtarget().getInstrInfo();
2165 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2166 MachineBasicBlock::iterator MBBI = MI.getIterator();
2167 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2168 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2169 FirstEpilogSEH != MBB.begin())
2170 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2171 if (FirstEpilogSEH != MBB.begin())
2172 FirstEpilogSEH = std::next(FirstEpilogSEH);
2173 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2174 .addReg(AArch64::X0, RegState::Define)
2175 .addMBB(TargetMBB);
2176 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2177 .addReg(AArch64::X0, RegState::Define)
2178 .addReg(AArch64::X0)
2179 .addMBB(TargetMBB)
2180 .addImm(0);
2181 TargetMBB->setMachineBlockAddressTaken();
2182 return true;
2183 }
2184
2185 Register Reg = MI.getOperand(0).getReg();
2186 const Module &M = *MBB.getParent()->getFunction().getParent();
2187 if (M.getStackProtectorGuard() == "sysreg") {
2188 const AArch64SysReg::SysReg *SrcReg =
2189 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2190 if (!SrcReg)
2191 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2192
2193 // mrs xN, sysreg
2194 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2196 .addImm(SrcReg->Encoding);
2197 int Offset = M.getStackProtectorGuardOffset();
2198 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2199 // ldr xN, [xN, #offset]
2200 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2201 .addDef(Reg)
2203 .addImm(Offset / 8);
2204 } else if (Offset >= -256 && Offset <= 255) {
2205 // ldur xN, [xN, #offset]
2206 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2207 .addDef(Reg)
2209 .addImm(Offset);
2210 } else if (Offset >= -4095 && Offset <= 4095) {
2211 if (Offset > 0) {
2212 // add xN, xN, #offset
2213 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2214 .addDef(Reg)
2216 .addImm(Offset)
2217 .addImm(0);
2218 } else {
2219 // sub xN, xN, #offset
2220 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2221 .addDef(Reg)
2223 .addImm(-Offset)
2224 .addImm(0);
2225 }
2226 // ldr xN, [xN]
2227 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2228 .addDef(Reg)
2230 .addImm(0);
2231 } else {
2232 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2233 // than 32760.
2234 // It might be nice to use AArch64::MOVi32imm here, which would get
2235 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2236 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2237 // AArch64FrameLowering might help us find such a scratch register
2238 // though. If we failed to find a scratch register, we could emit a
2239 // stream of add instructions to build up the immediate. Or, we could try
2240 // to insert a AArch64::MOVi32imm before register allocation so that we
2241 // didn't need to scavenge for a scratch register.
2242 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2243 }
2244 MBB.erase(MI);
2245 return true;
2246 }
2247
2248 const GlobalValue *GV =
2249 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2250 const TargetMachine &TM = MBB.getParent()->getTarget();
2251 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2252 const unsigned char MO_NC = AArch64II::MO_NC;
2253
2254 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2255 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2256 .addGlobalAddress(GV, 0, OpFlags);
2257 if (Subtarget.isTargetILP32()) {
2258 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2259 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2260 .addDef(Reg32, RegState::Dead)
2262 .addImm(0)
2263 .addMemOperand(*MI.memoperands_begin())
2265 } else {
2266 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2268 .addImm(0)
2269 .addMemOperand(*MI.memoperands_begin());
2270 }
2271 } else if (TM.getCodeModel() == CodeModel::Large) {
2272 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2273 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2274 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2275 .addImm(0);
2276 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2278 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2279 .addImm(16);
2280 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2282 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2283 .addImm(32);
2284 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2287 .addImm(48);
2288 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2290 .addImm(0)
2291 .addMemOperand(*MI.memoperands_begin());
2292 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2293 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2294 .addGlobalAddress(GV, 0, OpFlags);
2295 } else {
2296 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2297 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2298 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2299 if (Subtarget.isTargetILP32()) {
2300 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2301 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2302 .addDef(Reg32, RegState::Dead)
2304 .addGlobalAddress(GV, 0, LoFlags)
2305 .addMemOperand(*MI.memoperands_begin())
2307 } else {
2308 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2310 .addGlobalAddress(GV, 0, LoFlags)
2311 .addMemOperand(*MI.memoperands_begin());
2312 }
2313 }
2314
2315 MBB.erase(MI);
2316
2317 return true;
2318}
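// A standalone sketch (names invented for the example) of the offset
// classification used for the "sysreg" stack protector guard above: the ranges
// mirror the checks on LDRXui (unsigned, 8-byte-scaled), LDURXi (signed 9-bit)
// and a single ADD/SUB immediate performed in expandPostRAPseudo.
#include <cstdint>

namespace {
enum class GuardLoadForm { ScaledLdr, UnscaledLdur, AddSubThenLdr, Unsupported };

GuardLoadForm classifyGuardOffset(int64_t Offset) {
  if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0)
    return GuardLoadForm::ScaledLdr;     // ldr xN, [xN, #Offset]
  if (Offset >= -256 && Offset <= 255)
    return GuardLoadForm::UnscaledLdur;  // ldur xN, [xN, #Offset]
  if (Offset >= -4095 && Offset <= 4095)
    return GuardLoadForm::AddSubThenLdr; // add/sub xN, xN, #|Offset|; ldr xN, [xN]
  return GuardLoadForm::Unsupported;     // reported via report_fatal_error above
}
} // namespace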
2319
2320// Return true if this instruction simply sets its single destination register
2321// to zero. This is equivalent to a register rename of the zero-register.
2322bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2323 switch (MI.getOpcode()) {
2324 default:
2325 break;
2326 case AArch64::MOVZWi:
2327 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2328 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2329 assert(MI.getDesc().getNumOperands() == 3 &&
2330 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2331 return true;
2332 }
2333 break;
2334 case AArch64::ANDWri: // and Rd, Rzr, #imm
2335 return MI.getOperand(1).getReg() == AArch64::WZR;
2336 case AArch64::ANDXri:
2337 return MI.getOperand(1).getReg() == AArch64::XZR;
2338 case TargetOpcode::COPY:
2339 return MI.getOperand(1).getReg() == AArch64::WZR;
2340 }
2341 return false;
2342}
2343
2344// Return true if this instruction simply renames a general register without
2345// modifying bits.
2346bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2347 switch (MI.getOpcode()) {
2348 default:
2349 break;
2350 case TargetOpcode::COPY: {
2351 // GPR32 copies will be lowered to ORRXrs
2352 Register DstReg = MI.getOperand(0).getReg();
2353 return (AArch64::GPR32RegClass.contains(DstReg) ||
2354 AArch64::GPR64RegClass.contains(DstReg));
2355 }
2356 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2357 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2358 assert(MI.getDesc().getNumOperands() == 4 &&
2359 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2360 return true;
2361 }
2362 break;
2363 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2364 if (MI.getOperand(2).getImm() == 0) {
2365 assert(MI.getDesc().getNumOperands() == 4 &&
2366 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2367 return true;
2368 }
2369 break;
2370 }
2371 return false;
2372}
2373
2374// Return true if this instruction simply renames an FP or vector register
2375// without modifying bits.
2376bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2377 switch (MI.getOpcode()) {
2378 default:
2379 break;
2380 case TargetOpcode::COPY: {
2381 Register DstReg = MI.getOperand(0).getReg();
2382 return AArch64::FPR128RegClass.contains(DstReg);
2383 }
2384 case AArch64::ORRv16i8:
2385 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2386 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2387 "invalid ORRv16i8 operands");
2388 return true;
2389 }
2390 break;
2391 }
2392 return false;
2393}
2394
2395Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2396 int &FrameIndex) const {
2397 switch (MI.getOpcode()) {
2398 default:
2399 break;
2400 case AArch64::LDRWui:
2401 case AArch64::LDRXui:
2402 case AArch64::LDRBui:
2403 case AArch64::LDRHui:
2404 case AArch64::LDRSui:
2405 case AArch64::LDRDui:
2406 case AArch64::LDRQui:
2407 case AArch64::LDR_PXI:
2408 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2409 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2410 FrameIndex = MI.getOperand(1).getIndex();
2411 return MI.getOperand(0).getReg();
2412 }
2413 break;
2414 }
2415
2416 return 0;
2417}
2418
2419Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2420 int &FrameIndex) const {
2421 switch (MI.getOpcode()) {
2422 default:
2423 break;
2424 case AArch64::STRWui:
2425 case AArch64::STRXui:
2426 case AArch64::STRBui:
2427 case AArch64::STRHui:
2428 case AArch64::STRSui:
2429 case AArch64::STRDui:
2430 case AArch64::STRQui:
2431 case AArch64::STR_PXI:
2432 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2433 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2434 FrameIndex = MI.getOperand(1).getIndex();
2435 return MI.getOperand(0).getReg();
2436 }
2437 break;
2438 }
2439 return 0;
2440}
2441
2442/// Check all MachineMemOperands for a hint to suppress pairing.
2443bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2444 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2445 return MMO->getFlags() & MOSuppressPair;
2446 });
2447}
2448
2449/// Set a flag on the first MachineMemOperand to suppress pairing.
2450void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2451 if (MI.memoperands_empty())
2452 return;
2453 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2454}
2455
2456/// Check all MachineMemOperands for a hint that the load/store is strided.
2457bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2458 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2459 return MMO->getFlags() & MOStridedAccess;
2460 });
2461}
2462
2463bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2464 switch (Opc) {
2465 default:
2466 return false;
2467 case AArch64::STURSi:
2468 case AArch64::STRSpre:
2469 case AArch64::STURDi:
2470 case AArch64::STRDpre:
2471 case AArch64::STURQi:
2472 case AArch64::STRQpre:
2473 case AArch64::STURBBi:
2474 case AArch64::STURHHi:
2475 case AArch64::STURWi:
2476 case AArch64::STRWpre:
2477 case AArch64::STURXi:
2478 case AArch64::STRXpre:
2479 case AArch64::LDURSi:
2480 case AArch64::LDRSpre:
2481 case AArch64::LDURDi:
2482 case AArch64::LDRDpre:
2483 case AArch64::LDURQi:
2484 case AArch64::LDRQpre:
2485 case AArch64::LDURWi:
2486 case AArch64::LDRWpre:
2487 case AArch64::LDURXi:
2488 case AArch64::LDRXpre:
2489 case AArch64::LDRSWpre:
2490 case AArch64::LDURSWi:
2491 case AArch64::LDURHHi:
2492 case AArch64::LDURBBi:
2493 case AArch64::LDURSBWi:
2494 case AArch64::LDURSHWi:
2495 return true;
2496 }
2497}
2498
2499std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2500 switch (Opc) {
2501 default: return {};
2502 case AArch64::PRFMui: return AArch64::PRFUMi;
2503 case AArch64::LDRXui: return AArch64::LDURXi;
2504 case AArch64::LDRWui: return AArch64::LDURWi;
2505 case AArch64::LDRBui: return AArch64::LDURBi;
2506 case AArch64::LDRHui: return AArch64::LDURHi;
2507 case AArch64::LDRSui: return AArch64::LDURSi;
2508 case AArch64::LDRDui: return AArch64::LDURDi;
2509 case AArch64::LDRQui: return AArch64::LDURQi;
2510 case AArch64::LDRBBui: return AArch64::LDURBBi;
2511 case AArch64::LDRHHui: return AArch64::LDURHHi;
2512 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2513 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2514 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2515 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2516 case AArch64::LDRSWui: return AArch64::LDURSWi;
2517 case AArch64::STRXui: return AArch64::STURXi;
2518 case AArch64::STRWui: return AArch64::STURWi;
2519 case AArch64::STRBui: return AArch64::STURBi;
2520 case AArch64::STRHui: return AArch64::STURHi;
2521 case AArch64::STRSui: return AArch64::STURSi;
2522 case AArch64::STRDui: return AArch64::STURDi;
2523 case AArch64::STRQui: return AArch64::STURQi;
2524 case AArch64::STRBBui: return AArch64::STURBBi;
2525 case AArch64::STRHHui: return AArch64::STURHHi;
2526 }
2527}
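// For example, `ldr x0, [x1, #8]` uses the scaled LDRXui form (immediate encoded
// as 8/8 = 1, byte offsets 0..32760 in steps of 8), while `ldur x0, [x1, #-8]`
// uses the unscaled LDURXi form with a signed byte offset in [-256, 255]. These
// are the standard A64 encoding ranges, mentioned here only to motivate the
// mapping above.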
2528
2529unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2530 switch (Opc) {
2531 default:
2532 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2533 case AArch64::ADDG:
2534 case AArch64::LDAPURBi:
2535 case AArch64::LDAPURHi:
2536 case AArch64::LDAPURi:
2537 case AArch64::LDAPURSBWi:
2538 case AArch64::LDAPURSBXi:
2539 case AArch64::LDAPURSHWi:
2540 case AArch64::LDAPURSHXi:
2541 case AArch64::LDAPURSWi:
2542 case AArch64::LDAPURXi:
2543 case AArch64::LDR_PPXI:
2544 case AArch64::LDR_PXI:
2545 case AArch64::LDR_ZXI:
2546 case AArch64::LDR_ZZXI:
2547 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2548 case AArch64::LDR_ZZZXI:
2549 case AArch64::LDR_ZZZZXI:
2550 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2551 case AArch64::LDRBBui:
2552 case AArch64::LDRBui:
2553 case AArch64::LDRDui:
2554 case AArch64::LDRHHui:
2555 case AArch64::LDRHui:
2556 case AArch64::LDRQui:
2557 case AArch64::LDRSBWui:
2558 case AArch64::LDRSBXui:
2559 case AArch64::LDRSHWui:
2560 case AArch64::LDRSHXui:
2561 case AArch64::LDRSui:
2562 case AArch64::LDRSWui:
2563 case AArch64::LDRWui:
2564 case AArch64::LDRXui:
2565 case AArch64::LDURBBi:
2566 case AArch64::LDURBi:
2567 case AArch64::LDURDi:
2568 case AArch64::LDURHHi:
2569 case AArch64::LDURHi:
2570 case AArch64::LDURQi:
2571 case AArch64::LDURSBWi:
2572 case AArch64::LDURSBXi:
2573 case AArch64::LDURSHWi:
2574 case AArch64::LDURSHXi:
2575 case AArch64::LDURSi:
2576 case AArch64::LDURSWi:
2577 case AArch64::LDURWi:
2578 case AArch64::LDURXi:
2579 case AArch64::PRFMui:
2580 case AArch64::PRFUMi:
2581 case AArch64::ST2Gi:
2582 case AArch64::STGi:
2583 case AArch64::STLURBi:
2584 case AArch64::STLURHi:
2585 case AArch64::STLURWi:
2586 case AArch64::STLURXi:
2587 case AArch64::StoreSwiftAsyncContext:
2588 case AArch64::STR_PPXI:
2589 case AArch64::STR_PXI:
2590 case AArch64::STR_ZXI:
2591 case AArch64::STR_ZZXI:
2592 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2593 case AArch64::STR_ZZZXI:
2594 case AArch64::STR_ZZZZXI:
2595 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2596 case AArch64::STRBBui:
2597 case AArch64::STRBui:
2598 case AArch64::STRDui:
2599 case AArch64::STRHHui:
2600 case AArch64::STRHui:
2601 case AArch64::STRQui:
2602 case AArch64::STRSui:
2603 case AArch64::STRWui:
2604 case AArch64::STRXui:
2605 case AArch64::STURBBi:
2606 case AArch64::STURBi:
2607 case AArch64::STURDi:
2608 case AArch64::STURHHi:
2609 case AArch64::STURHi:
2610 case AArch64::STURQi:
2611 case AArch64::STURSi:
2612 case AArch64::STURWi:
2613 case AArch64::STURXi:
2614 case AArch64::STZ2Gi:
2615 case AArch64::STZGi:
2616 case AArch64::TAGPstack:
2617 return 2;
2618 case AArch64::LD1B_D_IMM:
2619 case AArch64::LD1B_H_IMM:
2620 case AArch64::LD1B_IMM:
2621 case AArch64::LD1B_S_IMM:
2622 case AArch64::LD1D_IMM:
2623 case AArch64::LD1H_D_IMM:
2624 case AArch64::LD1H_IMM:
2625 case AArch64::LD1H_S_IMM:
2626 case AArch64::LD1RB_D_IMM:
2627 case AArch64::LD1RB_H_IMM:
2628 case AArch64::LD1RB_IMM:
2629 case AArch64::LD1RB_S_IMM:
2630 case AArch64::LD1RD_IMM:
2631 case AArch64::LD1RH_D_IMM:
2632 case AArch64::LD1RH_IMM:
2633 case AArch64::LD1RH_S_IMM:
2634 case AArch64::LD1RSB_D_IMM:
2635 case AArch64::LD1RSB_H_IMM:
2636 case AArch64::LD1RSB_S_IMM:
2637 case AArch64::LD1RSH_D_IMM:
2638 case AArch64::LD1RSH_S_IMM:
2639 case AArch64::LD1RSW_IMM:
2640 case AArch64::LD1RW_D_IMM:
2641 case AArch64::LD1RW_IMM:
2642 case AArch64::LD1SB_D_IMM:
2643 case AArch64::LD1SB_H_IMM:
2644 case AArch64::LD1SB_S_IMM:
2645 case AArch64::LD1SH_D_IMM:
2646 case AArch64::LD1SH_S_IMM:
2647 case AArch64::LD1SW_D_IMM:
2648 case AArch64::LD1W_D_IMM:
2649 case AArch64::LD1W_IMM:
2650 case AArch64::LD2B_IMM:
2651 case AArch64::LD2D_IMM:
2652 case AArch64::LD2H_IMM:
2653 case AArch64::LD2W_IMM:
2654 case AArch64::LD3B_IMM:
2655 case AArch64::LD3D_IMM:
2656 case AArch64::LD3H_IMM:
2657 case AArch64::LD3W_IMM:
2658 case AArch64::LD4B_IMM:
2659 case AArch64::LD4D_IMM:
2660 case AArch64::LD4H_IMM:
2661 case AArch64::LD4W_IMM:
2662 case AArch64::LDG:
2663 case AArch64::LDNF1B_D_IMM:
2664 case AArch64::LDNF1B_H_IMM:
2665 case AArch64::LDNF1B_IMM:
2666 case AArch64::LDNF1B_S_IMM:
2667 case AArch64::LDNF1D_IMM:
2668 case AArch64::LDNF1H_D_IMM:
2669 case AArch64::LDNF1H_IMM:
2670 case AArch64::LDNF1H_S_IMM:
2671 case AArch64::LDNF1SB_D_IMM:
2672 case AArch64::LDNF1SB_H_IMM:
2673 case AArch64::LDNF1SB_S_IMM:
2674 case AArch64::LDNF1SH_D_IMM:
2675 case AArch64::LDNF1SH_S_IMM:
2676 case AArch64::LDNF1SW_D_IMM:
2677 case AArch64::LDNF1W_D_IMM:
2678 case AArch64::LDNF1W_IMM:
2679 case AArch64::LDNPDi:
2680 case AArch64::LDNPQi:
2681 case AArch64::LDNPSi:
2682 case AArch64::LDNPWi:
2683 case AArch64::LDNPXi:
2684 case AArch64::LDNT1B_ZRI:
2685 case AArch64::LDNT1D_ZRI:
2686 case AArch64::LDNT1H_ZRI:
2687 case AArch64::LDNT1W_ZRI:
2688 case AArch64::LDPDi:
2689 case AArch64::LDPQi:
2690 case AArch64::LDPSi:
2691 case AArch64::LDPWi:
2692 case AArch64::LDPXi:
2693 case AArch64::LDRBBpost:
2694 case AArch64::LDRBBpre:
2695 case AArch64::LDRBpost:
2696 case AArch64::LDRBpre:
2697 case AArch64::LDRDpost:
2698 case AArch64::LDRDpre:
2699 case AArch64::LDRHHpost:
2700 case AArch64::LDRHHpre:
2701 case AArch64::LDRHpost:
2702 case AArch64::LDRHpre:
2703 case AArch64::LDRQpost:
2704 case AArch64::LDRQpre:
2705 case AArch64::LDRSpost:
2706 case AArch64::LDRSpre:
2707 case AArch64::LDRWpost:
2708 case AArch64::LDRWpre:
2709 case AArch64::LDRXpost:
2710 case AArch64::LDRXpre:
2711 case AArch64::ST1B_D_IMM:
2712 case AArch64::ST1B_H_IMM:
2713 case AArch64::ST1B_IMM:
2714 case AArch64::ST1B_S_IMM:
2715 case AArch64::ST1D_IMM:
2716 case AArch64::ST1H_D_IMM:
2717 case AArch64::ST1H_IMM:
2718 case AArch64::ST1H_S_IMM:
2719 case AArch64::ST1W_D_IMM:
2720 case AArch64::ST1W_IMM:
2721 case AArch64::ST2B_IMM:
2722 case AArch64::ST2D_IMM:
2723 case AArch64::ST2H_IMM:
2724 case AArch64::ST2W_IMM:
2725 case AArch64::ST3B_IMM:
2726 case AArch64::ST3D_IMM:
2727 case AArch64::ST3H_IMM:
2728 case AArch64::ST3W_IMM:
2729 case AArch64::ST4B_IMM:
2730 case AArch64::ST4D_IMM:
2731 case AArch64::ST4H_IMM:
2732 case AArch64::ST4W_IMM:
2733 case AArch64::STGPi:
2734 case AArch64::STGPreIndex:
2735 case AArch64::STZGPreIndex:
2736 case AArch64::ST2GPreIndex:
2737 case AArch64::STZ2GPreIndex:
2738 case AArch64::STGPostIndex:
2739 case AArch64::STZGPostIndex:
2740 case AArch64::ST2GPostIndex:
2741 case AArch64::STZ2GPostIndex:
2742 case AArch64::STNPDi:
2743 case AArch64::STNPQi:
2744 case AArch64::STNPSi:
2745 case AArch64::STNPWi:
2746 case AArch64::STNPXi:
2747 case AArch64::STNT1B_ZRI:
2748 case AArch64::STNT1D_ZRI:
2749 case AArch64::STNT1H_ZRI:
2750 case AArch64::STNT1W_ZRI:
2751 case AArch64::STPDi:
2752 case AArch64::STPQi:
2753 case AArch64::STPSi:
2754 case AArch64::STPWi:
2755 case AArch64::STPXi:
2756 case AArch64::STRBBpost:
2757 case AArch64::STRBBpre:
2758 case AArch64::STRBpost:
2759 case AArch64::STRBpre:
2760 case AArch64::STRDpost:
2761 case AArch64::STRDpre:
2762 case AArch64::STRHHpost:
2763 case AArch64::STRHHpre:
2764 case AArch64::STRHpost:
2765 case AArch64::STRHpre:
2766 case AArch64::STRQpost:
2767 case AArch64::STRQpre:
2768 case AArch64::STRSpost:
2769 case AArch64::STRSpre:
2770 case AArch64::STRWpost:
2771 case AArch64::STRWpre:
2772 case AArch64::STRXpost:
2773 case AArch64::STRXpre:
2774 return 3;
2775 case AArch64::LDPDpost:
2776 case AArch64::LDPDpre:
2777 case AArch64::LDPQpost:
2778 case AArch64::LDPQpre:
2779 case AArch64::LDPSpost:
2780 case AArch64::LDPSpre:
2781 case AArch64::LDPWpost:
2782 case AArch64::LDPWpre:
2783 case AArch64::LDPXpost:
2784 case AArch64::LDPXpre:
2785 case AArch64::STGPpre:
2786 case AArch64::STGPpost:
2787 case AArch64::STPDpost:
2788 case AArch64::STPDpre:
2789 case AArch64::STPQpost:
2790 case AArch64::STPQpre:
2791 case AArch64::STPSpost:
2792 case AArch64::STPSpre:
2793 case AArch64::STPWpost:
2794 case AArch64::STPWpre:
2795 case AArch64::STPXpost:
2796 case AArch64::STPXpre:
2797 return 4;
2798 }
2799}
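// For instance, a plain `STRXui Rt, [Rn, #imm]` keeps its immediate at operand
// index 2; a pre/post-indexed form such as STRXpre gains a writeback def, which
// pushes the immediate to index 3; paired pre/post-indexed forms such as STPXpre
// carry two data registers plus the writeback def, so the immediate sits at
// index 4.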
2800
2801bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2802 switch (MI.getOpcode()) {
2803 default:
2804 return false;
2805 // Scaled instructions.
2806 case AArch64::STRSui:
2807 case AArch64::STRDui:
2808 case AArch64::STRQui:
2809 case AArch64::STRXui:
2810 case AArch64::STRWui:
2811 case AArch64::LDRSui:
2812 case AArch64::LDRDui:
2813 case AArch64::LDRQui:
2814 case AArch64::LDRXui:
2815 case AArch64::LDRWui:
2816 case AArch64::LDRSWui:
2817 // Unscaled instructions.
2818 case AArch64::STURSi:
2819 case AArch64::STRSpre:
2820 case AArch64::STURDi:
2821 case AArch64::STRDpre:
2822 case AArch64::STURQi:
2823 case AArch64::STRQpre:
2824 case AArch64::STURWi:
2825 case AArch64::STRWpre:
2826 case AArch64::STURXi:
2827 case AArch64::STRXpre:
2828 case AArch64::LDURSi:
2829 case AArch64::LDRSpre:
2830 case AArch64::LDURDi:
2831 case AArch64::LDRDpre:
2832 case AArch64::LDURQi:
2833 case AArch64::LDRQpre:
2834 case AArch64::LDURWi:
2835 case AArch64::LDRWpre:
2836 case AArch64::LDURXi:
2837 case AArch64::LDRXpre:
2838 case AArch64::LDURSWi:
2839 case AArch64::LDRSWpre:
2840 // SVE instructions.
2841 case AArch64::LDR_ZXI:
2842 case AArch64::STR_ZXI:
2843 return true;
2844 }
2845}
2846
2847bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2848 switch (MI.getOpcode()) {
2849 default:
2850 assert((!MI.isCall() || !MI.isReturn()) &&
2851 "Unexpected instruction - was a new tail call opcode introduced?");
2852 return false;
2853 case AArch64::TCRETURNdi:
2854 case AArch64::TCRETURNri:
2855 case AArch64::TCRETURNrix16x17:
2856 case AArch64::TCRETURNrix17:
2857 case AArch64::TCRETURNrinotx16:
2858 case AArch64::TCRETURNriALL:
2859 case AArch64::AUTH_TCRETURN:
2860 case AArch64::AUTH_TCRETURN_BTI:
2861 return true;
2862 }
2863}
2864
2865unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2866 switch (Opc) {
2867 default:
2868 llvm_unreachable("Opcode has no flag setting equivalent!");
2869 // 32-bit cases:
2870 case AArch64::ADDWri:
2871 return AArch64::ADDSWri;
2872 case AArch64::ADDWrr:
2873 return AArch64::ADDSWrr;
2874 case AArch64::ADDWrs:
2875 return AArch64::ADDSWrs;
2876 case AArch64::ADDWrx:
2877 return AArch64::ADDSWrx;
2878 case AArch64::ANDWri:
2879 return AArch64::ANDSWri;
2880 case AArch64::ANDWrr:
2881 return AArch64::ANDSWrr;
2882 case AArch64::ANDWrs:
2883 return AArch64::ANDSWrs;
2884 case AArch64::BICWrr:
2885 return AArch64::BICSWrr;
2886 case AArch64::BICWrs:
2887 return AArch64::BICSWrs;
2888 case AArch64::SUBWri:
2889 return AArch64::SUBSWri;
2890 case AArch64::SUBWrr:
2891 return AArch64::SUBSWrr;
2892 case AArch64::SUBWrs:
2893 return AArch64::SUBSWrs;
2894 case AArch64::SUBWrx:
2895 return AArch64::SUBSWrx;
2896 // 64-bit cases:
2897 case AArch64::ADDXri:
2898 return AArch64::ADDSXri;
2899 case AArch64::ADDXrr:
2900 return AArch64::ADDSXrr;
2901 case AArch64::ADDXrs:
2902 return AArch64::ADDSXrs;
2903 case AArch64::ADDXrx:
2904 return AArch64::ADDSXrx;
2905 case AArch64::ANDXri:
2906 return AArch64::ANDSXri;
2907 case AArch64::ANDXrr:
2908 return AArch64::ANDSXrr;
2909 case AArch64::ANDXrs:
2910 return AArch64::ANDSXrs;
2911 case AArch64::BICXrr:
2912 return AArch64::BICSXrr;
2913 case AArch64::BICXrs:
2914 return AArch64::BICSXrs;
2915 case AArch64::SUBXri:
2916 return AArch64::SUBSXri;
2917 case AArch64::SUBXrr:
2918 return AArch64::SUBSXrr;
2919 case AArch64::SUBXrs:
2920 return AArch64::SUBSXrs;
2921 case AArch64::SUBXrx:
2922 return AArch64::SUBSXrx;
2923 // SVE instructions:
2924 case AArch64::AND_PPzPP:
2925 return AArch64::ANDS_PPzPP;
2926 case AArch64::BIC_PPzPP:
2927 return AArch64::BICS_PPzPP;
2928 case AArch64::EOR_PPzPP:
2929 return AArch64::EORS_PPzPP;
2930 case AArch64::NAND_PPzPP:
2931 return AArch64::NANDS_PPzPP;
2932 case AArch64::NOR_PPzPP:
2933 return AArch64::NORS_PPzPP;
2934 case AArch64::ORN_PPzPP:
2935 return AArch64::ORNS_PPzPP;
2936 case AArch64::ORR_PPzPP:
2937 return AArch64::ORRS_PPzPP;
2938 case AArch64::BRKA_PPzP:
2939 return AArch64::BRKAS_PPzP;
2940 case AArch64::BRKPA_PPzPP:
2941 return AArch64::BRKPAS_PPzPP;
2942 case AArch64::BRKB_PPzP:
2943 return AArch64::BRKBS_PPzP;
2944 case AArch64::BRKPB_PPzPP:
2945 return AArch64::BRKPBS_PPzPP;
2946 case AArch64::BRKN_PPzP:
2947 return AArch64::BRKNS_PPzP;
2948 case AArch64::RDFFR_PPz:
2949 return AArch64::RDFFRS_PPz;
2950 case AArch64::PTRUE_B:
2951 return AArch64::PTRUES_B;
2952 }
2953}
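// For example, the flag-setting counterpart of `add w0, w1, w2` (ADDWrr) is
// `adds w0, w1, w2` (ADDSWrr); the same pattern applies to the SVE predicate
// logical, break and RDFFR/PTRUE opcodes listed above.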
2954
2955// Is this a candidate for ld/st merging or pairing? For example, we don't
2956// touch volatiles or load/stores that have a hint to avoid pair formation.
2957bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2958
2959 bool IsPreLdSt = isPreLdSt(MI);
2960
2961 // If this is a volatile load/store, don't mess with it.
2962 if (MI.hasOrderedMemoryRef())
2963 return false;
2964
2965 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2966 // For Pre-inc LD/ST, the operand is shifted by one.
2967 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2968 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2969 "Expected a reg or frame index operand.");
2970
2971 // For Pre-indexed addressing quadword instructions, the third operand is the
2972 // immediate value.
2973 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2974
2975 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2976 return false;
2977
2978 // Can't merge/pair if the instruction modifies the base register.
2979 // e.g., ldr x0, [x0]
2980 // This case will never occur with an FI base.
2981 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2982 // STR<S,D,Q,W,X>pre, it can be merged.
2983 // For example:
2984 // ldr q0, [x11, #32]!
2985 // ldr q1, [x11, #16]
2986 // to
2987 // ldp q0, q1, [x11, #32]!
2988 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2989 Register BaseReg = MI.getOperand(1).getReg();
2990 const TargetRegisterInfo *TRI = &getRegisterInfo();
2991 if (MI.modifiesRegister(BaseReg, TRI))
2992 return false;
2993 }
2994
2995 // Pairing SVE fills/spills is only valid for little-endian targets that
2996 // implement VLS 128.
2997 switch (MI.getOpcode()) {
2998 default:
2999 break;
3000 case AArch64::LDR_ZXI:
3001 case AArch64::STR_ZXI:
3002 if (!Subtarget.isLittleEndian() ||
3003 Subtarget.getSVEVectorSizeInBits() != 128)
3004 return false;
3005 }
3006
3007 // Check if this load/store has a hint to avoid pair formation.
3008 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3009 if (isLdStPairSuppressed(MI))
3010 return false;
3011
3012 // Do not pair any callee-save store/reload instructions in the
3013 // prologue/epilogue if the CFI information encoded the operations as separate
3014 // instructions, as that will cause the size of the actual prologue to mismatch
3015 // with the prologue size recorded in the Windows CFI.
3016 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3017 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3018 MI.getMF()->getFunction().needsUnwindTableEntry();
3019 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3020 MI.getFlag(MachineInstr::FrameDestroy)))
3021 return false;
3022
3023 // On some CPUs quad load/store pairs are slower than two single load/stores.
3024 if (Subtarget.isPaired128Slow()) {
3025 switch (MI.getOpcode()) {
3026 default:
3027 break;
3028 case AArch64::LDURQi:
3029 case AArch64::STURQi:
3030 case AArch64::LDRQui:
3031 case AArch64::STRQui:
3032 return false;
3033 }
3034 }
3035
3036 return true;
3037}
3038
3039bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3040 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3041 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3042 const TargetRegisterInfo *TRI) const {
3043 if (!LdSt.mayLoadOrStore())
3044 return false;
3045
3046 const MachineOperand *BaseOp;
3047 TypeSize WidthN(0, false);
3048 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3049 WidthN, TRI))
3050 return false;
3051 // The maximum vscale is 16 under AArch64, so return the maximal extent for
3052 // the vector.
3053 Width = LocationSize::precise(WidthN);
3054 BaseOps.push_back(BaseOp);
3055 return true;
3056}
3057
3058std::optional<ExtAddrMode>
3059AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3060 const TargetRegisterInfo *TRI) const {
3061 const MachineOperand *Base; // Filled with the base operand of MI.
3062 int64_t Offset; // Filled with the offset of MI.
3063 bool OffsetIsScalable;
3064 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3065 return std::nullopt;
3066
3067 if (!Base->isReg())
3068 return std::nullopt;
3069 ExtAddrMode AM;
3070 AM.BaseReg = Base->getReg();
3071 AM.Displacement = Offset;
3072 AM.ScaledReg = 0;
3073 AM.Scale = 0;
3074 return AM;
3075}
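// A hypothetical caller sketch (TII, MemI and TRI assumed to be in scope); the
// ExtAddrMode produced here always describes a simple base + displacement form:
//
//   if (std::optional<ExtAddrMode> AM =
//           TII.getAddrModeFromMemoryOp(MemI, TRI)) {
//     // AM->BaseReg and AM->Displacement are valid; AM->ScaledReg is 0 and
//     // AM->Scale is 0 for this simple form.
//   }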
3076
3077bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3078 Register Reg,
3079 const MachineInstr &AddrI,
3080 ExtAddrMode &AM) const {
3081 // Filter out instructions into which we cannot fold.
3082 unsigned NumBytes;
3083 int64_t OffsetScale = 1;
3084 switch (MemI.getOpcode()) {
3085 default:
3086 return false;
3087
3088 case AArch64::LDURQi:
3089 case AArch64::STURQi:
3090 NumBytes = 16;
3091 break;
3092
3093 case AArch64::LDURDi:
3094 case AArch64::STURDi:
3095 case AArch64::LDURXi:
3096 case AArch64::STURXi:
3097 NumBytes = 8;
3098 break;
3099
3100 case AArch64::LDURWi:
3101 case AArch64::LDURSWi:
3102 case AArch64::STURWi:
3103 NumBytes = 4;
3104 break;
3105
3106 case AArch64::LDURHi:
3107 case AArch64::STURHi:
3108 case AArch64::LDURHHi:
3109 case AArch64::STURHHi:
3110 case AArch64::LDURSHXi:
3111 case AArch64::LDURSHWi:
3112 NumBytes = 2;
3113 break;
3114
3115 case AArch64::LDRBroX:
3116 case AArch64::LDRBBroX:
3117 case AArch64::LDRSBXroX:
3118 case AArch64::LDRSBWroX:
3119 case AArch64::STRBroX:
3120 case AArch64::STRBBroX:
3121 case AArch64::LDURBi:
3122 case AArch64::LDURBBi:
3123 case AArch64::LDURSBXi:
3124 case AArch64::LDURSBWi:
3125 case AArch64::STURBi:
3126 case AArch64::STURBBi:
3127 case AArch64::LDRBui:
3128 case AArch64::LDRBBui:
3129 case AArch64::LDRSBXui:
3130 case AArch64::LDRSBWui:
3131 case AArch64::STRBui:
3132 case AArch64::STRBBui:
3133 NumBytes = 1;
3134 break;
3135
3136 case AArch64::LDRQroX:
3137 case AArch64::STRQroX:
3138 case AArch64::LDRQui:
3139 case AArch64::STRQui:
3140 NumBytes = 16;
3141 OffsetScale = 16;
3142 break;
3143
3144 case AArch64::LDRDroX:
3145 case AArch64::STRDroX:
3146 case AArch64::LDRXroX:
3147 case AArch64::STRXroX:
3148 case AArch64::LDRDui:
3149 case AArch64::STRDui:
3150 case AArch64::LDRXui:
3151 case AArch64::STRXui:
3152 NumBytes = 8;
3153 OffsetScale = 8;
3154 break;
3155
3156 case AArch64::LDRWroX:
3157 case AArch64::LDRSWroX:
3158 case AArch64::STRWroX:
3159 case AArch64::LDRWui:
3160 case AArch64::LDRSWui:
3161 case AArch64::STRWui:
3162 NumBytes = 4;
3163 OffsetScale = 4;
3164 break;
3165
3166 case AArch64::LDRHroX:
3167 case AArch64::STRHroX:
3168 case AArch64::LDRHHroX:
3169 case AArch64::STRHHroX:
3170 case AArch64::LDRSHXroX:
3171 case AArch64::LDRSHWroX:
3172 case AArch64::LDRHui:
3173 case AArch64::STRHui:
3174 case AArch64::LDRHHui:
3175 case AArch64::STRHHui:
3176 case AArch64::LDRSHXui:
3177 case AArch64::LDRSHWui:
3178 NumBytes = 2;
3179 OffsetScale = 2;
3180 break;
3181 }
3182
3183 // Check the fold operand is not the loaded/stored value.
3184 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3185 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3186 return false;
3187
3188 // Handle memory instructions with a [Reg, Reg] addressing mode.
3189 if (MemI.getOperand(2).isReg()) {
3190 // Bail if the addressing mode already includes extension of the offset
3191 // register.
3192 if (MemI.getOperand(3).getImm())
3193 return false;
3194
3195 // Check if we actually have a scaled offset.
3196 if (MemI.getOperand(4).getImm() == 0)
3197 OffsetScale = 1;
3198
3199 // If the address instruction is folded into the base register, then the
3200 // addressing mode must not have a scale, so that we can swap the base and
3201 // the scaled registers.
3202 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3203 return false;
3204
3205 switch (AddrI.getOpcode()) {
3206 default:
3207 return false;
3208
3209 case AArch64::SBFMXri:
3210 // sxtw Xa, Wm
3211 // ldr Xd, [Xn, Xa, lsl #N]
3212 // ->
3213 // ldr Xd, [Xn, Wm, sxtw #N]
3214 if (AddrI.getOperand(2).getImm() != 0 ||
3215 AddrI.getOperand(3).getImm() != 31)
3216 return false;
3217
3218 AM.BaseReg = MemI.getOperand(1).getReg();
3219 if (AM.BaseReg == Reg)
3220 AM.BaseReg = MemI.getOperand(2).getReg();
3221 AM.ScaledReg = AddrI.getOperand(1).getReg();
3222 AM.Scale = OffsetScale;
3223 AM.Displacement = 0;
3224 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3225 return true;
3226
3227 case TargetOpcode::SUBREG_TO_REG: {
3228 // mov Wa, Wm
3229 // ldr Xd, [Xn, Xa, lsl #N]
3230 // ->
3231 // ldr Xd, [Xn, Wm, uxtw #N]
3232
3233 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3234 if (AddrI.getOperand(1).getImm() != 0 ||
3235 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3236 return false;
3237
3238 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3239 Register OffsetReg = AddrI.getOperand(2).getReg();
3240 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3241 return false;
3242
3243 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3244 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3245 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3246 DefMI.getOperand(3).getImm() != 0)
3247 return false;
3248
3249 AM.BaseReg = MemI.getOperand(1).getReg();
3250 if (AM.BaseReg == Reg)
3251 AM.BaseReg = MemI.getOperand(2).getReg();
3252 AM.ScaledReg = DefMI.getOperand(2).getReg();
3253 AM.Scale = OffsetScale;
3254 AM.Displacement = 0;
3255 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3256 return true;
3257 }
3258 }
3259 }
3260
3261 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3262
3263 // Check we are not breaking a potential conversion to an LDP.
3264 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3265 int64_t NewOffset) -> bool {
3266 int64_t MinOffset, MaxOffset;
3267 switch (NumBytes) {
3268 default:
3269 return true;
3270 case 4:
3271 MinOffset = -256;
3272 MaxOffset = 252;
3273 break;
3274 case 8:
3275 MinOffset = -512;
3276 MaxOffset = 504;
3277 break;
3278 case 16:
3279 MinOffset = -1024;
3280 MaxOffset = 1008;
3281 break;
3282 }
3283 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3284 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3285 };
3286 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3287 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3288 int64_t NewOffset = OldOffset + Disp;
3289 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3290 return false;
3291 // If the old offset would fit into an LDP, but the new offset wouldn't,
3292 // bail out.
3293 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3294 return false;
3295 AM.BaseReg = AddrI.getOperand(1).getReg();
3296 AM.ScaledReg = 0;
3297 AM.Scale = 0;
3298 AM.Displacement = NewOffset;
3299 AM.Form = ExtAddrMode::Formula::Basic;
3300 return true;
3301 };
3302
3303 auto canFoldAddRegIntoAddrMode =
3304 [&](int64_t Scale,
3305 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3306 if (MemI.getOperand(2).getImm() != 0)
3307 return false;
3308 if ((unsigned)Scale != Scale)
3309 return false;
3310 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3311 return false;
3312 AM.BaseReg = AddrI.getOperand(1).getReg();
3313 AM.ScaledReg = AddrI.getOperand(2).getReg();
3314 AM.Scale = Scale;
3315 AM.Displacement = 0;
3316 AM.Form = Form;
3317 return true;
3318 };
3319
3320 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3321 unsigned Opcode = MemI.getOpcode();
3322 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3323 Subtarget.isSTRQroSlow();
3324 };
3325
3326 int64_t Disp = 0;
3327 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3328 switch (AddrI.getOpcode()) {
3329 default:
3330 return false;
3331
3332 case AArch64::ADDXri:
3333 // add Xa, Xn, #N
3334 // ldr Xd, [Xa, #M]
3335 // ->
3336 // ldr Xd, [Xn, #N'+M]
3337 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3338 return canFoldAddSubImmIntoAddrMode(Disp);
3339
3340 case AArch64::SUBXri:
3341 // sub Xa, Xn, #N
3342 // ldr Xd, [Xa, #M]
3343 // ->
3344 // ldr Xd, [Xn, #N'+M]
3345 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3346 return canFoldAddSubImmIntoAddrMode(-Disp);
3347
3348 case AArch64::ADDXrs: {
3349 // add Xa, Xn, Xm, lsl #N
3350 // ldr Xd, [Xa]
3351 // ->
3352 // ldr Xd, [Xn, Xm, lsl #N]
3353
3354 // Don't fold the add if the result would be slower, unless optimising for
3355 // size.
3356 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3357 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3358 return false;
3359 Shift = AArch64_AM::getShiftValue(Shift);
3360 if (!OptSize) {
3361 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3362 return false;
3363 if (avoidSlowSTRQ(MemI))
3364 return false;
3365 }
3366 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3367 }
3368
3369 case AArch64::ADDXrr:
3370 // add Xa, Xn, Xm
3371 // ldr Xd, [Xa]
3372 // ->
3373 // ldr Xd, [Xn, Xm, lsl #0]
3374
3375 // Don't fold the add if the result would be slower, unless optimising for
3376 // size.
3377 if (!OptSize && avoidSlowSTRQ(MemI))
3378 return false;
3379 return canFoldAddRegIntoAddrMode(1);
3380
3381 case AArch64::ADDXrx:
3382 // add Xa, Xn, Wm, {s,u}xtw #N
3383 // ldr Xd, [Xa]
3384 // ->
3385 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3386
3387 // Don't fold the add if the result would be slower, unless optimising for
3388 // size.
3389 if (!OptSize && avoidSlowSTRQ(MemI))
3390 return false;
3391
3392 // Can fold only sign-/zero-extend of a word.
3393 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3394 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3395 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3396 return false;
3397
3398 return canFoldAddRegIntoAddrMode(
3399 1ULL << AArch64_AM::getArithShiftValue(Imm),
3400 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3401 : ExtAddrMode::Formula::ZExtScaledReg);
3402 }
3403}
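// A standalone sketch of the LDP/STP offset window enforced by
// validateOffsetForLDP above: a paired access of NumBytes per element takes a
// signed 7-bit immediate scaled by the element size, i.e. [-64*N, 63*N]. The
// function name is invented for the example.
#include <cstdint>
#include <utility>

namespace {
std::pair<int64_t, int64_t> ldpOffsetWindow(unsigned NumBytes) {
  // Matches the 4-, 8- and 16-byte cases handled above; other sizes never pair.
  return {-64 * static_cast<int64_t>(NumBytes),
          63 * static_cast<int64_t>(NumBytes)};
}
} // namespace
// ldpOffsetWindow(4)  == {-256, 252}
// ldpOffsetWindow(8)  == {-512, 504}
// ldpOffsetWindow(16) == {-1024, 1008}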
3404
3405// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3406// return the opcode of an instruction performing the same operation, but using
3407// the [Reg, Reg] addressing mode.
3408static unsigned regOffsetOpcode(unsigned Opcode) {
3409 switch (Opcode) {
3410 default:
3411 llvm_unreachable("Address folding not implemented for instruction");
3412
3413 case AArch64::LDURQi:
3414 case AArch64::LDRQui:
3415 return AArch64::LDRQroX;
3416 case AArch64::STURQi:
3417 case AArch64::STRQui:
3418 return AArch64::STRQroX;
3419 case AArch64::LDURDi:
3420 case AArch64::LDRDui:
3421 return AArch64::LDRDroX;
3422 case AArch64::STURDi:
3423 case AArch64::STRDui:
3424 return AArch64::STRDroX;
3425 case AArch64::LDURXi:
3426 case AArch64::LDRXui:
3427 return AArch64::LDRXroX;
3428 case AArch64::STURXi:
3429 case AArch64::STRXui:
3430 return AArch64::STRXroX;
3431 case AArch64::LDURWi:
3432 case AArch64::LDRWui:
3433 return AArch64::LDRWroX;
3434 case AArch64::LDURSWi:
3435 case AArch64::LDRSWui:
3436 return AArch64::LDRSWroX;
3437 case AArch64::STURWi:
3438 case AArch64::STRWui:
3439 return AArch64::STRWroX;
3440 case AArch64::LDURHi:
3441 case AArch64::LDRHui:
3442 return AArch64::LDRHroX;
3443 case AArch64::STURHi:
3444 case AArch64::STRHui:
3445 return AArch64::STRHroX;
3446 case AArch64::LDURHHi:
3447 case AArch64::LDRHHui:
3448 return AArch64::LDRHHroX;
3449 case AArch64::STURHHi:
3450 case AArch64::STRHHui:
3451 return AArch64::STRHHroX;
3452 case AArch64::LDURSHXi:
3453 case AArch64::LDRSHXui:
3454 return AArch64::LDRSHXroX;
3455 case AArch64::LDURSHWi:
3456 case AArch64::LDRSHWui:
3457 return AArch64::LDRSHWroX;
3458 case AArch64::LDURBi:
3459 case AArch64::LDRBui:
3460 return AArch64::LDRBroX;
3461 case AArch64::LDURBBi:
3462 case AArch64::LDRBBui:
3463 return AArch64::LDRBBroX;
3464 case AArch64::LDURSBXi:
3465 case AArch64::LDRSBXui:
3466 return AArch64::LDRSBXroX;
3467 case AArch64::LDURSBWi:
3468 case AArch64::LDRSBWui:
3469 return AArch64::LDRSBWroX;
3470 case AArch64::STURBi:
3471 case AArch64::STRBui:
3472 return AArch64::STRBroX;
3473 case AArch64::STURBBi:
3474 case AArch64::STRBBui:
3475 return AArch64::STRBBroX;
3476 }
3477}
3478
3479// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3480// the opcode of an instruction performing the same operation, but using the
3481// [Reg, #Imm] addressing mode with scaled offset.
3482unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3483 switch (Opcode) {
3484 default:
3485 llvm_unreachable("Address folding not implemented for instruction");
3486
3487 case AArch64::LDURQi:
3488 Scale = 16;
3489 return AArch64::LDRQui;
3490 case AArch64::STURQi:
3491 Scale = 16;
3492 return AArch64::STRQui;
3493 case AArch64::LDURDi:
3494 Scale = 8;
3495 return AArch64::LDRDui;
3496 case AArch64::STURDi:
3497 Scale = 8;
3498 return AArch64::STRDui;
3499 case AArch64::LDURXi:
3500 Scale = 8;
3501 return AArch64::LDRXui;
3502 case AArch64::STURXi:
3503 Scale = 8;
3504 return AArch64::STRXui;
3505 case AArch64::LDURWi:
3506 Scale = 4;
3507 return AArch64::LDRWui;
3508 case AArch64::LDURSWi:
3509 Scale = 4;
3510 return AArch64::LDRSWui;
3511 case AArch64::STURWi:
3512 Scale = 4;
3513 return AArch64::STRWui;
3514 case AArch64::LDURHi:
3515 Scale = 2;
3516 return AArch64::LDRHui;
3517 case AArch64::STURHi:
3518 Scale = 2;
3519 return AArch64::STRHui;
3520 case AArch64::LDURHHi:
3521 Scale = 2;
3522 return AArch64::LDRHHui;
3523 case AArch64::STURHHi:
3524 Scale = 2;
3525 return AArch64::STRHHui;
3526 case AArch64::LDURSHXi:
3527 Scale = 2;
3528 return AArch64::LDRSHXui;
3529 case AArch64::LDURSHWi:
3530 Scale = 2;
3531 return AArch64::LDRSHWui;
3532 case AArch64::LDURBi:
3533 Scale = 1;
3534 return AArch64::LDRBui;
3535 case AArch64::LDURBBi:
3536 Scale = 1;
3537 return AArch64::LDRBBui;
3538 case AArch64::LDURSBXi:
3539 Scale = 1;
3540 return AArch64::LDRSBXui;
3541 case AArch64::LDURSBWi:
3542 Scale = 1;
3543 return AArch64::LDRSBWui;
3544 case AArch64::STURBi:
3545 Scale = 1;
3546 return AArch64::STRBui;
3547 case AArch64::STURBBi:
3548 Scale = 1;
3549 return AArch64::STRBBui;
3550 case AArch64::LDRQui:
3551 case AArch64::STRQui:
3552 Scale = 16;
3553 return Opcode;
3554 case AArch64::LDRDui:
3555 case AArch64::STRDui:
3556 case AArch64::LDRXui:
3557 case AArch64::STRXui:
3558 Scale = 8;
3559 return Opcode;
3560 case AArch64::LDRWui:
3561 case AArch64::LDRSWui:
3562 case AArch64::STRWui:
3563 Scale = 4;
3564 return Opcode;
3565 case AArch64::LDRHui:
3566 case AArch64::STRHui:
3567 case AArch64::LDRHHui:
3568 case AArch64::STRHHui:
3569 case AArch64::LDRSHXui:
3570 case AArch64::LDRSHWui:
3571 Scale = 2;
3572 return Opcode;
3573 case AArch64::LDRBui:
3574 case AArch64::LDRBBui:
3575 case AArch64::LDRSBXui:
3576 case AArch64::LDRSBWui:
3577 case AArch64::STRBui:
3578 case AArch64::STRBBui:
3579 Scale = 1;
3580 return Opcode;
3581 }
3582}
3583
3584// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3585// the opcode of an instruction performing the same operation, but using the
3586// [Reg, #Imm] addressing mode with unscaled offset.
3587unsigned unscaledOffsetOpcode(unsigned Opcode) {
3588 switch (Opcode) {
3589 default:
3590 llvm_unreachable("Address folding not implemented for instruction");
3591
3592 case AArch64::LDURQi:
3593 case AArch64::STURQi:
3594 case AArch64::LDURDi:
3595 case AArch64::STURDi:
3596 case AArch64::LDURXi:
3597 case AArch64::STURXi:
3598 case AArch64::LDURWi:
3599 case AArch64::LDURSWi:
3600 case AArch64::STURWi:
3601 case AArch64::LDURHi:
3602 case AArch64::STURHi:
3603 case AArch64::LDURHHi:
3604 case AArch64::STURHHi:
3605 case AArch64::LDURSHXi:
3606 case AArch64::LDURSHWi:
3607 case AArch64::LDURBi:
3608 case AArch64::STURBi:
3609 case AArch64::LDURBBi:
3610 case AArch64::STURBBi:
3611 case AArch64::LDURSBWi:
3612 case AArch64::LDURSBXi:
3613 return Opcode;
3614 case AArch64::LDRQui:
3615 return AArch64::LDURQi;
3616 case AArch64::STRQui:
3617 return AArch64::STURQi;
3618 case AArch64::LDRDui:
3619 return AArch64::LDURDi;
3620 case AArch64::STRDui:
3621 return AArch64::STURDi;
3622 case AArch64::LDRXui:
3623 return AArch64::LDURXi;
3624 case AArch64::STRXui:
3625 return AArch64::STURXi;
3626 case AArch64::LDRWui:
3627 return AArch64::LDURWi;
3628 case AArch64::LDRSWui:
3629 return AArch64::LDURSWi;
3630 case AArch64::STRWui:
3631 return AArch64::STURWi;
3632 case AArch64::LDRHui:
3633 return AArch64::LDURHi;
3634 case AArch64::STRHui:
3635 return AArch64::STURHi;
3636 case AArch64::LDRHHui:
3637 return AArch64::LDURHHi;
3638 case AArch64::STRHHui:
3639 return AArch64::STURHHi;
3640 case AArch64::LDRSHXui:
3641 return AArch64::LDURSHXi;
3642 case AArch64::LDRSHWui:
3643 return AArch64::LDURSHWi;
3644 case AArch64::LDRBBui:
3645 return AArch64::LDURBBi;
3646 case AArch64::LDRBui:
3647 return AArch64::LDURBi;
3648 case AArch64::STRBBui:
3649 return AArch64::STURBBi;
3650 case AArch64::STRBui:
3651 return AArch64::STURBi;
3652 case AArch64::LDRSBWui:
3653 return AArch64::LDURSBWi;
3654 case AArch64::LDRSBXui:
3655 return AArch64::LDURSBXi;
3656 }
3657}
3658
3659// Given the opcode of a memory load/store instruction, return the opcode of an
3660// instruction performing the same operation, but using
3661// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3662// offset register.
3663static unsigned offsetExtendOpcode(unsigned Opcode) {
3664 switch (Opcode) {
3665 default:
3666 llvm_unreachable("Address folding not implemented for instruction");
3667
3668 case AArch64::LDRQroX:
3669 case AArch64::LDURQi:
3670 case AArch64::LDRQui:
3671 return AArch64::LDRQroW;
3672 case AArch64::STRQroX:
3673 case AArch64::STURQi:
3674 case AArch64::STRQui:
3675 return AArch64::STRQroW;
3676 case AArch64::LDRDroX:
3677 case AArch64::LDURDi:
3678 case AArch64::LDRDui:
3679 return AArch64::LDRDroW;
3680 case AArch64::STRDroX:
3681 case AArch64::STURDi:
3682 case AArch64::STRDui:
3683 return AArch64::STRDroW;
3684 case AArch64::LDRXroX:
3685 case AArch64::LDURXi:
3686 case AArch64::LDRXui:
3687 return AArch64::LDRXroW;
3688 case AArch64::STRXroX:
3689 case AArch64::STURXi:
3690 case AArch64::STRXui:
3691 return AArch64::STRXroW;
3692 case AArch64::LDRWroX:
3693 case AArch64::LDURWi:
3694 case AArch64::LDRWui:
3695 return AArch64::LDRWroW;
3696 case AArch64::LDRSWroX:
3697 case AArch64::LDURSWi:
3698 case AArch64::LDRSWui:
3699 return AArch64::LDRSWroW;
3700 case AArch64::STRWroX:
3701 case AArch64::STURWi:
3702 case AArch64::STRWui:
3703 return AArch64::STRWroW;
3704 case AArch64::LDRHroX:
3705 case AArch64::LDURHi:
3706 case AArch64::LDRHui:
3707 return AArch64::LDRHroW;
3708 case AArch64::STRHroX:
3709 case AArch64::STURHi:
3710 case AArch64::STRHui:
3711 return AArch64::STRHroW;
3712 case AArch64::LDRHHroX:
3713 case AArch64::LDURHHi:
3714 case AArch64::LDRHHui:
3715 return AArch64::LDRHHroW;
3716 case AArch64::STRHHroX:
3717 case AArch64::STURHHi:
3718 case AArch64::STRHHui:
3719 return AArch64::STRHHroW;
3720 case AArch64::LDRSHXroX:
3721 case AArch64::LDURSHXi:
3722 case AArch64::LDRSHXui:
3723 return AArch64::LDRSHXroW;
3724 case AArch64::LDRSHWroX:
3725 case AArch64::LDURSHWi:
3726 case AArch64::LDRSHWui:
3727 return AArch64::LDRSHWroW;
3728 case AArch64::LDRBroX:
3729 case AArch64::LDURBi:
3730 case AArch64::LDRBui:
3731 return AArch64::LDRBroW;
3732 case AArch64::LDRBBroX:
3733 case AArch64::LDURBBi:
3734 case AArch64::LDRBBui:
3735 return AArch64::LDRBBroW;
3736 case AArch64::LDRSBXroX:
3737 case AArch64::LDURSBXi:
3738 case AArch64::LDRSBXui:
3739 return AArch64::LDRSBXroW;
3740 case AArch64::LDRSBWroX:
3741 case AArch64::LDURSBWi:
3742 case AArch64::LDRSBWui:
3743 return AArch64::LDRSBWroW;
3744 case AArch64::STRBroX:
3745 case AArch64::STURBi:
3746 case AArch64::STRBui:
3747 return AArch64::STRBroW;
3748 case AArch64::STRBBroX:
3749 case AArch64::STURBBi:
3750 case AArch64::STRBBui:
3751 return AArch64::STRBBroW;
3752 }
3753}
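// For illustration, the *roW forms selected above take a 32-bit offset
// register that is sign- or zero-extended and optionally scaled by the access
// size, e.g. for an 8-byte load:
//   ldr x0, [x1, w2, sxtw #3]   // LDRXroW: address = x1 + sext(w2) * 8
//   ldr x0, [x1, w2, uxtw]      // LDRXroW: address = x1 + zext(w2)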
3754
3755MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3756 const ExtAddrMode &AM) const {
3757
3758 const DebugLoc &DL = MemI.getDebugLoc();
3759 MachineBasicBlock &MBB = *MemI.getParent();
3760 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3761
3762 if (AM.Form == ExtAddrMode::Formula::Basic) {
3763 if (AM.ScaledReg) {
3764 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3765 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3766 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3767 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3768 .addReg(MemI.getOperand(0).getReg(),
3769 MemI.mayLoad() ? RegState::Define : 0)
3770 .addReg(AM.BaseReg)
3771 .addReg(AM.ScaledReg)
3772 .addImm(0)
3773 .addImm(AM.Scale > 1)
3774 .setMemRefs(MemI.memoperands())
3775 .setMIFlags(MemI.getFlags());
3776 return B.getInstr();
3777 }
3778
3779 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3780 "Addressing mode not supported for folding");
3781
3782 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3783 unsigned Scale = 1;
3784 unsigned Opcode = MemI.getOpcode();
3785 if (isInt<9>(AM.Displacement))
3786 Opcode = unscaledOffsetOpcode(Opcode);
3787 else
3788 Opcode = scaledOffsetOpcode(Opcode, Scale);
3789
3790 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3791 .addReg(MemI.getOperand(0).getReg(),
3792 MemI.mayLoad() ? RegState::Define : 0)
3793 .addReg(AM.BaseReg)
3794 .addImm(AM.Displacement / Scale)
3795 .setMemRefs(MemI.memoperands())
3796 .setMIFlags(MemI.getFlags());
3797 return B.getInstr();
3798 }
3799
3800 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3801 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3802 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3803 assert(AM.ScaledReg && !AM.Displacement &&
3804 "Address offset can be a register or an immediate, but not both");
3805 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3806 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3807 // Make sure the offset register is in the correct register class.
3808 Register OffsetReg = AM.ScaledReg;
3809 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3810 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3811 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3812 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3813 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3814 }
3815 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3816 .addReg(MemI.getOperand(0).getReg(),
3817 MemI.mayLoad() ? RegState::Define : 0)
3818 .addReg(AM.BaseReg)
3819 .addReg(OffsetReg)
3820 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3821 .addImm(AM.Scale != 1)
3822 .setMemRefs(MemI.memoperands())
3823 .setMIFlags(MemI.getFlags());
3824
3825 return B.getInstr();
3826 }
3827
3829 "Function must not be called with an addressing mode it can't handle");
3830}
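// A sketch of the kind of rewrite emitLdStWithAddr performs (the legality and
// profitability checks are made by the caller): an address computation feeding
// a load such as
//   add x8, x0, x1, lsl #3
//   ldr x2, [x8]
// is replaced by a single register-offset access
//   ldr x2, [x0, x1, lsl #3]
// using the opcode chosen by regOffsetOpcode above.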
3831
3832/// Return true if the opcode is a post-index ld/st instruction, which really
3833/// loads from or stores to base+0.
3834static bool isPostIndexLdStOpcode(unsigned Opcode) {
3835 switch (Opcode) {
3836 default:
3837 return false;
3838 case AArch64::LD1Fourv16b_POST:
3839 case AArch64::LD1Fourv1d_POST:
3840 case AArch64::LD1Fourv2d_POST:
3841 case AArch64::LD1Fourv2s_POST:
3842 case AArch64::LD1Fourv4h_POST:
3843 case AArch64::LD1Fourv4s_POST:
3844 case AArch64::LD1Fourv8b_POST:
3845 case AArch64::LD1Fourv8h_POST:
3846 case AArch64::LD1Onev16b_POST:
3847 case AArch64::LD1Onev1d_POST:
3848 case AArch64::LD1Onev2d_POST:
3849 case AArch64::LD1Onev2s_POST:
3850 case AArch64::LD1Onev4h_POST:
3851 case AArch64::LD1Onev4s_POST:
3852 case AArch64::LD1Onev8b_POST:
3853 case AArch64::LD1Onev8h_POST:
3854 case AArch64::LD1Rv16b_POST:
3855 case AArch64::LD1Rv1d_POST:
3856 case AArch64::LD1Rv2d_POST:
3857 case AArch64::LD1Rv2s_POST:
3858 case AArch64::LD1Rv4h_POST:
3859 case AArch64::LD1Rv4s_POST:
3860 case AArch64::LD1Rv8b_POST:
3861 case AArch64::LD1Rv8h_POST:
3862 case AArch64::LD1Threev16b_POST:
3863 case AArch64::LD1Threev1d_POST:
3864 case AArch64::LD1Threev2d_POST:
3865 case AArch64::LD1Threev2s_POST:
3866 case AArch64::LD1Threev4h_POST:
3867 case AArch64::LD1Threev4s_POST:
3868 case AArch64::LD1Threev8b_POST:
3869 case AArch64::LD1Threev8h_POST:
3870 case AArch64::LD1Twov16b_POST:
3871 case AArch64::LD1Twov1d_POST:
3872 case AArch64::LD1Twov2d_POST:
3873 case AArch64::LD1Twov2s_POST:
3874 case AArch64::LD1Twov4h_POST:
3875 case AArch64::LD1Twov4s_POST:
3876 case AArch64::LD1Twov8b_POST:
3877 case AArch64::LD1Twov8h_POST:
3878 case AArch64::LD1i16_POST:
3879 case AArch64::LD1i32_POST:
3880 case AArch64::LD1i64_POST:
3881 case AArch64::LD1i8_POST:
3882 case AArch64::LD2Rv16b_POST:
3883 case AArch64::LD2Rv1d_POST:
3884 case AArch64::LD2Rv2d_POST:
3885 case AArch64::LD2Rv2s_POST:
3886 case AArch64::LD2Rv4h_POST:
3887 case AArch64::LD2Rv4s_POST:
3888 case AArch64::LD2Rv8b_POST:
3889 case AArch64::LD2Rv8h_POST:
3890 case AArch64::LD2Twov16b_POST:
3891 case AArch64::LD2Twov2d_POST:
3892 case AArch64::LD2Twov2s_POST:
3893 case AArch64::LD2Twov4h_POST:
3894 case AArch64::LD2Twov4s_POST:
3895 case AArch64::LD2Twov8b_POST:
3896 case AArch64::LD2Twov8h_POST:
3897 case AArch64::LD2i16_POST:
3898 case AArch64::LD2i32_POST:
3899 case AArch64::LD2i64_POST:
3900 case AArch64::LD2i8_POST:
3901 case AArch64::LD3Rv16b_POST:
3902 case AArch64::LD3Rv1d_POST:
3903 case AArch64::LD3Rv2d_POST:
3904 case AArch64::LD3Rv2s_POST:
3905 case AArch64::LD3Rv4h_POST:
3906 case AArch64::LD3Rv4s_POST:
3907 case AArch64::LD3Rv8b_POST:
3908 case AArch64::LD3Rv8h_POST:
3909 case AArch64::LD3Threev16b_POST:
3910 case AArch64::LD3Threev2d_POST:
3911 case AArch64::LD3Threev2s_POST:
3912 case AArch64::LD3Threev4h_POST:
3913 case AArch64::LD3Threev4s_POST:
3914 case AArch64::LD3Threev8b_POST:
3915 case AArch64::LD3Threev8h_POST:
3916 case AArch64::LD3i16_POST:
3917 case AArch64::LD3i32_POST:
3918 case AArch64::LD3i64_POST:
3919 case AArch64::LD3i8_POST:
3920 case AArch64::LD4Fourv16b_POST:
3921 case AArch64::LD4Fourv2d_POST:
3922 case AArch64::LD4Fourv2s_POST:
3923 case AArch64::LD4Fourv4h_POST:
3924 case AArch64::LD4Fourv4s_POST:
3925 case AArch64::LD4Fourv8b_POST:
3926 case AArch64::LD4Fourv8h_POST:
3927 case AArch64::LD4Rv16b_POST:
3928 case AArch64::LD4Rv1d_POST:
3929 case AArch64::LD4Rv2d_POST:
3930 case AArch64::LD4Rv2s_POST:
3931 case AArch64::LD4Rv4h_POST:
3932 case AArch64::LD4Rv4s_POST:
3933 case AArch64::LD4Rv8b_POST:
3934 case AArch64::LD4Rv8h_POST:
3935 case AArch64::LD4i16_POST:
3936 case AArch64::LD4i32_POST:
3937 case AArch64::LD4i64_POST:
3938 case AArch64::LD4i8_POST:
3939 case AArch64::LDAPRWpost:
3940 case AArch64::LDAPRXpost:
3941 case AArch64::LDIAPPWpost:
3942 case AArch64::LDIAPPXpost:
3943 case AArch64::LDPDpost:
3944 case AArch64::LDPQpost:
3945 case AArch64::LDPSWpost:
3946 case AArch64::LDPSpost:
3947 case AArch64::LDPWpost:
3948 case AArch64::LDPXpost:
3949 case AArch64::LDRBBpost:
3950 case AArch64::LDRBpost:
3951 case AArch64::LDRDpost:
3952 case AArch64::LDRHHpost:
3953 case AArch64::LDRHpost:
3954 case AArch64::LDRQpost:
3955 case AArch64::LDRSBWpost:
3956 case AArch64::LDRSBXpost:
3957 case AArch64::LDRSHWpost:
3958 case AArch64::LDRSHXpost:
3959 case AArch64::LDRSWpost:
3960 case AArch64::LDRSpost:
3961 case AArch64::LDRWpost:
3962 case AArch64::LDRXpost:
3963 case AArch64::ST1Fourv16b_POST:
3964 case AArch64::ST1Fourv1d_POST:
3965 case AArch64::ST1Fourv2d_POST:
3966 case AArch64::ST1Fourv2s_POST:
3967 case AArch64::ST1Fourv4h_POST:
3968 case AArch64::ST1Fourv4s_POST:
3969 case AArch64::ST1Fourv8b_POST:
3970 case AArch64::ST1Fourv8h_POST:
3971 case AArch64::ST1Onev16b_POST:
3972 case AArch64::ST1Onev1d_POST:
3973 case AArch64::ST1Onev2d_POST:
3974 case AArch64::ST1Onev2s_POST:
3975 case AArch64::ST1Onev4h_POST:
3976 case AArch64::ST1Onev4s_POST:
3977 case AArch64::ST1Onev8b_POST:
3978 case AArch64::ST1Onev8h_POST:
3979 case AArch64::ST1Threev16b_POST:
3980 case AArch64::ST1Threev1d_POST:
3981 case AArch64::ST1Threev2d_POST:
3982 case AArch64::ST1Threev2s_POST:
3983 case AArch64::ST1Threev4h_POST:
3984 case AArch64::ST1Threev4s_POST:
3985 case AArch64::ST1Threev8b_POST:
3986 case AArch64::ST1Threev8h_POST:
3987 case AArch64::ST1Twov16b_POST:
3988 case AArch64::ST1Twov1d_POST:
3989 case AArch64::ST1Twov2d_POST:
3990 case AArch64::ST1Twov2s_POST:
3991 case AArch64::ST1Twov4h_POST:
3992 case AArch64::ST1Twov4s_POST:
3993 case AArch64::ST1Twov8b_POST:
3994 case AArch64::ST1Twov8h_POST:
3995 case AArch64::ST1i16_POST:
3996 case AArch64::ST1i32_POST:
3997 case AArch64::ST1i64_POST:
3998 case AArch64::ST1i8_POST:
3999 case AArch64::ST2GPostIndex:
4000 case AArch64::ST2Twov16b_POST:
4001 case AArch64::ST2Twov2d_POST:
4002 case AArch64::ST2Twov2s_POST:
4003 case AArch64::ST2Twov4h_POST:
4004 case AArch64::ST2Twov4s_POST:
4005 case AArch64::ST2Twov8b_POST:
4006 case AArch64::ST2Twov8h_POST:
4007 case AArch64::ST2i16_POST:
4008 case AArch64::ST2i32_POST:
4009 case AArch64::ST2i64_POST:
4010 case AArch64::ST2i8_POST:
4011 case AArch64::ST3Threev16b_POST:
4012 case AArch64::ST3Threev2d_POST:
4013 case AArch64::ST3Threev2s_POST:
4014 case AArch64::ST3Threev4h_POST:
4015 case AArch64::ST3Threev4s_POST:
4016 case AArch64::ST3Threev8b_POST:
4017 case AArch64::ST3Threev8h_POST:
4018 case AArch64::ST3i16_POST:
4019 case AArch64::ST3i32_POST:
4020 case AArch64::ST3i64_POST:
4021 case AArch64::ST3i8_POST:
4022 case AArch64::ST4Fourv16b_POST:
4023 case AArch64::ST4Fourv2d_POST:
4024 case AArch64::ST4Fourv2s_POST:
4025 case AArch64::ST4Fourv4h_POST:
4026 case AArch64::ST4Fourv4s_POST:
4027 case AArch64::ST4Fourv8b_POST:
4028 case AArch64::ST4Fourv8h_POST:
4029 case AArch64::ST4i16_POST:
4030 case AArch64::ST4i32_POST:
4031 case AArch64::ST4i64_POST:
4032 case AArch64::ST4i8_POST:
4033 case AArch64::STGPostIndex:
4034 case AArch64::STGPpost:
4035 case AArch64::STPDpost:
4036 case AArch64::STPQpost:
4037 case AArch64::STPSpost:
4038 case AArch64::STPWpost:
4039 case AArch64::STPXpost:
4040 case AArch64::STRBBpost:
4041 case AArch64::STRBpost:
4042 case AArch64::STRDpost:
4043 case AArch64::STRHHpost:
4044 case AArch64::STRHpost:
4045 case AArch64::STRQpost:
4046 case AArch64::STRSpost:
4047 case AArch64::STRWpost:
4048 case AArch64::STRXpost:
4049 case AArch64::STZ2GPostIndex:
4050 case AArch64::STZGPostIndex:
4051 return true;
4052 }
4053}
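// For example, a post-index load such as
//   ldr x0, [x1], #16
// reads from the address currently in x1 and only afterwards adds 16 to x1,
// so for offset purposes the memory access itself is at base + 0.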
4054
4055bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4056 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4057 bool &OffsetIsScalable, TypeSize &Width,
4058 const TargetRegisterInfo *TRI) const {
4059 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4060 // Handle only loads/stores with base register followed by immediate offset.
4061 if (LdSt.getNumExplicitOperands() == 3) {
4062 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4063 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4064 !LdSt.getOperand(2).isImm())
4065 return false;
4066 } else if (LdSt.getNumExplicitOperands() == 4) {
4067 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4068 if (!LdSt.getOperand(1).isReg() ||
4069 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4070 !LdSt.getOperand(3).isImm())
4071 return false;
4072 } else
4073 return false;
4074
4075 // Get the scaling factor for the instruction and set the width for the
4076 // instruction.
4077 TypeSize Scale(0U, false);
4078 int64_t Dummy1, Dummy2;
4079
4080 // If this returns false, then it's an instruction we don't want to handle.
4081 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4082 return false;
4083
4084 // Compute the offset. Offset is calculated as the immediate operand
4085 // multiplied by the scaling factor. Unscaled instructions have a scaling
4086 // factor of 1. Post-indexed instructions are a special case with an offset of 0.
4087 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4088 BaseOp = &LdSt.getOperand(2);
4089 Offset = 0;
4090 } else if (LdSt.getNumExplicitOperands() == 3) {
4091 BaseOp = &LdSt.getOperand(1);
4092 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4093 } else {
4094 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4095 BaseOp = &LdSt.getOperand(2);
4096 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4097 }
4098 OffsetIsScalable = Scale.isScalable();
4099
4100 return BaseOp->isReg() || BaseOp->isFI();
4101}
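// Worked example of the offset computation above: for `ldp x1, x2, [x0, #16]`
// the immediate operand is 2 and the scale is 8, so Offset becomes 16; for
// `ldur x1, [x0, #-8]` the scale is 1 and Offset is simply -8.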
4102
4105 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4106 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4107 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4108 return OfsOp;
4109}
4110
4111bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4112 TypeSize &Width, int64_t &MinOffset,
4113 int64_t &MaxOffset) {
4114 switch (Opcode) {
4115 // Not a memory operation or something we want to handle.
4116 default:
4117 Scale = TypeSize::getFixed(0);
4118 Width = TypeSize::getFixed(0);
4119 MinOffset = MaxOffset = 0;
4120 return false;
4121 // LDR / STR
4122 case AArch64::LDRQui:
4123 case AArch64::STRQui:
4124 Scale = TypeSize::getFixed(16);
4125 Width = TypeSize::getFixed(16);
4126 MinOffset = 0;
4127 MaxOffset = 4095;
4128 break;
4129 case AArch64::LDRXui:
4130 case AArch64::LDRDui:
4131 case AArch64::STRXui:
4132 case AArch64::STRDui:
4133 case AArch64::PRFMui:
4134 Scale = TypeSize::getFixed(8);
4135 Width = TypeSize::getFixed(8);
4136 MinOffset = 0;
4137 MaxOffset = 4095;
4138 break;
4139 case AArch64::LDRWui:
4140 case AArch64::LDRSui:
4141 case AArch64::LDRSWui:
4142 case AArch64::STRWui:
4143 case AArch64::STRSui:
4144 Scale = TypeSize::getFixed(4);
4145 Width = TypeSize::getFixed(4);
4146 MinOffset = 0;
4147 MaxOffset = 4095;
4148 break;
4149 case AArch64::LDRHui:
4150 case AArch64::LDRHHui:
4151 case AArch64::LDRSHWui:
4152 case AArch64::LDRSHXui:
4153 case AArch64::STRHui:
4154 case AArch64::STRHHui:
4155 Scale = TypeSize::getFixed(2);
4156 Width = TypeSize::getFixed(2);
4157 MinOffset = 0;
4158 MaxOffset = 4095;
4159 break;
4160 case AArch64::LDRBui:
4161 case AArch64::LDRBBui:
4162 case AArch64::LDRSBWui:
4163 case AArch64::LDRSBXui:
4164 case AArch64::STRBui:
4165 case AArch64::STRBBui:
4166 Scale = TypeSize::getFixed(1);
4167 Width = TypeSize::getFixed(1);
4168 MinOffset = 0;
4169 MaxOffset = 4095;
4170 break;
4171 // post/pre inc
4172 case AArch64::STRQpre:
4173 case AArch64::LDRQpost:
4174 Scale = TypeSize::getFixed(1);
4175 Width = TypeSize::getFixed(16);
4176 MinOffset = -256;
4177 MaxOffset = 255;
4178 break;
4179 case AArch64::LDRDpost:
4180 case AArch64::LDRDpre:
4181 case AArch64::LDRXpost:
4182 case AArch64::LDRXpre:
4183 case AArch64::STRDpost:
4184 case AArch64::STRDpre:
4185 case AArch64::STRXpost:
4186 case AArch64::STRXpre:
4187 Scale = TypeSize::getFixed(1);
4188 Width = TypeSize::getFixed(8);
4189 MinOffset = -256;
4190 MaxOffset = 255;
4191 break;
4192 case AArch64::STRWpost:
4193 case AArch64::STRWpre:
4194 case AArch64::LDRWpost:
4195 case AArch64::LDRWpre:
4196 case AArch64::STRSpost:
4197 case AArch64::STRSpre:
4198 case AArch64::LDRSpost:
4199 case AArch64::LDRSpre:
4200 Scale = TypeSize::getFixed(1);
4201 Width = TypeSize::getFixed(4);
4202 MinOffset = -256;
4203 MaxOffset = 255;
4204 break;
4205 case AArch64::LDRHpost:
4206 case AArch64::LDRHpre:
4207 case AArch64::STRHpost:
4208 case AArch64::STRHpre:
4209 case AArch64::LDRHHpost:
4210 case AArch64::LDRHHpre:
4211 case AArch64::STRHHpost:
4212 case AArch64::STRHHpre:
4213 Scale = TypeSize::getFixed(1);
4214 Width = TypeSize::getFixed(2);
4215 MinOffset = -256;
4216 MaxOffset = 255;
4217 break;
4218 case AArch64::LDRBpost:
4219 case AArch64::LDRBpre:
4220 case AArch64::STRBpost:
4221 case AArch64::STRBpre:
4222 case AArch64::LDRBBpost:
4223 case AArch64::LDRBBpre:
4224 case AArch64::STRBBpost:
4225 case AArch64::STRBBpre:
4226 Scale = TypeSize::getFixed(1);
4227 Width = TypeSize::getFixed(1);
4228 MinOffset = -256;
4229 MaxOffset = 255;
4230 break;
4231 // Unscaled
4232 case AArch64::LDURQi:
4233 case AArch64::STURQi:
4234 Scale = TypeSize::getFixed(1);
4235 Width = TypeSize::getFixed(16);
4236 MinOffset = -256;
4237 MaxOffset = 255;
4238 break;
4239 case AArch64::LDURXi:
4240 case AArch64::LDURDi:
4241 case AArch64::LDAPURXi:
4242 case AArch64::STURXi:
4243 case AArch64::STURDi:
4244 case AArch64::STLURXi:
4245 case AArch64::PRFUMi:
4246 Scale = TypeSize::getFixed(1);
4247 Width = TypeSize::getFixed(8);
4248 MinOffset = -256;
4249 MaxOffset = 255;
4250 break;
4251 case AArch64::LDURWi:
4252 case AArch64::LDURSi:
4253 case AArch64::LDURSWi:
4254 case AArch64::LDAPURi:
4255 case AArch64::LDAPURSWi:
4256 case AArch64::STURWi:
4257 case AArch64::STURSi:
4258 case AArch64::STLURWi:
4259 Scale = TypeSize::getFixed(1);
4260 Width = TypeSize::getFixed(4);
4261 MinOffset = -256;
4262 MaxOffset = 255;
4263 break;
4264 case AArch64::LDURHi:
4265 case AArch64::LDURHHi:
4266 case AArch64::LDURSHXi:
4267 case AArch64::LDURSHWi:
4268 case AArch64::LDAPURHi:
4269 case AArch64::LDAPURSHWi:
4270 case AArch64::LDAPURSHXi:
4271 case AArch64::STURHi:
4272 case AArch64::STURHHi:
4273 case AArch64::STLURHi:
4274 Scale = TypeSize::getFixed(1);
4275 Width = TypeSize::getFixed(2);
4276 MinOffset = -256;
4277 MaxOffset = 255;
4278 break;
4279 case AArch64::LDURBi:
4280 case AArch64::LDURBBi:
4281 case AArch64::LDURSBXi:
4282 case AArch64::LDURSBWi:
4283 case AArch64::LDAPURBi:
4284 case AArch64::LDAPURSBWi:
4285 case AArch64::LDAPURSBXi:
4286 case AArch64::STURBi:
4287 case AArch64::STURBBi:
4288 case AArch64::STLURBi:
4289 Scale = TypeSize::getFixed(1);
4290 Width = TypeSize::getFixed(1);
4291 MinOffset = -256;
4292 MaxOffset = 255;
4293 break;
4294 // LDP / STP (including pre/post inc)
4295 case AArch64::LDPQi:
4296 case AArch64::LDNPQi:
4297 case AArch64::STPQi:
4298 case AArch64::STNPQi:
4299 case AArch64::LDPQpost:
4300 case AArch64::LDPQpre:
4301 case AArch64::STPQpost:
4302 case AArch64::STPQpre:
4303 Scale = TypeSize::getFixed(16);
4304 Width = TypeSize::getFixed(16 * 2);
4305 MinOffset = -64;
4306 MaxOffset = 63;
4307 break;
4308 case AArch64::LDPXi:
4309 case AArch64::LDPDi:
4310 case AArch64::LDNPXi:
4311 case AArch64::LDNPDi:
4312 case AArch64::STPXi:
4313 case AArch64::STPDi:
4314 case AArch64::STNPXi:
4315 case AArch64::STNPDi:
4316 case AArch64::LDPDpost:
4317 case AArch64::LDPDpre:
4318 case AArch64::LDPXpost:
4319 case AArch64::LDPXpre:
4320 case AArch64::STPDpost:
4321 case AArch64::STPDpre:
4322 case AArch64::STPXpost:
4323 case AArch64::STPXpre:
4324 Scale = TypeSize::getFixed(8);
4325 Width = TypeSize::getFixed(8 * 2);
4326 MinOffset = -64;
4327 MaxOffset = 63;
4328 break;
4329 case AArch64::LDPWi:
4330 case AArch64::LDPSi:
4331 case AArch64::LDNPWi:
4332 case AArch64::LDNPSi:
4333 case AArch64::STPWi:
4334 case AArch64::STPSi:
4335 case AArch64::STNPWi:
4336 case AArch64::STNPSi:
4337 case AArch64::LDPSpost:
4338 case AArch64::LDPSpre:
4339 case AArch64::LDPWpost:
4340 case AArch64::LDPWpre:
4341 case AArch64::STPSpost:
4342 case AArch64::STPSpre:
4343 case AArch64::STPWpost:
4344 case AArch64::STPWpre:
4345 Scale = TypeSize::getFixed(4);
4346 Width = TypeSize::getFixed(4 * 2);
4347 MinOffset = -64;
4348 MaxOffset = 63;
4349 break;
4350 case AArch64::StoreSwiftAsyncContext:
4351 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4352 Scale = TypeSize::getFixed(1);
4353 Width = TypeSize::getFixed(8);
4354 MinOffset = 0;
4355 MaxOffset = 4095;
4356 break;
4357 case AArch64::ADDG:
4358 Scale = TypeSize::getFixed(16);
4359 Width = TypeSize::getFixed(0);
4360 MinOffset = 0;
4361 MaxOffset = 63;
4362 break;
4363 case AArch64::TAGPstack:
4364 Scale = TypeSize::getFixed(16);
4365 Width = TypeSize::getFixed(0);
4366 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4367 // of 63 (not 64!).
4368 MinOffset = -63;
4369 MaxOffset = 63;
4370 break;
4371 case AArch64::LDG:
4372 case AArch64::STGi:
4373 case AArch64::STGPreIndex:
4374 case AArch64::STGPostIndex:
4375 case AArch64::STZGi:
4376 case AArch64::STZGPreIndex:
4377 case AArch64::STZGPostIndex:
4378 Scale = TypeSize::getFixed(16);
4379 Width = TypeSize::getFixed(16);
4380 MinOffset = -256;
4381 MaxOffset = 255;
4382 break;
4383 // SVE
4384 case AArch64::STR_ZZZZXI:
4385 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4386 case AArch64::LDR_ZZZZXI:
4387 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4388 Scale = TypeSize::getScalable(16);
4389 Width = TypeSize::getScalable(16 * 4);
4390 MinOffset = -256;
4391 MaxOffset = 252;
4392 break;
4393 case AArch64::STR_ZZZXI:
4394 case AArch64::LDR_ZZZXI:
4395 Scale = TypeSize::getScalable(16);
4396 Width = TypeSize::getScalable(16 * 3);
4397 MinOffset = -256;
4398 MaxOffset = 253;
4399 break;
4400 case AArch64::STR_ZZXI:
4401 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4402 case AArch64::LDR_ZZXI:
4403 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4404 Scale = TypeSize::getScalable(16);
4405 Width = TypeSize::getScalable(16 * 2);
4406 MinOffset = -256;
4407 MaxOffset = 254;
4408 break;
4409 case AArch64::LDR_PXI:
4410 case AArch64::STR_PXI:
4411 Scale = TypeSize::getScalable(2);
4412 Width = TypeSize::getScalable(2);
4413 MinOffset = -256;
4414 MaxOffset = 255;
4415 break;
4416 case AArch64::LDR_PPXI:
4417 case AArch64::STR_PPXI:
4418 Scale = TypeSize::getScalable(2);
4419 Width = TypeSize::getScalable(2 * 2);
4420 MinOffset = -256;
4421 MaxOffset = 254;
4422 break;
4423 case AArch64::LDR_ZXI:
4424 case AArch64::STR_ZXI:
4425 Scale = TypeSize::getScalable(16);
4426 Width = TypeSize::getScalable(16);
4427 MinOffset = -256;
4428 MaxOffset = 255;
4429 break;
4430 case AArch64::LD1B_IMM:
4431 case AArch64::LD1H_IMM:
4432 case AArch64::LD1W_IMM:
4433 case AArch64::LD1D_IMM:
4434 case AArch64::LDNT1B_ZRI:
4435 case AArch64::LDNT1H_ZRI:
4436 case AArch64::LDNT1W_ZRI:
4437 case AArch64::LDNT1D_ZRI:
4438 case AArch64::ST1B_IMM:
4439 case AArch64::ST1H_IMM:
4440 case AArch64::ST1W_IMM:
4441 case AArch64::ST1D_IMM:
4442 case AArch64::STNT1B_ZRI:
4443 case AArch64::STNT1H_ZRI:
4444 case AArch64::STNT1W_ZRI:
4445 case AArch64::STNT1D_ZRI:
4446 case AArch64::LDNF1B_IMM:
4447 case AArch64::LDNF1H_IMM:
4448 case AArch64::LDNF1W_IMM:
4449 case AArch64::LDNF1D_IMM:
4450 // A full vector's worth of data
4451 // Width = mbytes * elements
4452 Scale = TypeSize::getScalable(16);
4453 Width = TypeSize::getScalable(16);
4454 MinOffset = -8;
4455 MaxOffset = 7;
4456 break;
4457 case AArch64::LD2B_IMM:
4458 case AArch64::LD2H_IMM:
4459 case AArch64::LD2W_IMM:
4460 case AArch64::LD2D_IMM:
4461 case AArch64::ST2B_IMM:
4462 case AArch64::ST2H_IMM:
4463 case AArch64::ST2W_IMM:
4464 case AArch64::ST2D_IMM:
4465 Scale = TypeSize::getScalable(32);
4466 Width = TypeSize::getScalable(16 * 2);
4467 MinOffset = -8;
4468 MaxOffset = 7;
4469 break;
4470 case AArch64::LD3B_IMM:
4471 case AArch64::LD3H_IMM:
4472 case AArch64::LD3W_IMM:
4473 case AArch64::LD3D_IMM:
4474 case AArch64::ST3B_IMM:
4475 case AArch64::ST3H_IMM:
4476 case AArch64::ST3W_IMM:
4477 case AArch64::ST3D_IMM:
4478 Scale = TypeSize::getScalable(48);
4479 Width = TypeSize::getScalable(16 * 3);
4480 MinOffset = -8;
4481 MaxOffset = 7;
4482 break;
4483 case AArch64::LD4B_IMM:
4484 case AArch64::LD4H_IMM:
4485 case AArch64::LD4W_IMM:
4486 case AArch64::LD4D_IMM:
4487 case AArch64::ST4B_IMM:
4488 case AArch64::ST4H_IMM:
4489 case AArch64::ST4W_IMM:
4490 case AArch64::ST4D_IMM:
4491 Scale = TypeSize::getScalable(64);
4492 Width = TypeSize::getScalable(16 * 4);
4493 MinOffset = -8;
4494 MaxOffset = 7;
4495 break;
4496 case AArch64::LD1B_H_IMM:
4497 case AArch64::LD1SB_H_IMM:
4498 case AArch64::LD1H_S_IMM:
4499 case AArch64::LD1SH_S_IMM:
4500 case AArch64::LD1W_D_IMM:
4501 case AArch64::LD1SW_D_IMM:
4502 case AArch64::ST1B_H_IMM:
4503 case AArch64::ST1H_S_IMM:
4504 case AArch64::ST1W_D_IMM:
4505 case AArch64::LDNF1B_H_IMM:
4506 case AArch64::LDNF1SB_H_IMM:
4507 case AArch64::LDNF1H_S_IMM:
4508 case AArch64::LDNF1SH_S_IMM:
4509 case AArch64::LDNF1W_D_IMM:
4510 case AArch64::LDNF1SW_D_IMM:
4511 // A half vector's worth of data
4512 // Width = mbytes * elements
4513 Scale = TypeSize::getScalable(8);
4514 Width = TypeSize::getScalable(8);
4515 MinOffset = -8;
4516 MaxOffset = 7;
4517 break;
4518 case AArch64::LD1B_S_IMM:
4519 case AArch64::LD1SB_S_IMM:
4520 case AArch64::LD1H_D_IMM:
4521 case AArch64::LD1SH_D_IMM:
4522 case AArch64::ST1B_S_IMM:
4523 case AArch64::ST1H_D_IMM:
4524 case AArch64::LDNF1B_S_IMM:
4525 case AArch64::LDNF1SB_S_IMM:
4526 case AArch64::LDNF1H_D_IMM:
4527 case AArch64::LDNF1SH_D_IMM:
4528 // A quarter vector's worth of data
4529 // Width = mbytes * elements
4530 Scale = TypeSize::getScalable(4);
4531 Width = TypeSize::getScalable(4);
4532 MinOffset = -8;
4533 MaxOffset = 7;
4534 break;
4535 case AArch64::LD1B_D_IMM:
4536 case AArch64::LD1SB_D_IMM:
4537 case AArch64::ST1B_D_IMM:
4538 case AArch64::LDNF1B_D_IMM:
4539 case AArch64::LDNF1SB_D_IMM:
4540 // An eighth of a vector's worth of data
4541 // Width = mbytes * elements
4542 Scale = TypeSize::getScalable(2);
4543 Width = TypeSize::getScalable(2);
4544 MinOffset = -8;
4545 MaxOffset = 7;
4546 break;
4547 case AArch64::ST2Gi:
4548 case AArch64::ST2GPreIndex:
4549 case AArch64::ST2GPostIndex:
4550 case AArch64::STZ2Gi:
4551 case AArch64::STZ2GPreIndex:
4552 case AArch64::STZ2GPostIndex:
4553 Scale = TypeSize::getFixed(16);
4554 Width = TypeSize::getFixed(32);
4555 MinOffset = -256;
4556 MaxOffset = 255;
4557 break;
4558 case AArch64::STGPi:
4559 case AArch64::STGPpost:
4560 case AArch64::STGPpre:
4561 Scale = TypeSize::getFixed(16);
4562 Width = TypeSize::getFixed(16);
4563 MinOffset = -64;
4564 MaxOffset = 63;
4565 break;
4566 case AArch64::LD1RB_IMM:
4567 case AArch64::LD1RB_H_IMM:
4568 case AArch64::LD1RB_S_IMM:
4569 case AArch64::LD1RB_D_IMM:
4570 case AArch64::LD1RSB_H_IMM:
4571 case AArch64::LD1RSB_S_IMM:
4572 case AArch64::LD1RSB_D_IMM:
4573 Scale = TypeSize::getFixed(1);
4574 Width = TypeSize::getFixed(1);
4575 MinOffset = 0;
4576 MaxOffset = 63;
4577 break;
4578 case AArch64::LD1RH_IMM:
4579 case AArch64::LD1RH_S_IMM:
4580 case AArch64::LD1RH_D_IMM:
4581 case AArch64::LD1RSH_S_IMM:
4582 case AArch64::LD1RSH_D_IMM:
4583 Scale = TypeSize::getFixed(2);
4584 Width = TypeSize::getFixed(2);
4585 MinOffset = 0;
4586 MaxOffset = 63;
4587 break;
4588 case AArch64::LD1RW_IMM:
4589 case AArch64::LD1RW_D_IMM:
4590 case AArch64::LD1RSW_IMM:
4591 Scale = TypeSize::getFixed(4);
4592 Width = TypeSize::getFixed(4);
4593 MinOffset = 0;
4594 MaxOffset = 63;
4595 break;
4596 case AArch64::LD1RD_IMM:
4597 Scale = TypeSize::getFixed(8);
4598 Width = TypeSize::getFixed(8);
4599 MinOffset = 0;
4600 MaxOffset = 63;
4601 break;
4602 }
4603
4604 return true;
4605}
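// As a concrete reading of the table above: LDRQui has Scale = Width = 16 and
// an immediate range of 0..4095, so it can address byte offsets 0, 16, ...,
// 65520 from the base register, while the unscaled LDURQi covers byte offsets
// -256..255 at single-byte granularity.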
4606
4607// Scaling factor for unscaled load or store: the memory access size in bytes.
4608int AArch64InstrInfo::getMemScale(unsigned Opc) {
4609 switch (Opc) {
4610 default:
4611 llvm_unreachable("Opcode has unknown scale!");
4612 case AArch64::LDRBBui:
4613 case AArch64::LDURBBi:
4614 case AArch64::LDRSBWui:
4615 case AArch64::LDURSBWi:
4616 case AArch64::STRBBui:
4617 case AArch64::STURBBi:
4618 return 1;
4619 case AArch64::LDRHHui:
4620 case AArch64::LDURHHi:
4621 case AArch64::LDRSHWui:
4622 case AArch64::LDURSHWi:
4623 case AArch64::STRHHui:
4624 case AArch64::STURHHi:
4625 return 2;
4626 case AArch64::LDRSui:
4627 case AArch64::LDURSi:
4628 case AArch64::LDRSpre:
4629 case AArch64::LDRSWui:
4630 case AArch64::LDURSWi:
4631 case AArch64::LDRSWpre:
4632 case AArch64::LDRWpre:
4633 case AArch64::LDRWui:
4634 case AArch64::LDURWi:
4635 case AArch64::STRSui:
4636 case AArch64::STURSi:
4637 case AArch64::STRSpre:
4638 case AArch64::STRWui:
4639 case AArch64::STURWi:
4640 case AArch64::STRWpre:
4641 case AArch64::LDPSi:
4642 case AArch64::LDPSWi:
4643 case AArch64::LDPWi:
4644 case AArch64::STPSi:
4645 case AArch64::STPWi:
4646 return 4;
4647 case AArch64::LDRDui:
4648 case AArch64::LDURDi:
4649 case AArch64::LDRDpre:
4650 case AArch64::LDRXui:
4651 case AArch64::LDURXi:
4652 case AArch64::LDRXpre:
4653 case AArch64::STRDui:
4654 case AArch64::STURDi:
4655 case AArch64::STRDpre:
4656 case AArch64::STRXui:
4657 case AArch64::STURXi:
4658 case AArch64::STRXpre:
4659 case AArch64::LDPDi:
4660 case AArch64::LDPXi:
4661 case AArch64::STPDi:
4662 case AArch64::STPXi:
4663 return 8;
4664 case AArch64::LDRQui:
4665 case AArch64::LDURQi:
4666 case AArch64::STRQui:
4667 case AArch64::STURQi:
4668 case AArch64::STRQpre:
4669 case AArch64::LDPQi:
4670 case AArch64::LDRQpre:
4671 case AArch64::STPQi:
4672 case AArch64::STGi:
4673 case AArch64::STZGi:
4674 case AArch64::ST2Gi:
4675 case AArch64::STZ2Gi:
4676 case AArch64::STGPi:
4677 return 16;
4678 }
4679}
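// Example: getMemScale(AArch64::STPXi) returns 8, matching the 8-byte element
// size by which the signed 7-bit LDP/STP immediate is scaled.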
4680
4681bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4682 switch (MI.getOpcode()) {
4683 default:
4684 return false;
4685 case AArch64::LDRWpre:
4686 case AArch64::LDRXpre:
4687 case AArch64::LDRSWpre:
4688 case AArch64::LDRSpre:
4689 case AArch64::LDRDpre:
4690 case AArch64::LDRQpre:
4691 return true;
4692 }
4693}
4694
4695bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4696 switch (MI.getOpcode()) {
4697 default:
4698 return false;
4699 case AArch64::STRWpre:
4700 case AArch64::STRXpre:
4701 case AArch64::STRSpre:
4702 case AArch64::STRDpre:
4703 case AArch64::STRQpre:
4704 return true;
4705 }
4706}
4707
4708bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4709 return isPreLd(MI) || isPreSt(MI);
4710}
4711
4712bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4713 switch (MI.getOpcode()) {
4714 default:
4715 return false;
4716 case AArch64::LDPSi:
4717 case AArch64::LDPSWi:
4718 case AArch64::LDPDi:
4719 case AArch64::LDPQi:
4720 case AArch64::LDPWi:
4721 case AArch64::LDPXi:
4722 case AArch64::STPSi:
4723 case AArch64::STPDi:
4724 case AArch64::STPQi:
4725 case AArch64::STPWi:
4726 case AArch64::STPXi:
4727 case AArch64::STGPi:
4728 return true;
4729 }
4730}
4731
4732const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4733 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4734 unsigned Idx =
4735 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4736 : 1;
4737 return MI.getOperand(Idx);
4738}
4739
4740const MachineOperand &
4741AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4742 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4743 unsigned Idx =
4744 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4745 : 2;
4746 return MI.getOperand(Idx);
4747}
4748
4749const MachineOperand &
4751 switch (MI.getOpcode()) {
4752 default:
4753 llvm_unreachable("Unexpected opcode");
4754 case AArch64::LDRBroX:
4755 case AArch64::LDRBBroX:
4756 case AArch64::LDRSBXroX:
4757 case AArch64::LDRSBWroX:
4758 case AArch64::LDRHroX:
4759 case AArch64::LDRHHroX:
4760 case AArch64::LDRSHXroX:
4761 case AArch64::LDRSHWroX:
4762 case AArch64::LDRWroX:
4763 case AArch64::LDRSroX:
4764 case AArch64::LDRSWroX:
4765 case AArch64::LDRDroX:
4766 case AArch64::LDRXroX:
4767 case AArch64::LDRQroX:
4768 return MI.getOperand(4);
4769 }
4770}
4771
4772 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4773 Register Reg) {
4774 if (MI.getParent() == nullptr)
4775 return nullptr;
4776 const MachineFunction *MF = MI.getParent()->getParent();
4777 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4778}
4779
4780bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4781 auto IsHFPR = [&](const MachineOperand &Op) {
4782 if (!Op.isReg())
4783 return false;
4784 auto Reg = Op.getReg();
4785 if (Reg.isPhysical())
4786 return AArch64::FPR16RegClass.contains(Reg);
4787 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4788 return TRC == &AArch64::FPR16RegClass ||
4789 TRC == &AArch64::FPR16_loRegClass;
4790 };
4791 return llvm::any_of(MI.operands(), IsHFPR);
4792}
4793
4794bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4795 auto IsQFPR = [&](const MachineOperand &Op) {
4796 if (!Op.isReg())
4797 return false;
4798 auto Reg = Op.getReg();
4799 if (Reg.isPhysical())
4800 return AArch64::FPR128RegClass.contains(Reg);
4801 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4802 return TRC == &AArch64::FPR128RegClass ||
4803 TRC == &AArch64::FPR128_loRegClass;
4804 };
4805 return llvm::any_of(MI.operands(), IsQFPR);
4806}
4807
4808bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4809 switch (MI.getOpcode()) {
4810 case AArch64::BRK:
4811 case AArch64::HLT:
4812 case AArch64::PACIASP:
4813 case AArch64::PACIBSP:
4814 // Implicit BTI behavior.
4815 return true;
4816 case AArch64::PAUTH_PROLOGUE:
4817 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4818 return true;
4819 case AArch64::HINT: {
4820 unsigned Imm = MI.getOperand(0).getImm();
4821 // Explicit BTI instruction.
4822 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4823 return true;
4824 // PACI(A|B)SP instructions.
4825 if (Imm == 25 || Imm == 27)
4826 return true;
4827 return false;
4828 }
4829 default:
4830 return false;
4831 }
4832}
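// For reference, the HINT immediates tested above are the BTI and PAC aliases:
// #32 = bti, #34 = bti c, #36 = bti j, #38 = bti jc, and #25/#27 = paciasp /
// pacibsp, which also behave as implicit branch targets when BTI is enabled.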
4833
4834bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4835 if (Reg == 0)
4836 return false;
4837 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4838 return AArch64::FPR128RegClass.contains(Reg) ||
4839 AArch64::FPR64RegClass.contains(Reg) ||
4840 AArch64::FPR32RegClass.contains(Reg) ||
4841 AArch64::FPR16RegClass.contains(Reg) ||
4842 AArch64::FPR8RegClass.contains(Reg);
4843}
4844
4845bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4846 auto IsFPR = [&](const MachineOperand &Op) {
4847 if (!Op.isReg())
4848 return false;
4849 auto Reg = Op.getReg();
4850 if (Reg.isPhysical())
4851 return isFpOrNEON(Reg);
4852
4853 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4854 return TRC == &AArch64::FPR128RegClass ||
4855 TRC == &AArch64::FPR128_loRegClass ||
4856 TRC == &AArch64::FPR64RegClass ||
4857 TRC == &AArch64::FPR64_loRegClass ||
4858 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4859 TRC == &AArch64::FPR8RegClass;
4860 };
4861 return llvm::any_of(MI.operands(), IsFPR);
4862}
4863
4864// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4865// scaled.
4866static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4867 int Scale = AArch64InstrInfo::getMemScale(Opc);
4868
4869 // If the byte-offset isn't a multiple of the stride, we can't scale this
4870 // offset.
4871 if (Offset % Scale != 0)
4872 return false;
4873
4874 // Convert the byte-offset used by unscaled into an "element" offset used
4875 // by the scaled pair load/store instructions.
4876 Offset /= Scale;
4877 return true;
4878}
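// Worked example: for an LDURXi with a byte offset of 24, getMemScale returns
// 8, 24 % 8 == 0, and the offset becomes the element offset 3 that a paired
// LDPXi immediate expects; a byte offset of 20 would fail the modulo check and
// the access could not become half of a pair.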
4879
4880static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4881 if (FirstOpc == SecondOpc)
4882 return true;
4883 // We can also pair sign-ext and zero-ext instructions.
4884 switch (FirstOpc) {
4885 default:
4886 return false;
4887 case AArch64::STRSui:
4888 case AArch64::STURSi:
4889 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4890 case AArch64::STRDui:
4891 case AArch64::STURDi:
4892 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4893 case AArch64::STRQui:
4894 case AArch64::STURQi:
4895 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4896 case AArch64::STRWui:
4897 case AArch64::STURWi:
4898 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4899 case AArch64::STRXui:
4900 case AArch64::STURXi:
4901 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4902 case AArch64::LDRSui:
4903 case AArch64::LDURSi:
4904 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4905 case AArch64::LDRDui:
4906 case AArch64::LDURDi:
4907 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4908 case AArch64::LDRQui:
4909 case AArch64::LDURQi:
4910 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4911 case AArch64::LDRWui:
4912 case AArch64::LDURWi:
4913 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4914 case AArch64::LDRSWui:
4915 case AArch64::LDURSWi:
4916 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4917 case AArch64::LDRXui:
4918 case AArch64::LDURXi:
4919 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4920 }
4921 // These instructions can't be paired based on their opcodes.
4922 return false;
4923}
4924
4925static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4926 int64_t Offset1, unsigned Opcode1, int FI2,
4927 int64_t Offset2, unsigned Opcode2) {
4928 // Accesses through fixed stack object frame indices may access a different
4929 // fixed stack slot. Check that the object offsets + offsets match.
4930 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4931 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4932 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4933 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4934 // Convert to scaled object offsets.
4935 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4936 if (ObjectOffset1 % Scale1 != 0)
4937 return false;
4938 ObjectOffset1 /= Scale1;
4939 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4940 if (ObjectOffset2 % Scale2 != 0)
4941 return false;
4942 ObjectOffset2 /= Scale2;
4943 ObjectOffset1 += Offset1;
4944 ObjectOffset2 += Offset2;
4945 return ObjectOffset1 + 1 == ObjectOffset2;
4946 }
4947
4948 return FI1 == FI2;
4949}
4950
4951/// Detect opportunities for ldp/stp formation.
4952///
4953/// Only called for LdSt for which getMemOperandWithOffset returns true.
4954bool AArch64InstrInfo::shouldClusterMemOps(
4955 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4956 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4957 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4958 unsigned NumBytes) const {
4959 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4960 const MachineOperand &BaseOp1 = *BaseOps1.front();
4961 const MachineOperand &BaseOp2 = *BaseOps2.front();
4962 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4963 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4964 if (BaseOp1.getType() != BaseOp2.getType())
4965 return false;
4966
4967 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4968 "Only base registers and frame indices are supported.");
4969
4970 // Check for both base regs and base FI.
4971 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4972 return false;
4973
4974 // Only cluster up to a single pair.
4975 if (ClusterSize > 2)
4976 return false;
4977
4978 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4979 return false;
4980
4981 // Can we pair these instructions based on their opcodes?
4982 unsigned FirstOpc = FirstLdSt.getOpcode();
4983 unsigned SecondOpc = SecondLdSt.getOpcode();
4984 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4985 return false;
4986
4987 // Can't merge volatiles or load/stores that have a hint to avoid pair
4988 // formation, for example.
4989 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4990 !isCandidateToMergeOrPair(SecondLdSt))
4991 return false;
4992
4993 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4994 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4995 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4996 return false;
4997
4998 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4999 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5000 return false;
5001
5002 // Pairwise instructions have a 7-bit signed offset field.
5003 if (Offset1 > 63 || Offset1 < -64)
5004 return false;
5005
5006 // The caller should already have ordered First/SecondLdSt by offset.
5007 // Note: except for non-equal frame index bases
5008 if (BaseOp1.isFI()) {
5009 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5010 "Caller should have ordered offsets.");
5011
5012 const MachineFrameInfo &MFI =
5013 FirstLdSt.getParent()->getParent()->getFrameInfo();
5014 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5015 BaseOp2.getIndex(), Offset2, SecondOpc);
5016 }
5017
5018 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5019
5020 return Offset1 + 1 == Offset2;
5021}
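// Example of a pair this accepts: `ldr x1, [x0, #8]` and `ldr x2, [x0, #16]`
// (both LDRXui) already carry the element offsets 1 and 2 in their immediate
// operands, so Offset1 + 1 == Offset2 holds and the scheduler keeps them
// adjacent, letting the load/store optimizer later form `ldp x1, x2, [x0, #8]`.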
5022
5023 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5024 MCRegister Reg, unsigned SubIdx,
5025 unsigned State,
5026 const TargetRegisterInfo *TRI) {
5027 if (!SubIdx)
5028 return MIB.addReg(Reg, State);
5029
5030 if (Reg.isPhysical())
5031 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5032 return MIB.addReg(Reg, State, SubIdx);
5033}
5034
5035static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5036 unsigned NumRegs) {
5037 // We really want the positive remainder mod 32 here, which happens to be
5038 // easily obtainable with a mask.
5039 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5040}
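// Worked example of the modulo-32 check: when copying the D-tuple d30_d31 into
// d31_d0, (31 - 30) & 0x1f == 1 < 2, so a forward sub-register copy would
// overwrite d31 before it is read as a source; copyPhysRegTuple therefore
// iterates over the sub-registers in reverse in that case.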
5041
5042void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5043 MachineBasicBlock::iterator I,
5044 const DebugLoc &DL, MCRegister DestReg,
5045 MCRegister SrcReg, bool KillSrc,
5046 unsigned Opcode,
5047 ArrayRef<unsigned> Indices) const {
5048 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5049 const TargetRegisterInfo *TRI = &getRegisterInfo();
5050 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5051 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5052 unsigned NumRegs = Indices.size();
5053
5054 int SubReg = 0, End = NumRegs, Incr = 1;
5055 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5056 SubReg = NumRegs - 1;
5057 End = -1;
5058 Incr = -1;
5059 }
5060
5061 for (; SubReg != End; SubReg += Incr) {
5062 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5063 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5064 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5065 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5066 }
5067}
5068
5069void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5070 MachineBasicBlock::iterator I,
5071 const DebugLoc &DL, MCRegister DestReg,
5072 MCRegister SrcReg, bool KillSrc,
5073 unsigned Opcode, unsigned ZeroReg,
5074 llvm::ArrayRef<unsigned> Indices) const {
5075 const TargetRegisterInfo *TRI = &getRegisterInfo();
5076 unsigned NumRegs = Indices.size();
5077
5078#ifndef NDEBUG
5079 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5080 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5081 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5082 "GPR reg sequences should not be able to overlap");
5083#endif
5084
5085 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5086 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5087 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5088 MIB.addReg(ZeroReg);
5089 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5090 MIB.addImm(0);
5091 }
5092}
5093
5094void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5095 MachineBasicBlock::iterator I,
5096 const DebugLoc &DL, Register DestReg,
5097 Register SrcReg, bool KillSrc,
5098 bool RenamableDest,
5099 bool RenamableSrc) const {
5100 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5101 AArch64::GPR32spRegClass.contains(SrcReg)) {
5102 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5103 // If either operand is WSP, expand to ADD #0.
5104 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5105 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5106 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5107 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5108 &AArch64::GPR64spRegClass);
5109 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5110 &AArch64::GPR64spRegClass);
5111 // This instruction is reading and writing X registers. This may upset
5112 // the register scavenger and machine verifier, so we need to indicate
5113 // that we are reading an undefined value from SrcRegX, but a proper
5114 // value from SrcReg.
5115 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5116 .addReg(SrcRegX, RegState::Undef)
5117 .addImm(0)
5118 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5119 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5120 } else {
5121 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5122 .addReg(SrcReg, getKillRegState(KillSrc))
5123 .addImm(0)
5124 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5125 }
5126 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5127 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5128 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5129 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5130 &AArch64::GPR64spRegClass);
5131 assert(DestRegX.isValid() && "Destination super-reg not valid");
5132 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5133 &AArch64::GPR64spRegClass);
5134 assert(SrcRegX.isValid() && "Source super-reg not valid");
5135 // This instruction is reading and writing X registers. This may upset
5136 // the register scavenger and machine verifier, so we need to indicate
5137 // that we are reading an undefined value from SrcRegX, but a proper
5138 // value from SrcReg.
5139 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5140 .addReg(AArch64::XZR)
5141 .addReg(SrcRegX, RegState::Undef)
5142 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5143 } else {
5144 // Otherwise, expand to ORR WZR.
5145 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5146 .addReg(AArch64::WZR)
5147 .addReg(SrcReg, getKillRegState(KillSrc));
5148 }
5149 return;
5150 }
5151
5152 // GPR32 zeroing
5153 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5154 if (Subtarget.hasZeroCycleZeroingGPR32()) {
5155 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5156 .addImm(0)
5157 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5158 } else {
5159 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5160 .addReg(AArch64::WZR)
5161 .addReg(AArch64::WZR);
5162 }
5163 return;
5164 }
5165
5166 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5167 AArch64::GPR64spRegClass.contains(SrcReg)) {
5168 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5169 // If either operand is SP, expand to ADD #0.
5170 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5171 .addReg(SrcReg, getKillRegState(KillSrc))
5172 .addImm(0)
5173 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5174 } else {
5175 // Otherwise, expand to ORR XZR.
5176 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5177 .addReg(AArch64::XZR)
5178 .addReg(SrcReg, getKillRegState(KillSrc));
5179 }
5180 return;
5181 }
5182
5183 // GPR64 zeroing
5184 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5185 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5186 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5187 .addImm(0)
5188 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5189 } else {
5190 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5191 .addReg(AArch64::XZR)
5192 .addReg(AArch64::XZR);
5193 }
5194 return;
5195 }
5196
5197 // Copy a Predicate register by ORRing with itself.
5198 if (AArch64::PPRRegClass.contains(DestReg) &&
5199 AArch64::PPRRegClass.contains(SrcReg)) {
5200 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5201 "Unexpected SVE register.");
5202 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5203 .addReg(SrcReg) // Pg
5204 .addReg(SrcReg)
5205 .addReg(SrcReg, getKillRegState(KillSrc));
5206 return;
5207 }
5208
5209 // Copy a predicate-as-counter register by ORRing with itself as if it
5210 // were a regular predicate (mask) register.
5211 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5212 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5213 if (DestIsPNR || SrcIsPNR) {
5214 auto ToPPR = [](MCRegister R) -> MCRegister {
5215 return (R - AArch64::PN0) + AArch64::P0;
5216 };
5217 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5218 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5219
5220 if (PPRSrcReg != PPRDestReg) {
5221 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5222 .addReg(PPRSrcReg) // Pg
5223 .addReg(PPRSrcReg)
5224 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5225 if (DestIsPNR)
5226 NewMI.addDef(DestReg, RegState::Implicit);
5227 }
5228 return;
5229 }
5230
5231 // Copy a Z register by ORRing with itself.
5232 if (AArch64::ZPRRegClass.contains(DestReg) &&
5233 AArch64::ZPRRegClass.contains(SrcReg)) {
5234 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5235 "Unexpected SVE register.");
5236 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5237 .addReg(SrcReg)
5238 .addReg(SrcReg, getKillRegState(KillSrc));
5239 return;
5240 }
5241
5242 // Copy a Z register pair by copying the individual sub-registers.
5243 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5244 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5245 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5246 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5247 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5248 "Unexpected SVE register.");
5249 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5250 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5251 Indices);
5252 return;
5253 }
5254
5255 // Copy a Z register triple by copying the individual sub-registers.
5256 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5257 AArch64::ZPR3RegClass.contains(SrcReg)) {
5258 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5259 "Unexpected SVE register.");
5260 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5261 AArch64::zsub2};
5262 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5263 Indices);
5264 return;
5265 }
5266
5267 // Copy a Z register quad by copying the individual sub-registers.
5268 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5269 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5270 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5271 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5272 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5273 "Unexpected SVE register.");
5274 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5275 AArch64::zsub2, AArch64::zsub3};
5276 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5277 Indices);
5278 return;
5279 }
5280
5281 // Copy a DDDD register quad by copying the individual sub-registers.
5282 if (AArch64::DDDDRegClass.contains(DestReg) &&
5283 AArch64::DDDDRegClass.contains(SrcReg)) {
5284 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5285 AArch64::dsub2, AArch64::dsub3};
5286 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5287 Indices);
5288 return;
5289 }
5290
5291 // Copy a DDD register triple by copying the individual sub-registers.
5292 if (AArch64::DDDRegClass.contains(DestReg) &&
5293 AArch64::DDDRegClass.contains(SrcReg)) {
5294 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5295 AArch64::dsub2};
5296 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5297 Indices);
5298 return;
5299 }
5300
5301 // Copy a DD register pair by copying the individual sub-registers.
5302 if (AArch64::DDRegClass.contains(DestReg) &&
5303 AArch64::DDRegClass.contains(SrcReg)) {
5304 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5305 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5306 Indices);
5307 return;
5308 }
5309
5310 // Copy a QQQQ register quad by copying the individual sub-registers.
5311 if (AArch64::QQQQRegClass.contains(DestReg) &&
5312 AArch64::QQQQRegClass.contains(SrcReg)) {
5313 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5314 AArch64::qsub2, AArch64::qsub3};
5315 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5316 Indices);
5317 return;
5318 }
5319
5320 // Copy a QQQ register triple by copying the individual sub-registers.
5321 if (AArch64::QQQRegClass.contains(DestReg) &&
5322 AArch64::QQQRegClass.contains(SrcReg)) {
5323 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5324 AArch64::qsub2};
5325 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5326 Indices);
5327 return;
5328 }
5329
5330 // Copy a QQ register pair by copying the individual sub-registers.
5331 if (AArch64::QQRegClass.contains(DestReg) &&
5332 AArch64::QQRegClass.contains(SrcReg)) {
5333 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5334 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5335 Indices);
5336 return;
5337 }
5338
5339 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5340 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5341 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5342 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5343 AArch64::XZR, Indices);
5344 return;
5345 }
5346
5347 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5348 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5349 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5350 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5351 AArch64::WZR, Indices);
5352 return;
5353 }
5354
5355 if (AArch64::FPR128RegClass.contains(DestReg) &&
5356 AArch64::FPR128RegClass.contains(SrcReg)) {
5357 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5358 !Subtarget.isNeonAvailable())
5359 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5360 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5361 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5362 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5363 else if (Subtarget.isNeonAvailable())
5364 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5365 .addReg(SrcReg)
5366 .addReg(SrcReg, getKillRegState(KillSrc));
5367 else {
5368 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5369 .addReg(AArch64::SP, RegState::Define)
5370 .addReg(SrcReg, getKillRegState(KillSrc))
5371 .addReg(AArch64::SP)
5372 .addImm(-16);
5373 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5374 .addReg(AArch64::SP, RegState::Define)
5375 .addReg(DestReg, RegState::Define)
5376 .addReg(AArch64::SP)
5377 .addImm(16);
5378 }
5379 return;
5380 }
5381
5382 if (AArch64::FPR64RegClass.contains(DestReg) &&
5383 AArch64::FPR64RegClass.contains(SrcReg)) {
5384 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5385 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5386 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5387 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5388 &AArch64::FPR128RegClass);
5389 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5390 &AArch64::FPR128RegClass);
5391 // This instruction is reading and writing Q registers. This may upset
5392 // the register scavenger and machine verifier, so we need to indicate
5393 // that we are reading an undefined value from SrcRegQ, but a proper
5394 // value from SrcReg.
5395 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5396 .addReg(SrcRegQ, RegState::Undef)
5397 .addReg(SrcRegQ, RegState::Undef)
5398 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5399 } else {
5400 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5401 .addReg(SrcReg, getKillRegState(KillSrc));
5402 }
5403 return;
5404 }
5405
5406 if (AArch64::FPR32RegClass.contains(DestReg) &&
5407 AArch64::FPR32RegClass.contains(SrcReg)) {
5408 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5409 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5410 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5411 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5412 &AArch64::FPR128RegClass);
5413 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5414 &AArch64::FPR128RegClass);
5415 // This instruction is reading and writing Q registers. This may upset
5416 // the register scavenger and machine verifier, so we need to indicate
5417 // that we are reading an undefined value from SrcRegQ, but a proper
5418 // value from SrcReg.
5419 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5420 .addReg(SrcRegQ, RegState::Undef)
5421 .addReg(SrcRegQ, RegState::Undef)
5422 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5423 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5424 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5425 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5426 &AArch64::FPR64RegClass);
5427 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5428 &AArch64::FPR64RegClass);
5429 // This instruction is reading and writing D registers. This may upset
5430 // the register scavenger and machine verifier, so we need to indicate
5431 // that we are reading an undefined value from SrcRegD, but a proper
5432 // value from SrcReg.
5433 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5434 .addReg(SrcRegD, RegState::Undef)
5435 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5436 } else {
5437 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5438 .addReg(SrcReg, getKillRegState(KillSrc));
5439 }
5440 return;
5441 }
5442
5443 if (AArch64::FPR16RegClass.contains(DestReg) &&
5444 AArch64::FPR16RegClass.contains(SrcReg)) {
5445 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5446 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5447 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5448 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5449 &AArch64::FPR128RegClass);
5450 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5451 &AArch64::FPR128RegClass);
5452 // This instruction is reading and writing Q registers. This may upset
5453 // the register scavenger and machine verifier, so we need to indicate
5454 // that we are reading an undefined value from SrcRegQ, but a proper
5455 // value from SrcReg.
5456 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5457 .addReg(SrcRegQ, RegState::Undef)
5458 .addReg(SrcRegQ, RegState::Undef)
5459 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5460 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5461 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5462 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5463 &AArch64::FPR64RegClass);
5464 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5465 &AArch64::FPR64RegClass);
5466 // This instruction is reading and writing D registers. This may upset
5467 // the register scavenger and machine verifier, so we need to indicate
5468 // that we are reading an undefined value from SrcRegD, but a proper
5469 // value from SrcReg.
5470 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5471 .addReg(SrcRegD, RegState::Undef)
5472 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5473 } else {
5474 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5475 &AArch64::FPR32RegClass);
5476 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5477 &AArch64::FPR32RegClass);
5478 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5479 .addReg(SrcReg, getKillRegState(KillSrc));
5480 }
5481 return;
5482 }
5483
5484 if (AArch64::FPR8RegClass.contains(DestReg) &&
5485 AArch64::FPR8RegClass.contains(SrcReg)) {
5486 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5487 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5488        !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5489 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5490 &AArch64::FPR128RegClass);
5491 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5492 &AArch64::FPR128RegClass);
5493 // This instruction is reading and writing Q registers. This may upset
5494 // the register scavenger and machine verifier, so we need to indicate
5495 // that we are reading an undefined value from SrcRegQ, but a proper
5496 // value from SrcReg.
5497 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5498 .addReg(SrcRegQ, RegState::Undef)
5499 .addReg(SrcRegQ, RegState::Undef)
5500 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5501 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5502 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5503 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5504 &AArch64::FPR64RegClass);
5505 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5506 &AArch64::FPR64RegClass);
5507 // This instruction is reading and writing D registers. This may upset
5508 // the register scavenger and machine verifier, so we need to indicate
5509 // that we are reading an undefined value from SrcRegD, but a proper
5510 // value from SrcReg.
5511 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5512 .addReg(SrcRegD, RegState::Undef)
5513 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5514 } else {
5515 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5516 &AArch64::FPR32RegClass);
5517 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5518 &AArch64::FPR32RegClass);
5519 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5520 .addReg(SrcReg, getKillRegState(KillSrc));
5521 }
5522 return;
5523 }
5524
5525 // Copies between GPR64 and FPR64.
5526 if (AArch64::FPR64RegClass.contains(DestReg) &&
5527 AArch64::GPR64RegClass.contains(SrcReg)) {
5528 if (AArch64::XZR == SrcReg) {
5529 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5530 } else {
5531 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5532 .addReg(SrcReg, getKillRegState(KillSrc));
5533 }
5534 return;
5535 }
5536 if (AArch64::GPR64RegClass.contains(DestReg) &&
5537 AArch64::FPR64RegClass.contains(SrcReg)) {
5538 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5539 .addReg(SrcReg, getKillRegState(KillSrc));
5540 return;
5541 }
5542 // Copies between GPR32 and FPR32.
5543 if (AArch64::FPR32RegClass.contains(DestReg) &&
5544 AArch64::GPR32RegClass.contains(SrcReg)) {
5545 if (AArch64::WZR == SrcReg) {
5546 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5547 } else {
5548 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5549 .addReg(SrcReg, getKillRegState(KillSrc));
5550 }
5551 return;
5552 }
5553 if (AArch64::GPR32RegClass.contains(DestReg) &&
5554 AArch64::FPR32RegClass.contains(SrcReg)) {
5555 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5556 .addReg(SrcReg, getKillRegState(KillSrc));
5557 return;
5558 }
5559
5560 if (DestReg == AArch64::NZCV) {
5561 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5562 BuildMI(MBB, I, DL, get(AArch64::MSR))
5563 .addImm(AArch64SysReg::NZCV)
5564 .addReg(SrcReg, getKillRegState(KillSrc))
5565 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5566 return;
5567 }
5568
5569 if (SrcReg == AArch64::NZCV) {
5570 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5571 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5572 .addImm(AArch64SysReg::NZCV)
5573 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5574 return;
5575 }
5576
5577#ifndef NDEBUG
5578 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5579 << "\n";
5580#endif
5581 llvm_unreachable("unimplemented reg-to-reg copy");
5582}
5583
5586 MachineBasicBlock::iterator InsertBefore,
5587 const MCInstrDesc &MCID,
5588 Register SrcReg, bool IsKill,
5589 unsigned SubIdx0, unsigned SubIdx1, int FI,
5590 MachineMemOperand *MMO) {
5591 Register SrcReg0 = SrcReg;
5592 Register SrcReg1 = SrcReg;
5593 if (SrcReg.isPhysical()) {
5594 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5595 SubIdx0 = 0;
5596 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5597 SubIdx1 = 0;
5598 }
5599 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5600 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5601 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5602 .addFrameIndex(FI)
5603 .addImm(0)
5604 .addMemOperand(MMO);
5605}
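// For example, spilling a 128-bit GPR sequential pair through this helper with
// the STPXi description stores both 64-bit halves (sube64 and subo64) of the
// pair to the frame index with a single store-pair instruction.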
5606
5609 Register SrcReg, bool isKill, int FI,
5610 const TargetRegisterClass *RC,
5611 const TargetRegisterInfo *TRI,
5612 Register VReg,
5613 MachineInstr::MIFlag Flags) const {
5614 MachineFunction &MF = *MBB.getParent();
5615 MachineFrameInfo &MFI = MF.getFrameInfo();
5616
5618 MachineMemOperand *MMO =
5620 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5621 unsigned Opc = 0;
5622 bool Offset = true;
5624 unsigned StackID = TargetStackID::Default;
5625 switch (TRI->getSpillSize(*RC)) {
5626 case 1:
5627 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5628 Opc = AArch64::STRBui;
5629 break;
5630 case 2: {
5631 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5632 Opc = AArch64::STRHui;
5633 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5634 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5635 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5636 "Unexpected register store without SVE store instructions");
5637 Opc = AArch64::STR_PXI;
5639 }
5640 break;
5641 }
5642 case 4:
5643 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5644 Opc = AArch64::STRWui;
5645 if (SrcReg.isVirtual())
5646 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5647 else
5648 assert(SrcReg != AArch64::WSP);
5649 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5650 Opc = AArch64::STRSui;
5651 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5652 Opc = AArch64::STR_PPXI;
5654 }
5655 break;
5656 case 8:
5657 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5658 Opc = AArch64::STRXui;
5659 if (SrcReg.isVirtual())
5660 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5661 else
5662 assert(SrcReg != AArch64::SP);
5663 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5664 Opc = AArch64::STRDui;
5665 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5667 get(AArch64::STPWi), SrcReg, isKill,
5668 AArch64::sube32, AArch64::subo32, FI, MMO);
5669 return;
5670 }
5671 break;
5672 case 16:
5673 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5674 Opc = AArch64::STRQui;
5675 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5676 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5677 Opc = AArch64::ST1Twov1d;
5678 Offset = false;
5679 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5681 get(AArch64::STPXi), SrcReg, isKill,
5682 AArch64::sube64, AArch64::subo64, FI, MMO);
5683 return;
5684 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5685 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5686 "Unexpected register store without SVE store instructions");
5687 Opc = AArch64::STR_ZXI;
5689 }
5690 break;
5691 case 24:
5692 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5693 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5694 Opc = AArch64::ST1Threev1d;
5695 Offset = false;
5696 }
5697 break;
5698 case 32:
5699 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5700 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5701 Opc = AArch64::ST1Fourv1d;
5702 Offset = false;
5703 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5704 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5705 Opc = AArch64::ST1Twov2d;
5706 Offset = false;
5707 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5708 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5709 "Unexpected register store without SVE store instructions");
5710 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5712 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5713 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5714 "Unexpected register store without SVE store instructions");
5715 Opc = AArch64::STR_ZZXI;
5717 }
5718 break;
5719 case 48:
5720 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5721 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5722 Opc = AArch64::ST1Threev2d;
5723 Offset = false;
5724 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5725 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5726 "Unexpected register store without SVE store instructions");
5727 Opc = AArch64::STR_ZZZXI;
5729 }
5730 break;
5731 case 64:
5732 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5733 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5734 Opc = AArch64::ST1Fourv2d;
5735 Offset = false;
5736 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5737 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5738 "Unexpected register store without SVE store instructions");
5739 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5741 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5742 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5743 "Unexpected register store without SVE store instructions");
5744 Opc = AArch64::STR_ZZZZXI;
5746 }
5747 break;
5748 }
5749 assert(Opc && "Unknown register class");
5750 MFI.setStackID(FI, StackID);
5751
5753 .addReg(SrcReg, getKillRegState(isKill))
5754 .addFrameIndex(FI);
5755
5756 if (Offset)
5757 MI.addImm(0);
5758 if (PNRReg.isValid())
5759 MI.addDef(PNRReg, RegState::Implicit);
5760 MI.addMemOperand(MMO);
5761}
5762
5765 MachineBasicBlock::iterator InsertBefore,
5766 const MCInstrDesc &MCID,
5767 Register DestReg, unsigned SubIdx0,
5768 unsigned SubIdx1, int FI,
5769 MachineMemOperand *MMO) {
5770 Register DestReg0 = DestReg;
5771 Register DestReg1 = DestReg;
5772 bool IsUndef = true;
5773 if (DestReg.isPhysical()) {
5774 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5775 SubIdx0 = 0;
5776 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5777 SubIdx1 = 0;
5778 IsUndef = false;
5779 }
5780 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5781 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5782 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5783 .addFrameIndex(FI)
5784 .addImm(0)
5785 .addMemOperand(MMO);
5786}
5787
5790 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5791 Register VReg, MachineInstr::MIFlag Flags) const {
5792 MachineFunction &MF = *MBB.getParent();
5793 MachineFrameInfo &MFI = MF.getFrameInfo();
5795 MachineMemOperand *MMO =
5797 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5798
5799 unsigned Opc = 0;
5800 bool Offset = true;
5801 unsigned StackID = TargetStackID::Default;
5803 switch (TRI->getSpillSize(*RC)) {
5804 case 1:
5805 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5806 Opc = AArch64::LDRBui;
5807 break;
5808 case 2: {
5809 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5810 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5811 Opc = AArch64::LDRHui;
5812 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5813 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5814 "Unexpected register load without SVE load instructions");
5815 if (IsPNR)
5816 PNRReg = DestReg;
5817 Opc = AArch64::LDR_PXI;
5819 }
5820 break;
5821 }
5822 case 4:
5823 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5824 Opc = AArch64::LDRWui;
5825 if (DestReg.isVirtual())
5826 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5827 else
5828 assert(DestReg != AArch64::WSP);
5829 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5830 Opc = AArch64::LDRSui;
5831 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5832 Opc = AArch64::LDR_PPXI;
5834 }
5835 break;
5836 case 8:
5837 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5838 Opc = AArch64::LDRXui;
5839 if (DestReg.isVirtual())
5840 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5841 else
5842 assert(DestReg != AArch64::SP);
5843 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5844 Opc = AArch64::LDRDui;
5845 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5847 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5848 AArch64::subo32, FI, MMO);
5849 return;
5850 }
5851 break;
5852 case 16:
5853 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5854 Opc = AArch64::LDRQui;
5855 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5856 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5857 Opc = AArch64::LD1Twov1d;
5858 Offset = false;
5859 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5861 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5862 AArch64::subo64, FI, MMO);
5863 return;
5864 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5865 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5866 "Unexpected register load without SVE load instructions");
5867 Opc = AArch64::LDR_ZXI;
5869 }
5870 break;
5871 case 24:
5872 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5873 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5874 Opc = AArch64::LD1Threev1d;
5875 Offset = false;
5876 }
5877 break;
5878 case 32:
5879 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5880 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5881 Opc = AArch64::LD1Fourv1d;
5882 Offset = false;
5883 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5884 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5885 Opc = AArch64::LD1Twov2d;
5886 Offset = false;
5887 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5888 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5889 "Unexpected register load without SVE load instructions");
5890 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5892 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5893 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5894 "Unexpected register load without SVE load instructions");
5895 Opc = AArch64::LDR_ZZXI;
5897 }
5898 break;
5899 case 48:
5900 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5901 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5902 Opc = AArch64::LD1Threev2d;
5903 Offset = false;
5904 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5905 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5906 "Unexpected register load without SVE load instructions");
5907 Opc = AArch64::LDR_ZZZXI;
5909 }
5910 break;
5911 case 64:
5912 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5913 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5914 Opc = AArch64::LD1Fourv2d;
5915 Offset = false;
5916 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5917 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5918 "Unexpected register load without SVE load instructions");
5919 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5921 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5922 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5923 "Unexpected register load without SVE load instructions");
5924 Opc = AArch64::LDR_ZZZZXI;
5926 }
5927 break;
5928 }
5929
5930 assert(Opc && "Unknown register class");
5931 MFI.setStackID(FI, StackID);
5932
5934 .addReg(DestReg, getDefRegState(true))
5935 .addFrameIndex(FI);
5936 if (Offset)
5937 MI.addImm(0);
5938 if (PNRReg.isValid() && !PNRReg.isVirtual())
5939 MI.addDef(PNRReg, RegState::Implicit);
5940 MI.addMemOperand(MMO);
5941}
5942
5944 const MachineInstr &UseMI,
5945 const TargetRegisterInfo *TRI) {
5946 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5947 UseMI.getIterator()),
5948 [TRI](const MachineInstr &I) {
5949 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5950 I.readsRegister(AArch64::NZCV, TRI);
5951 });
5952}
5953
5954void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5955 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5956  // The smallest scalable elements supported by scaled SVE addressing
5957 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5958 // byte offset must always be a multiple of 2.
5959 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5960
5961 // VGSized offsets are divided by '2', because the VG register is the
5962  // number of 64-bit granules as opposed to 128-bit vector chunks,
5963 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5964 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5965 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5966 ByteSized = Offset.getFixed();
5967 VGSized = Offset.getScalable() / 2;
5968}
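// For example, a StackOffset of 16 fixed bytes plus 32 scalable bytes
// decomposes here into ByteSized = 16 and VGSized = 32 / 2 = 16, which a DWARF
// consumer then evaluates as 16 + 16 * VG bytes.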
5969
5970/// Returns the offset in parts to which this frame offset can be
5971/// decomposed for the purpose of describing a frame offset.
5972/// For non-scalable offsets this is simply its byte size.
5973void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5974 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5975 int64_t &NumDataVectors) {
5976  // The smallest scalable elements supported by scaled SVE addressing
5977 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5978 // byte offset must always be a multiple of 2.
5979 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5980
5981 NumBytes = Offset.getFixed();
5982 NumDataVectors = 0;
5983 NumPredicateVectors = Offset.getScalable() / 2;
5984 // This method is used to get the offsets to adjust the frame offset.
5985 // If the function requires ADDPL to be used and needs more than two ADDPL
5986 // instructions, part of the offset is folded into NumDataVectors so that it
5987 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5988 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5989 NumPredicateVectors > 62) {
5990 NumDataVectors = NumPredicateVectors / 8;
5991 NumPredicateVectors -= NumDataVectors * 8;
5992 }
5993}
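// For example, a purely scalable offset of 144 bytes gives
// NumPredicateVectors = 72, which is a multiple of 8 and is therefore folded
// entirely into NumDataVectors = 9 (ADDVLs). A scalable offset of 36 bytes
// gives NumPredicateVectors = 18, which stays as-is because it lies within
// [-64, 62] and is not a multiple of 8, so it is materialized with ADDPL.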
5994
5995// Convenience function to create a DWARF expression for: Constant `Operation`.
5996// This helper emits compact sequences for common cases. For example, for `-15
5997// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6000 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6001 // -Constant (1 to 31)
6002 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6003 Operation = dwarf::DW_OP_minus;
6004 } else if (Constant >= 0 && Constant <= 31) {
6005 // Literal value 0 to 31
6006 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6007 } else {
6008 // Signed constant
6009 Expr.push_back(dwarf::DW_OP_consts);
6011 }
6012 return Expr.push_back(Operation);
6013}
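// For example, appendConstantExpr(Expr, 8, DW_OP_mul) emits DW_OP_lit8
// DW_OP_mul, while constants that do not fit a literal, e.g. 100, fall back to
// DW_OP_consts 100 followed by the operation.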
6014
6015// Convenience function to create a DWARF expression for a register.
6016static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6017 Expr.push_back((char)dwarf::DW_OP_bregx);
6019 Expr.push_back(0);
6020}
6021
6022// Convenience function to create a DWARF expression for loading a register from
6023// a CFA offset.
6025 int64_t OffsetFromDefCFA) {
6026 // This assumes the top of the DWARF stack contains the CFA.
6027 Expr.push_back(dwarf::DW_OP_dup);
6028 // Add the offset to the register.
6029 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6030  // Dereference the address (loads a 64-bit value).
6031 Expr.push_back(dwarf::DW_OP_deref);
6032}
6033
6034// Convenience function to create a comment for
6035// (+/-) NumBytes (* RegScale)?
6036static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6037 StringRef RegScale = {}) {
6038 if (NumBytes) {
6039 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6040 if (!RegScale.empty())
6041 Comment << ' ' << RegScale;
6042 }
6043}
6044
6045// Creates an MCCFIInstruction:
6046// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6048 unsigned Reg,
6049 const StackOffset &Offset) {
6050 int64_t NumBytes, NumVGScaledBytes;
6051 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6052 NumVGScaledBytes);
6053 std::string CommentBuffer;
6054 llvm::raw_string_ostream Comment(CommentBuffer);
6055
6056 if (Reg == AArch64::SP)
6057 Comment << "sp";
6058 else if (Reg == AArch64::FP)
6059 Comment << "fp";
6060 else
6061 Comment << printReg(Reg, &TRI);
6062
6063 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6064 SmallString<64> Expr;
6065 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6066 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6067 // Reg + NumBytes
6068 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6069 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6070 appendOffsetComment(NumBytes, Comment);
6071 if (NumVGScaledBytes) {
6072 // + VG * NumVGScaledBytes
6073 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6074 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6075 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6076 Expr.push_back(dwarf::DW_OP_plus);
6077 }
6078
6079 // Wrap this into DW_CFA_def_cfa.
6080 SmallString<64> DefCfaExpr;
6081 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6082 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6083 DefCfaExpr.append(Expr.str());
6084 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6085 Comment.str());
6086}
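// As an illustration, for Reg == SP with NumBytes == 16 and
// NumVGScaledBytes == 8, the escape wraps an expression equivalent to
//   DW_OP_breg31 +16, DW_OP_bregx VG 0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// in DW_CFA_def_cfa_expression, annotated with the comment "sp + 16 + 8 * VG".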
6087
6089 unsigned FrameReg, unsigned Reg,
6090 const StackOffset &Offset,
6091 bool LastAdjustmentWasScalable) {
6092 if (Offset.getScalable())
6093 return createDefCFAExpression(TRI, Reg, Offset);
6094
6095 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6096 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6097
6098 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6099 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6100}
6101
6104 const StackOffset &OffsetFromDefCFA,
6105 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6106 int64_t NumBytes, NumVGScaledBytes;
6107 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6108 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6109
6110 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6111
6112 // Non-scalable offsets can use DW_CFA_offset directly.
6113 if (!NumVGScaledBytes)
6114 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6115
6116 std::string CommentBuffer;
6117 llvm::raw_string_ostream Comment(CommentBuffer);
6118 Comment << printReg(Reg, &TRI) << " @ cfa";
6119
6120 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6121 assert(NumVGScaledBytes && "Expected scalable offset");
6122 SmallString<64> OffsetExpr;
6123 // + VG * NumVGScaledBytes
6124 StringRef VGRegScale;
6125 if (IncomingVGOffsetFromDefCFA) {
6126 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6127 VGRegScale = "* IncomingVG";
6128 } else {
6129 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6130 VGRegScale = "* VG";
6131 }
6132 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6133 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6134 OffsetExpr.push_back(dwarf::DW_OP_plus);
6135 if (NumBytes) {
6136 // + NumBytes
6137 appendOffsetComment(NumBytes, Comment);
6138 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6139 }
6140
6141 // Wrap this into DW_CFA_expression
6142 SmallString<64> CfaExpr;
6143 CfaExpr.push_back(dwarf::DW_CFA_expression);
6144 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6145 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6146 CfaExpr.append(OffsetExpr.str());
6147
6148 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6149 Comment.str());
6150}
6151
6152// Helper function to emit a frame offset adjustment from a given
6153// pointer (SrcReg), stored into DestReg. This function is explicit in that
6154// it requires the caller to supply the opcode to use.
6157 const DebugLoc &DL, unsigned DestReg,
6158 unsigned SrcReg, int64_t Offset, unsigned Opc,
6159 const TargetInstrInfo *TII,
6160 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6161 bool *HasWinCFI, bool EmitCFAOffset,
6162 StackOffset CFAOffset, unsigned FrameReg) {
6163 int Sign = 1;
6164 unsigned MaxEncoding, ShiftSize;
6165 switch (Opc) {
6166 case AArch64::ADDXri:
6167 case AArch64::ADDSXri:
6168 case AArch64::SUBXri:
6169 case AArch64::SUBSXri:
6170 MaxEncoding = 0xfff;
6171 ShiftSize = 12;
6172 break;
6173 case AArch64::ADDVL_XXI:
6174 case AArch64::ADDPL_XXI:
6175 case AArch64::ADDSVL_XXI:
6176 case AArch64::ADDSPL_XXI:
6177 MaxEncoding = 31;
6178 ShiftSize = 0;
6179 if (Offset < 0) {
6180 MaxEncoding = 32;
6181 Sign = -1;
6182 Offset = -Offset;
6183 }
6184 break;
6185 default:
6186 llvm_unreachable("Unsupported opcode");
6187 }
6188
6189 // `Offset` can be in bytes or in "scalable bytes".
6190 int VScale = 1;
6191 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6192 VScale = 16;
6193 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6194 VScale = 2;
6195
6196 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6197 // scratch register. If DestReg is a virtual register, use it as the
6198 // scratch register; otherwise, create a new virtual register (to be
6199 // replaced by the scavenger at the end of PEI). That case can be optimized
6200 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6201 // register can be loaded with offset%8 and the add/sub can use an extending
6202 // instruction with LSL#3.
6203 // Currently the function handles any offsets but generates a poor sequence
6204 // of code.
6205 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6206
6207 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6208 Register TmpReg = DestReg;
6209 if (TmpReg == AArch64::XZR)
6210 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6211 &AArch64::GPR64RegClass);
6212 do {
6213 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6214 unsigned LocalShiftSize = 0;
6215 if (ThisVal > MaxEncoding) {
6216 ThisVal = ThisVal >> ShiftSize;
6217 LocalShiftSize = ShiftSize;
6218 }
6219 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6220 "Encoding cannot handle value that big");
6221
6222 Offset -= ThisVal << LocalShiftSize;
6223 if (Offset == 0)
6224 TmpReg = DestReg;
6225 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6226 .addReg(SrcReg)
6227 .addImm(Sign * (int)ThisVal);
6228 if (ShiftSize)
6229 MBI = MBI.addImm(
6231 MBI = MBI.setMIFlag(Flag);
6232
6233 auto Change =
6234 VScale == 1
6235 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6236 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6237 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6238 CFAOffset += Change;
6239 else
6240 CFAOffset -= Change;
6241 if (EmitCFAOffset && DestReg == TmpReg) {
6242 MachineFunction &MF = *MBB.getParent();
6243 const TargetSubtargetInfo &STI = MF.getSubtarget();
6244 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6245
6246 unsigned CFIIndex = MF.addFrameInst(
6247 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6248 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6249 .addCFIIndex(CFIIndex)
6250 .setMIFlags(Flag);
6251 }
6252
6253 if (NeedsWinCFI) {
6254 int Imm = (int)(ThisVal << LocalShiftSize);
6255 if (VScale != 1 && DestReg == AArch64::SP) {
6256 if (HasWinCFI)
6257 *HasWinCFI = true;
6258 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6259 .addImm(ThisVal)
6260 .setMIFlag(Flag);
6261 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6262 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6263 assert(VScale == 1 && "Expected non-scalable operation");
6264 if (HasWinCFI)
6265 *HasWinCFI = true;
6266 if (Imm == 0)
6267 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6268 else
6269 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6270 .addImm(Imm)
6271 .setMIFlag(Flag);
6272 assert(Offset == 0 && "Expected remaining offset to be zero to "
6273 "emit a single SEH directive");
6274 } else if (DestReg == AArch64::SP) {
6275 assert(VScale == 1 && "Expected non-scalable operation");
6276 if (HasWinCFI)
6277 *HasWinCFI = true;
6278 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6279 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6280 .addImm(Imm)
6281 .setMIFlag(Flag);
6282 }
6283 }
6284
6285 SrcReg = TmpReg;
6286 } while (Offset);
6287}
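// For example, adding 0x1234 to SP via ADDXri is split by the loop above into
// "add sp, sp, #1, lsl #12" followed by "add sp, sp, #0x234", because each
// ADDXri immediate is limited to 12 bits with an optional 12-bit left shift.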
6288
6291 unsigned DestReg, unsigned SrcReg,
6293 MachineInstr::MIFlag Flag, bool SetNZCV,
6294 bool NeedsWinCFI, bool *HasWinCFI,
6295 bool EmitCFAOffset, StackOffset CFAOffset,
6296 unsigned FrameReg) {
6297 // If a function is marked as arm_locally_streaming, then the runtime value of
6298  // vscale in the prologue/epilogue is different from the runtime value of vscale
6299 // in the function's body. To avoid having to consider multiple vscales,
6300 // we can use `addsvl` to allocate any scalable stack-slots, which under
6301 // most circumstances will be only locals, not callee-save slots.
6302 const Function &F = MBB.getParent()->getFunction();
6303 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6304
6305 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6306 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6307 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6308
6309 // Insert ADDSXri for scalable offset at the end.
6310 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6311 if (NeedsFinalDefNZCV)
6312 SetNZCV = false;
6313
6314 // First emit non-scalable frame offsets, or a simple 'mov'.
6315 if (Bytes || (!Offset && SrcReg != DestReg)) {
6316 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6317 "SP increment/decrement not 8-byte aligned");
6318 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6319 if (Bytes < 0) {
6320 Bytes = -Bytes;
6321 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6322 }
6323 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6324 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6325 FrameReg);
6326 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6327 ? StackOffset::getFixed(-Bytes)
6328 : StackOffset::getFixed(Bytes);
6329 SrcReg = DestReg;
6330 FrameReg = DestReg;
6331 }
6332
6333 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6334 "WinCFI can't allocate fractions of an SVE data vector");
6335
6336 if (NumDataVectors) {
6337 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6338 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6339 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6340 FrameReg);
6341 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6342 SrcReg = DestReg;
6343 }
6344
6345 if (NumPredicateVectors) {
6346 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6347 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6348 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6349 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6350 FrameReg);
6351 }
6352
6353 if (NeedsFinalDefNZCV)
6354 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6355 .addReg(DestReg)
6356 .addImm(0)
6357 .addImm(0);
6358}
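// For example, a StackOffset of 32 fixed bytes plus 48 scalable bytes
// decomposes into Bytes = 32, NumDataVectors = 3 and NumPredicateVectors = 0,
// so this emits one ADDXri/SUBXri adjustment for the 32 bytes followed by a
// single ADDVL (or ADDSVL for locally-streaming functions) of #3.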
6359
6362 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6363 LiveIntervals *LIS, VirtRegMap *VRM) const {
6364 // This is a bit of a hack. Consider this instruction:
6365 //
6366 // %0 = COPY %sp; GPR64all:%0
6367 //
6368 // We explicitly chose GPR64all for the virtual register so such a copy might
6369 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6370 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6371 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6372 //
6373 // To prevent that, we are going to constrain the %0 register class here.
6374 if (MI.isFullCopy()) {
6375 Register DstReg = MI.getOperand(0).getReg();
6376 Register SrcReg = MI.getOperand(1).getReg();
6377 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6378 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6379 return nullptr;
6380 }
6381 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6382 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6383 return nullptr;
6384 }
6385    // Nothing can be folded with a copy from/to NZCV.
6386 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6387 return nullptr;
6388 }
6389
6390 // Handle the case where a copy is being spilled or filled but the source
6391 // and destination register class don't match. For example:
6392 //
6393 // %0 = COPY %xzr; GPR64common:%0
6394 //
6395 // In this case we can still safely fold away the COPY and generate the
6396 // following spill code:
6397 //
6398 // STRXui %xzr, %stack.0
6399 //
6400 // This also eliminates spilled cross register class COPYs (e.g. between x and
6401 // d regs) of the same size. For example:
6402 //
6403 // %0 = COPY %1; GPR64:%0, FPR64:%1
6404 //
6405 // will be filled as
6406 //
6407 // LDRDui %0, fi<#0>
6408 //
6409 // instead of
6410 //
6411 // LDRXui %Temp, fi<#0>
6412 // %0 = FMOV %Temp
6413 //
6414 if (MI.isCopy() && Ops.size() == 1 &&
6415 // Make sure we're only folding the explicit COPY defs/uses.
6416 (Ops[0] == 0 || Ops[0] == 1)) {
6417 bool IsSpill = Ops[0] == 0;
6418 bool IsFill = !IsSpill;
6420 const MachineRegisterInfo &MRI = MF.getRegInfo();
6421 MachineBasicBlock &MBB = *MI.getParent();
6422 const MachineOperand &DstMO = MI.getOperand(0);
6423 const MachineOperand &SrcMO = MI.getOperand(1);
6424 Register DstReg = DstMO.getReg();
6425 Register SrcReg = SrcMO.getReg();
6426 // This is slightly expensive to compute for physical regs since
6427 // getMinimalPhysRegClass is slow.
6428 auto getRegClass = [&](unsigned Reg) {
6429 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6430 : TRI.getMinimalPhysRegClass(Reg);
6431 };
6432
6433 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6434 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6435 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6436 "Mismatched register size in non subreg COPY");
6437 if (IsSpill)
6438 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6439 getRegClass(SrcReg), &TRI, Register());
6440 else
6441 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6442 getRegClass(DstReg), &TRI, Register());
6443 return &*--InsertPt;
6444 }
6445
6446 // Handle cases like spilling def of:
6447 //
6448 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6449 //
6450 // where the physical register source can be widened and stored to the full
6451 // virtual reg destination stack slot, in this case producing:
6452 //
6453 // STRXui %xzr, %stack.0
6454 //
6455 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6456 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6457 assert(SrcMO.getSubReg() == 0 &&
6458 "Unexpected subreg on physical register");
6459 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6460 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6461 Register());
6462 return &*--InsertPt;
6463 }
6464
6465 // Handle cases like filling use of:
6466 //
6467 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6468 //
6469 // where we can load the full virtual reg source stack slot, into the subreg
6470 // destination, in this case producing:
6471 //
6472 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6473 //
6474 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6475 const TargetRegisterClass *FillRC = nullptr;
6476 switch (DstMO.getSubReg()) {
6477 default:
6478 break;
6479 case AArch64::sub_32:
6480 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6481 FillRC = &AArch64::GPR32RegClass;
6482 break;
6483 case AArch64::ssub:
6484 FillRC = &AArch64::FPR32RegClass;
6485 break;
6486 case AArch64::dsub:
6487 FillRC = &AArch64::FPR64RegClass;
6488 break;
6489 }
6490
6491 if (FillRC) {
6492 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6493 TRI.getRegSizeInBits(*FillRC) &&
6494 "Mismatched regclass size on folded subreg COPY");
6495 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6496 Register());
6497 MachineInstr &LoadMI = *--InsertPt;
6498 MachineOperand &LoadDst = LoadMI.getOperand(0);
6499 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6500 LoadDst.setSubReg(DstMO.getSubReg());
6501 LoadDst.setIsUndef();
6502 return &LoadMI;
6503 }
6504 }
6505 }
6506
6507 // Cannot fold.
6508 return nullptr;
6509}
6510
6512 StackOffset &SOffset,
6513 bool *OutUseUnscaledOp,
6514 unsigned *OutUnscaledOp,
6515 int64_t *EmittableOffset) {
6516 // Set output values in case of early exit.
6517 if (EmittableOffset)
6518 *EmittableOffset = 0;
6519 if (OutUseUnscaledOp)
6520 *OutUseUnscaledOp = false;
6521 if (OutUnscaledOp)
6522 *OutUnscaledOp = 0;
6523
6524 // Exit early for structured vector spills/fills as they can't take an
6525 // immediate offset.
6526 switch (MI.getOpcode()) {
6527 default:
6528 break;
6529 case AArch64::LD1Rv1d:
6530 case AArch64::LD1Rv2s:
6531 case AArch64::LD1Rv2d:
6532 case AArch64::LD1Rv4h:
6533 case AArch64::LD1Rv4s:
6534 case AArch64::LD1Rv8b:
6535 case AArch64::LD1Rv8h:
6536 case AArch64::LD1Rv16b:
6537 case AArch64::LD1Twov2d:
6538 case AArch64::LD1Threev2d:
6539 case AArch64::LD1Fourv2d:
6540 case AArch64::LD1Twov1d:
6541 case AArch64::LD1Threev1d:
6542 case AArch64::LD1Fourv1d:
6543 case AArch64::ST1Twov2d:
6544 case AArch64::ST1Threev2d:
6545 case AArch64::ST1Fourv2d:
6546 case AArch64::ST1Twov1d:
6547 case AArch64::ST1Threev1d:
6548 case AArch64::ST1Fourv1d:
6549 case AArch64::ST1i8:
6550 case AArch64::ST1i16:
6551 case AArch64::ST1i32:
6552 case AArch64::ST1i64:
6553 case AArch64::IRG:
6554 case AArch64::IRGstack:
6555 case AArch64::STGloop:
6556 case AArch64::STZGloop:
6558 }
6559
6560 // Get the min/max offset and the scale.
6561 TypeSize ScaleValue(0U, false), Width(0U, false);
6562 int64_t MinOff, MaxOff;
6563 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6564 MaxOff))
6565 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6566
6567 // Construct the complete offset.
6568 bool IsMulVL = ScaleValue.isScalable();
6569 unsigned Scale = ScaleValue.getKnownMinValue();
6570 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6571
6572 const MachineOperand &ImmOpnd =
6573 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6574 Offset += ImmOpnd.getImm() * Scale;
6575
6576 // If the offset doesn't match the scale, we rewrite the instruction to
6577 // use the unscaled instruction instead. Likewise, if we have a negative
6578 // offset and there is an unscaled op to use.
6579 std::optional<unsigned> UnscaledOp =
6581 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6582 if (useUnscaledOp &&
6583 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6584 MaxOff))
6585 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6586
6587 Scale = ScaleValue.getKnownMinValue();
6588 assert(IsMulVL == ScaleValue.isScalable() &&
6589 "Unscaled opcode has different value for scalable");
6590
6591 int64_t Remainder = Offset % Scale;
6592 assert(!(Remainder && useUnscaledOp) &&
6593 "Cannot have remainder when using unscaled op");
6594
6595 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6596 int64_t NewOffset = Offset / Scale;
6597 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6598 Offset = Remainder;
6599 else {
6600 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6601 Offset = Offset - (NewOffset * Scale);
6602 }
6603
6604 if (EmittableOffset)
6605 *EmittableOffset = NewOffset;
6606 if (OutUseUnscaledOp)
6607 *OutUseUnscaledOp = useUnscaledOp;
6608 if (OutUnscaledOp && UnscaledOp)
6609 *OutUnscaledOp = *UnscaledOp;
6610
6611 if (IsMulVL)
6612 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6613 else
6614 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6616 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6617}
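// For example, for an LDRXui (scale 8) whose combined byte offset comes to 20,
// the offset is not a multiple of the scale, so the code above switches to the
// unscaled LDURXi form and reports an emittable offset of 20 with no
// remainder; a combined offset of 24 instead stays scaled and becomes the
// immediate 3.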
6618
6620 unsigned FrameReg, StackOffset &Offset,
6621 const AArch64InstrInfo *TII) {
6622 unsigned Opcode = MI.getOpcode();
6623 unsigned ImmIdx = FrameRegIdx + 1;
6624
6625 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6626 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6627 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6628 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6629 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6630 MI.eraseFromParent();
6631 Offset = StackOffset();
6632 return true;
6633 }
6634
6635 int64_t NewOffset;
6636 unsigned UnscaledOp;
6637 bool UseUnscaledOp;
6638 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6639 &UnscaledOp, &NewOffset);
6642 // Replace the FrameIndex with FrameReg.
6643 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6644 if (UseUnscaledOp)
6645 MI.setDesc(TII->get(UnscaledOp));
6646
6647 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6648 return !Offset;
6649 }
6650
6651 return false;
6652}
6653
6659
6661 return MCInstBuilder(AArch64::HINT).addImm(0);
6662}
6663
6664// AArch64 supports MachineCombiner.
6665bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6666
6667// True when Opc sets flags
6668static bool isCombineInstrSettingFlag(unsigned Opc) {
6669 switch (Opc) {
6670 case AArch64::ADDSWrr:
6671 case AArch64::ADDSWri:
6672 case AArch64::ADDSXrr:
6673 case AArch64::ADDSXri:
6674 case AArch64::SUBSWrr:
6675 case AArch64::SUBSXrr:
6676 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6677 case AArch64::SUBSWri:
6678 case AArch64::SUBSXri:
6679 return true;
6680 default:
6681 break;
6682 }
6683 return false;
6684}
6685
6686// 32b Opcodes that can be combined with a MUL
6687static bool isCombineInstrCandidate32(unsigned Opc) {
6688 switch (Opc) {
6689 case AArch64::ADDWrr:
6690 case AArch64::ADDWri:
6691 case AArch64::SUBWrr:
6692 case AArch64::ADDSWrr:
6693 case AArch64::ADDSWri:
6694 case AArch64::SUBSWrr:
6695 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6696 case AArch64::SUBWri:
6697 case AArch64::SUBSWri:
6698 return true;
6699 default:
6700 break;
6701 }
6702 return false;
6703}
6704
6705// 64b Opcodes that can be combined with a MUL
6706static bool isCombineInstrCandidate64(unsigned Opc) {
6707 switch (Opc) {
6708 case AArch64::ADDXrr:
6709 case AArch64::ADDXri:
6710 case AArch64::SUBXrr:
6711 case AArch64::ADDSXrr:
6712 case AArch64::ADDSXri:
6713 case AArch64::SUBSXrr:
6714 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6715 case AArch64::SUBXri:
6716 case AArch64::SUBSXri:
6717 case AArch64::ADDv8i8:
6718 case AArch64::ADDv16i8:
6719 case AArch64::ADDv4i16:
6720 case AArch64::ADDv8i16:
6721 case AArch64::ADDv2i32:
6722 case AArch64::ADDv4i32:
6723 case AArch64::SUBv8i8:
6724 case AArch64::SUBv16i8:
6725 case AArch64::SUBv4i16:
6726 case AArch64::SUBv8i16:
6727 case AArch64::SUBv2i32:
6728 case AArch64::SUBv4i32:
6729 return true;
6730 default:
6731 break;
6732 }
6733 return false;
6734}
6735
6736// FP Opcodes that can be combined with a FMUL.
6737static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6738 switch (Inst.getOpcode()) {
6739 default:
6740 break;
6741 case AArch64::FADDHrr:
6742 case AArch64::FADDSrr:
6743 case AArch64::FADDDrr:
6744 case AArch64::FADDv4f16:
6745 case AArch64::FADDv8f16:
6746 case AArch64::FADDv2f32:
6747 case AArch64::FADDv2f64:
6748 case AArch64::FADDv4f32:
6749 case AArch64::FSUBHrr:
6750 case AArch64::FSUBSrr:
6751 case AArch64::FSUBDrr:
6752 case AArch64::FSUBv4f16:
6753 case AArch64::FSUBv8f16:
6754 case AArch64::FSUBv2f32:
6755 case AArch64::FSUBv2f64:
6756 case AArch64::FSUBv4f32:
6758 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6759 // the target options or if FADD/FSUB has the contract fast-math flag.
6760 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6762 }
6763 return false;
6764}
6765
6766// Opcodes that can be combined with a MUL
6770
6771//
6772// Utility routine that checks if \param MO is defined by an
6773// \param CombineOpc instruction in the basic block \param MBB
6775 unsigned CombineOpc, unsigned ZeroReg = 0,
6776 bool CheckZeroReg = false) {
6777 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6778 MachineInstr *MI = nullptr;
6779
6780 if (MO.isReg() && MO.getReg().isVirtual())
6781 MI = MRI.getUniqueVRegDef(MO.getReg());
6782 // And it needs to be in the trace (otherwise, it won't have a depth).
6783 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6784 return false;
6785  // Must only be used by the user we combine with.
6786 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6787 return false;
6788
6789 if (CheckZeroReg) {
6790 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6791 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6792           MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6793 // The third input reg must be zero.
6794 if (MI->getOperand(3).getReg() != ZeroReg)
6795 return false;
6796 }
6797
6798 if (isCombineInstrSettingFlag(CombineOpc) &&
6799 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6800 return false;
6801
6802 return true;
6803}
6804
6805//
6806// Is \param MO defined by an integer multiply and can be combined?
6808 unsigned MulOpc, unsigned ZeroReg) {
6809 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6810}
6811
6812//
6813// Is \param MO defined by a floating-point multiply and can be combined?
6815 unsigned MulOpc) {
6816 return canCombine(MBB, MO, MulOpc);
6817}
6818
6819// TODO: There are many more machine instruction opcodes to match:
6820// 1. Other data types (integer, vectors)
6821// 2. Other math / logic operations (xor, or)
6822// 3. Other forms of the same operation (intrinsics and other variants)
6823bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6824 bool Invert) const {
6825 if (Invert)
6826 return false;
6827 switch (Inst.getOpcode()) {
6828 // == Floating-point types ==
6829 // -- Floating-point instructions --
6830 case AArch64::FADDHrr:
6831 case AArch64::FADDSrr:
6832 case AArch64::FADDDrr:
6833 case AArch64::FMULHrr:
6834 case AArch64::FMULSrr:
6835 case AArch64::FMULDrr:
6836 case AArch64::FMULX16:
6837 case AArch64::FMULX32:
6838 case AArch64::FMULX64:
6839 // -- Advanced SIMD instructions --
6840 case AArch64::FADDv4f16:
6841 case AArch64::FADDv8f16:
6842 case AArch64::FADDv2f32:
6843 case AArch64::FADDv4f32:
6844 case AArch64::FADDv2f64:
6845 case AArch64::FMULv4f16:
6846 case AArch64::FMULv8f16:
6847 case AArch64::FMULv2f32:
6848 case AArch64::FMULv4f32:
6849 case AArch64::FMULv2f64:
6850 case AArch64::FMULXv4f16:
6851 case AArch64::FMULXv8f16:
6852 case AArch64::FMULXv2f32:
6853 case AArch64::FMULXv4f32:
6854 case AArch64::FMULXv2f64:
6855 // -- SVE instructions --
6856 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6857 // in the SVE instruction set (though there are predicated ones).
6858 case AArch64::FADD_ZZZ_H:
6859 case AArch64::FADD_ZZZ_S:
6860 case AArch64::FADD_ZZZ_D:
6861 case AArch64::FMUL_ZZZ_H:
6862 case AArch64::FMUL_ZZZ_S:
6863 case AArch64::FMUL_ZZZ_D:
6866
6867 // == Integer types ==
6868 // -- Base instructions --
6869 // Opcodes MULWrr and MULXrr don't exist because
6870 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6871 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6872  // The machine-combiner does not support three-source-operand machine
6873  // instructions, so we cannot reassociate MULs.
6874 case AArch64::ADDWrr:
6875 case AArch64::ADDXrr:
6876 case AArch64::ANDWrr:
6877 case AArch64::ANDXrr:
6878 case AArch64::ORRWrr:
6879 case AArch64::ORRXrr:
6880 case AArch64::EORWrr:
6881 case AArch64::EORXrr:
6882 case AArch64::EONWrr:
6883 case AArch64::EONXrr:
6884 // -- Advanced SIMD instructions --
6885 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6886 // in the Advanced SIMD instruction set.
6887 case AArch64::ADDv8i8:
6888 case AArch64::ADDv16i8:
6889 case AArch64::ADDv4i16:
6890 case AArch64::ADDv8i16:
6891 case AArch64::ADDv2i32:
6892 case AArch64::ADDv4i32:
6893 case AArch64::ADDv1i64:
6894 case AArch64::ADDv2i64:
6895 case AArch64::MULv8i8:
6896 case AArch64::MULv16i8:
6897 case AArch64::MULv4i16:
6898 case AArch64::MULv8i16:
6899 case AArch64::MULv2i32:
6900 case AArch64::MULv4i32:
6901 case AArch64::ANDv8i8:
6902 case AArch64::ANDv16i8:
6903 case AArch64::ORRv8i8:
6904 case AArch64::ORRv16i8:
6905 case AArch64::EORv8i8:
6906 case AArch64::EORv16i8:
6907 // -- SVE instructions --
6908 case AArch64::ADD_ZZZ_B:
6909 case AArch64::ADD_ZZZ_H:
6910 case AArch64::ADD_ZZZ_S:
6911 case AArch64::ADD_ZZZ_D:
6912 case AArch64::MUL_ZZZ_B:
6913 case AArch64::MUL_ZZZ_H:
6914 case AArch64::MUL_ZZZ_S:
6915 case AArch64::MUL_ZZZ_D:
6916 case AArch64::AND_ZZZ:
6917 case AArch64::ORR_ZZZ:
6918 case AArch64::EOR_ZZZ:
6919 return true;
6920
6921 default:
6922 return false;
6923 }
6924}
6925
6926/// Find instructions that can be turned into madd.
6928 SmallVectorImpl<unsigned> &Patterns) {
6929 unsigned Opc = Root.getOpcode();
6930 MachineBasicBlock &MBB = *Root.getParent();
6931 bool Found = false;
6932
6934 return false;
6936 int Cmp_NZCV =
6937 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6938    // When NZCV is live, bail out.
6939 if (Cmp_NZCV == -1)
6940 return false;
6941 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6942    // When the opcode can't change, bail out.
6943 // CHECKME: do we miss any cases for opcode conversion?
6944 if (NewOpc == Opc)
6945 return false;
6946 Opc = NewOpc;
6947 }
6948
6949 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6950 unsigned Pattern) {
6951 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6952 Patterns.push_back(Pattern);
6953 Found = true;
6954 }
6955 };
6956
6957 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6958 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6959 Patterns.push_back(Pattern);
6960 Found = true;
6961 }
6962 };
6963
6965
6966 switch (Opc) {
6967 default:
6968 break;
6969 case AArch64::ADDWrr:
6970 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6971 "ADDWrr does not have register operands");
6972 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6973 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6974 break;
6975 case AArch64::ADDXrr:
6976 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6977 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6978 break;
6979 case AArch64::SUBWrr:
6980 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6981 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6982 break;
6983 case AArch64::SUBXrr:
6984 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6985 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6986 break;
6987 case AArch64::ADDWri:
6988 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6989 break;
6990 case AArch64::ADDXri:
6991 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6992 break;
6993 case AArch64::SUBWri:
6994 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6995 break;
6996 case AArch64::SUBXri:
6997 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6998 break;
6999 case AArch64::ADDv8i8:
7000 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7001 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7002 break;
7003 case AArch64::ADDv16i8:
7004 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7005 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7006 break;
7007 case AArch64::ADDv4i16:
7008 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7009 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7010 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7011 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7012 break;
7013 case AArch64::ADDv8i16:
7014 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7015 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7016 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7017 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7018 break;
7019 case AArch64::ADDv2i32:
7020 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7021 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7022 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7023 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7024 break;
7025 case AArch64::ADDv4i32:
7026 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7027 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7028 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7029 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7030 break;
7031 case AArch64::SUBv8i8:
7032 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7033 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7034 break;
7035 case AArch64::SUBv16i8:
7036 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7037 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7038 break;
7039 case AArch64::SUBv4i16:
7040 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7041 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7042 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7043 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7044 break;
7045 case AArch64::SUBv8i16:
7046 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7047 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7048 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7049 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7050 break;
7051 case AArch64::SUBv2i32:
7052 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7053 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7054 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7055 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7056 break;
7057 case AArch64::SUBv4i32:
7058 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7059 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7060 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7061 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7062 break;
7063 }
7064 return Found;
7065}
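// As an example of the patterns collected above, for MIR such as
//   %2:gpr32 = MADDWrrr %0, %1, $wzr   ; plain 32-bit multiply
//   %3:gpr32 = ADDWrr %2, %4
// the ADDWrr root records MULADDW_OP1, which later lets the combiner rewrite
// the pair as a single %3 = MADDWrrr %0, %1, %4.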
7066
7067bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7068 switch (Opcode) {
7069 default:
7070 break;
7071 case AArch64::UABALB_ZZZ_D:
7072 case AArch64::UABALB_ZZZ_H:
7073 case AArch64::UABALB_ZZZ_S:
7074 case AArch64::UABALT_ZZZ_D:
7075 case AArch64::UABALT_ZZZ_H:
7076 case AArch64::UABALT_ZZZ_S:
7077 case AArch64::SABALB_ZZZ_D:
7078 case AArch64::SABALB_ZZZ_S:
7079 case AArch64::SABALB_ZZZ_H:
7080 case AArch64::SABALT_ZZZ_D:
7081 case AArch64::SABALT_ZZZ_S:
7082 case AArch64::SABALT_ZZZ_H:
7083 case AArch64::UABALv16i8_v8i16:
7084 case AArch64::UABALv2i32_v2i64:
7085 case AArch64::UABALv4i16_v4i32:
7086 case AArch64::UABALv4i32_v2i64:
7087 case AArch64::UABALv8i16_v4i32:
7088 case AArch64::UABALv8i8_v8i16:
7089 case AArch64::UABAv16i8:
7090 case AArch64::UABAv2i32:
7091 case AArch64::UABAv4i16:
7092 case AArch64::UABAv4i32:
7093 case AArch64::UABAv8i16:
7094 case AArch64::UABAv8i8:
7095 case AArch64::SABALv16i8_v8i16:
7096 case AArch64::SABALv2i32_v2i64:
7097 case AArch64::SABALv4i16_v4i32:
7098 case AArch64::SABALv4i32_v2i64:
7099 case AArch64::SABALv8i16_v4i32:
7100 case AArch64::SABALv8i8_v8i16:
7101 case AArch64::SABAv16i8:
7102 case AArch64::SABAv2i32:
7103 case AArch64::SABAv4i16:
7104 case AArch64::SABAv4i32:
7105 case AArch64::SABAv8i16:
7106 case AArch64::SABAv8i8:
7107 return true;
7108 }
7109
7110 return false;
7111}
7112
7113unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7114 unsigned AccumulationOpcode) const {
7115 switch (AccumulationOpcode) {
7116 default:
7117 llvm_unreachable("Unsupported accumulation Opcode!");
7118 case AArch64::UABALB_ZZZ_D:
7119 return AArch64::UABDLB_ZZZ_D;
7120 case AArch64::UABALB_ZZZ_H:
7121 return AArch64::UABDLB_ZZZ_H;
7122 case AArch64::UABALB_ZZZ_S:
7123 return AArch64::UABDLB_ZZZ_S;
7124 case AArch64::UABALT_ZZZ_D:
7125 return AArch64::UABDLT_ZZZ_D;
7126 case AArch64::UABALT_ZZZ_H:
7127 return AArch64::UABDLT_ZZZ_H;
7128 case AArch64::UABALT_ZZZ_S:
7129 return AArch64::UABDLT_ZZZ_S;
7130 case AArch64::UABALv16i8_v8i16:
7131 return AArch64::UABDLv16i8_v8i16;
7132 case AArch64::UABALv2i32_v2i64:
7133 return AArch64::UABDLv2i32_v2i64;
7134 case AArch64::UABALv4i16_v4i32:
7135 return AArch64::UABDLv4i16_v4i32;
7136 case AArch64::UABALv4i32_v2i64:
7137 return AArch64::UABDLv4i32_v2i64;
7138 case AArch64::UABALv8i16_v4i32:
7139 return AArch64::UABDLv8i16_v4i32;
7140 case AArch64::UABALv8i8_v8i16:
7141 return AArch64::UABDLv8i8_v8i16;
7142 case AArch64::UABAv16i8:
7143 return AArch64::UABDv16i8;
7144 case AArch64::UABAv2i32:
7145 return AArch64::UABDv2i32;
7146 case AArch64::UABAv4i16:
7147 return AArch64::UABDv4i16;
7148 case AArch64::UABAv4i32:
7149 return AArch64::UABDv4i32;
7150 case AArch64::UABAv8i16:
7151 return AArch64::UABDv8i16;
7152 case AArch64::UABAv8i8:
7153 return AArch64::UABDv8i8;
7154 case AArch64::SABALB_ZZZ_D:
7155 return AArch64::SABDLB_ZZZ_D;
7156 case AArch64::SABALB_ZZZ_S:
7157 return AArch64::SABDLB_ZZZ_S;
7158 case AArch64::SABALB_ZZZ_H:
7159 return AArch64::SABDLB_ZZZ_H;
7160 case AArch64::SABALT_ZZZ_D:
7161 return AArch64::SABDLT_ZZZ_D;
7162 case AArch64::SABALT_ZZZ_S:
7163 return AArch64::SABDLT_ZZZ_S;
7164 case AArch64::SABALT_ZZZ_H:
7165 return AArch64::SABDLT_ZZZ_H;
7166 case AArch64::SABALv16i8_v8i16:
7167 return AArch64::SABDLv16i8_v8i16;
7168 case AArch64::SABALv2i32_v2i64:
7169 return AArch64::SABDLv2i32_v2i64;
7170 case AArch64::SABALv4i16_v4i32:
7171 return AArch64::SABDLv4i16_v4i32;
7172 case AArch64::SABALv4i32_v2i64:
7173 return AArch64::SABDLv4i32_v2i64;
7174 case AArch64::SABALv8i16_v4i32:
7175 return AArch64::SABDLv8i16_v4i32;
7176 case AArch64::SABALv8i8_v8i16:
7177 return AArch64::SABDLv8i8_v8i16;
7178 case AArch64::SABAv16i8:
7179 return AArch64::SABDv16i8;
7180 case AArch64::SABAv2i32:
7181    return AArch64::SABDv2i32;
7182 case AArch64::SABAv4i16:
7183 return AArch64::SABDv4i16;
7184 case AArch64::SABAv4i32:
7185 return AArch64::SABDv4i32;
7186 case AArch64::SABAv8i16:
7187 return AArch64::SABDv8i16;
7188 case AArch64::SABAv8i8:
7189 return AArch64::SABDv8i8;
7190 }
7191}
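// NOTE: Illustrative sketch, not part of the upstream source; the virtual
// register names below are hypothetical. isAccumulationOpcode(),
// getAccumulationStartOpcode() and getReduceOpcodeForAccumulator() feed the
// generic machine-combiner support for splitting long accumulation chains.
// Roughly, a serial chain such as
//   %acc1 = UABAv4i32 %acc0, %a0, %b0
//   %acc2 = UABAv4i32 %acc1, %a1, %b1
//   %acc3 = UABAv4i32 %acc2, %a2, %b2
// can be split into independent sub-chains, where a new sub-chain starts with
// the non-accumulating opcode returned above (here UABDv4i32) and the partial
// results are recombined with the reduce opcode (here ADDv4i32):
//   %t0   = UABDv4i32 %a1, %b1
//   %t1   = UABAv4i32 %t0, %a2, %b2
//   %s0   = UABAv4i32 %acc0, %a0, %b0
//   %acc3 = ADDv4i32 %s0, %t1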
7192
7193/// Floating-Point Support
7194
7195/// Find instructions that can be turned into madd.
7197 SmallVectorImpl<unsigned> &Patterns) {
7198
7199 if (!isCombineInstrCandidateFP(Root))
7200 return false;
7201
7202 MachineBasicBlock &MBB = *Root.getParent();
7203 bool Found = false;
7204
7205 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7206 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7207 Patterns.push_back(Pattern);
7208 return true;
7209 }
7210 return false;
7211 };
7212
7214
7215 switch (Root.getOpcode()) {
7216 default:
7217 assert(false && "Unsupported FP instruction in combiner\n");
7218 break;
7219 case AArch64::FADDHrr:
7220 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7221 "FADDHrr does not have register operands");
7222
7223 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7224 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7225 break;
7226 case AArch64::FADDSrr:
7227 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7228 "FADDSrr does not have register operands");
7229
7230 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7231 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7232
7233 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7234 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7235 break;
7236 case AArch64::FADDDrr:
7237 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7238 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7239
7240 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7241 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7242 break;
7243 case AArch64::FADDv4f16:
7244 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7245 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7246
7247 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7248 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7249 break;
7250 case AArch64::FADDv8f16:
7251 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7252 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7253
7254 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7255 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7256 break;
7257 case AArch64::FADDv2f32:
7258 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7259 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7260
7261 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7262 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7263 break;
7264 case AArch64::FADDv2f64:
7265 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7266 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7267
7268 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7269 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7270 break;
7271 case AArch64::FADDv4f32:
7272 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7273 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7274
7275 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7276 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7277 break;
7278 case AArch64::FSUBHrr:
7279 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7280 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7281 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7282 break;
7283 case AArch64::FSUBSrr:
7284 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7285
7286 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7287 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7288
7289 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7290 break;
7291 case AArch64::FSUBDrr:
7292 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7293
7294 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7295 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7296
7297 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7298 break;
7299 case AArch64::FSUBv4f16:
7300 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7301 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7302
7303 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7304 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7305 break;
7306 case AArch64::FSUBv8f16:
7307 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7308 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7309
7310 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7311 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7312 break;
7313 case AArch64::FSUBv2f32:
7314 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7315 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7316
7317 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7318 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7319 break;
7320 case AArch64::FSUBv2f64:
7321 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7322 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7323
7324 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7325 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7326 break;
7327 case AArch64::FSUBv4f32:
7328 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7329 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7330
7331 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7332 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7333 break;
7334 }
7335 return Found;
7336}
7337
7339 SmallVectorImpl<unsigned> &Patterns) {
7340 MachineBasicBlock &MBB = *Root.getParent();
7341 bool Found = false;
7342
7343 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7344 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7345 MachineOperand &MO = Root.getOperand(Operand);
7346 MachineInstr *MI = nullptr;
7347 if (MO.isReg() && MO.getReg().isVirtual())
7348 MI = MRI.getUniqueVRegDef(MO.getReg());
7349 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7350 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7351 MI->getOperand(1).getReg().isVirtual())
7352 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7353 if (MI && MI->getOpcode() == Opcode) {
7354 Patterns.push_back(Pattern);
7355 return true;
7356 }
7357 return false;
7358 };
7359
7361
7362 switch (Root.getOpcode()) {
7363 default:
7364 return false;
7365 case AArch64::FMULv2f32:
7366 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7367 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7368 break;
7369 case AArch64::FMULv2f64:
7370 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7371 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7372 break;
7373 case AArch64::FMULv4f16:
7374 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7375 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7376 break;
7377 case AArch64::FMULv4f32:
7378 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7379 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7380 break;
7381 case AArch64::FMULv8f16:
7382 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7383 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7384 break;
7385 }
7386
7387 return Found;
7388}
7389
7391 SmallVectorImpl<unsigned> &Patterns) {
7392 unsigned Opc = Root.getOpcode();
7393 MachineBasicBlock &MBB = *Root.getParent();
7394 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7395
7396 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7397 MachineOperand &MO = Root.getOperand(1);
7398 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7399 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7400 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7404 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7405 Patterns.push_back(Pattern);
7406 return true;
7407 }
7408 return false;
7409 };
7410
7411 switch (Opc) {
7412 default:
7413 break;
7414 case AArch64::FNEGDr:
7415 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7416 case AArch64::FNEGSr:
7417 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7418 }
7419
7420 return false;
7421}
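// NOTE: Illustrative sketch, not part of the upstream source; register names
// are hypothetical. The FNMADD pattern matched above rewrites a negated fused
// multiply-add, e.g.
//   %t = FMADDDrrr %a, %b, %c        ; t = a * b + c (single use)
//   %r = FNEGDr %t
// into
//   %r = FNMADDDrrr %a, %b, %c       ; r = -(a * b + c)
// The fast-math flag checks above (including nsz) guard the cases where the
// two forms can differ, e.g. in the sign of a zero result.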
7422
7423/// Return true when a code sequence can improve throughput. It
7424/// should be called only for instructions in loops.
7425/// \param Pattern - combiner pattern
7427 switch (Pattern) {
7428 default:
7429 break;
7535 return true;
7536 } // end switch (Pattern)
7537 return false;
7538}
7539
7540/// Find other MI combine patterns.
7542 SmallVectorImpl<unsigned> &Patterns) {
7543 // A - (B + C) ==> (A - B) - C or (A - C) - B
7544 unsigned Opc = Root.getOpcode();
7545 MachineBasicBlock &MBB = *Root.getParent();
7546
7547 switch (Opc) {
7548 case AArch64::SUBWrr:
7549 case AArch64::SUBSWrr:
7550 case AArch64::SUBXrr:
7551 case AArch64::SUBSXrr:
7552 // Found candidate root.
7553 break;
7554 default:
7555 return false;
7556 }
7557
7559 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7560 -1)
7561 return false;
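  // NOTE: Descriptive comment, not part of the upstream source. The rewritten
  // sequence uses non-flag-setting SUBs, so for the SUBS forms the pattern is
  // only applied when the NZCV result of the root is dead.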
7562
7563 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7564 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7565 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7566 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7569 return true;
7570 }
7571
7572 return false;
7573}
7574
7575/// Check if the given instruction forms a gather load pattern that can be
7576/// optimized for better Memory-Level Parallelism (MLP). This function
7577/// identifies chains of NEON lane load instructions that load data from
7578/// different memory addresses into individual lanes of a 128-bit vector
7579/// register, then attempts to split the pattern into parallel loads to break
7580/// the serial dependency between instructions.
7581///
7582/// Pattern Matched:
7583/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7584/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7585///
7586/// Transformed Into:
7587/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7588/// to combine the results, enabling better memory-level parallelism.
7589///
7590/// Supported Element Types:
7591/// - 32-bit elements (LD1i32, 4 lanes total)
7592/// - 16-bit elements (LD1i16, 8 lanes total)
7593/// - 8-bit elements (LD1i8, 16 lanes total)
7595 SmallVectorImpl<unsigned> &Patterns,
7596 unsigned LoadLaneOpCode, unsigned NumLanes) {
7597 const MachineFunction *MF = Root.getMF();
7598
7599 // Early exit if optimizing for size.
7600 if (MF->getFunction().hasMinSize())
7601 return false;
7602
7603 const MachineRegisterInfo &MRI = MF->getRegInfo();
7605
7606 // The root of the pattern must load into the last lane of the vector.
7607 if (Root.getOperand(2).getImm() != NumLanes - 1)
7608 return false;
7609
7610 // Check that we have load into all lanes except lane 0.
7611 // For each load we also want to check that:
7612 // 1. It has a single non-debug use (since we will be replacing the virtual
7613 // register)
7614 // 2. That the addressing mode only uses a single pointer operand
7615 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7616 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7617 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7619 while (!RemainingLanes.empty() && CurrInstr &&
7620 CurrInstr->getOpcode() == LoadLaneOpCode &&
7621 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7622 CurrInstr->getNumOperands() == 4) {
7623 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7624 LoadInstrs.push_back(CurrInstr);
7625 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7626 }
7627
7628  // Check that we have found a match for lanes N-1..1.
7629 if (!RemainingLanes.empty())
7630 return false;
7631
7632 // Match the SUBREG_TO_REG sequence.
7633 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7634 return false;
7635
7636 // Verify that the subreg to reg loads an integer into the first lane.
7637 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7638 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7639 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7640 return false;
7641
7642 // Verify that it also has a single non debug use.
7643 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7644 return false;
7645
7646 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7647
7648 // If there is any chance of aliasing, do not apply the pattern.
7649 // Walk backward through the MBB starting from Root.
7650 // Exit early if we've encountered all load instructions or hit the search
7651 // limit.
7652 auto MBBItr = Root.getIterator();
7653 unsigned RemainingSteps = GatherOptSearchLimit;
7654 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7655 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7656 const MachineBasicBlock *MBB = Root.getParent();
7657
7658 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7659 !RemainingLoadInstrs.empty();
7660 --MBBItr, --RemainingSteps) {
7661 const MachineInstr &CurrInstr = *MBBItr;
7662
7663 // Remove this instruction from remaining loads if it's one we're tracking.
7664 RemainingLoadInstrs.erase(&CurrInstr);
7665
7666 // Check for potential aliasing with any of the load instructions to
7667 // optimize.
7668 if (CurrInstr.isLoadFoldBarrier())
7669 return false;
7670 }
7671
7672 // If we hit the search limit without finding all load instructions,
7673 // don't match the pattern.
7674 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7675 return false;
7676
7677 switch (NumLanes) {
7678 case 4:
7680 break;
7681 case 8:
7683 break;
7684 case 16:
7686 break;
7687 default:
7688 llvm_unreachable("Got bad number of lanes for gather pattern.");
7689 }
7690
7691 return true;
7692}
7693
7694/// Search for patterns of LD instructions we can optimize.
7696 SmallVectorImpl<unsigned> &Patterns) {
7697
7698 // The pattern searches for loads into single lanes.
7699 switch (Root.getOpcode()) {
7700 case AArch64::LD1i32:
7701 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7702 case AArch64::LD1i16:
7703 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7704 case AArch64::LD1i8:
7705 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7706 default:
7707 return false;
7708 }
7709}
7710
7711/// Generate optimized instruction sequence for gather load patterns to improve
7712/// Memory-Level Parallelism (MLP). This function transforms a chain of
7713/// sequential NEON lane loads into parallel vector loads that can execute
7714/// concurrently.
7715static void
7719 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7720 unsigned Pattern, unsigned NumLanes) {
7721 MachineFunction &MF = *Root.getParent()->getParent();
7724
7725 // Gather the initial load instructions to build the pattern.
7726 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7727 MachineInstr *CurrInstr = &Root;
7728 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7729 LoadToLaneInstrs.push_back(CurrInstr);
7730 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7731 }
7732
7733 // Sort the load instructions according to the lane.
7734 llvm::sort(LoadToLaneInstrs,
7735 [](const MachineInstr *A, const MachineInstr *B) {
7736 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7737 });
7738
7739 MachineInstr *SubregToReg = CurrInstr;
7740 LoadToLaneInstrs.push_back(
7741 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7742 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7743
7744 const TargetRegisterClass *FPR128RegClass =
7745 MRI.getRegClass(Root.getOperand(0).getReg());
7746
7747 // Helper lambda to create a LD1 instruction.
7748 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7749 Register SrcRegister, unsigned Lane,
7750 Register OffsetRegister,
7751 bool OffsetRegisterKillState) {
7752 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7753 MachineInstrBuilder LoadIndexIntoRegister =
7754 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7755 NewRegister)
7756 .addReg(SrcRegister)
7757 .addImm(Lane)
7758 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7759 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7760 InsInstrs.push_back(LoadIndexIntoRegister);
7761 return NewRegister;
7762 };
7763
7764  // Helper to create a load instruction based on the NumLanes in the NEON
7765 // register we are rewriting.
7766 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7767 Register OffsetReg,
7768 bool KillState) -> MachineInstrBuilder {
7769 unsigned Opcode;
7770 switch (NumLanes) {
7771 case 4:
7772 Opcode = AArch64::LDRSui;
7773 break;
7774 case 8:
7775 Opcode = AArch64::LDRHui;
7776 break;
7777 case 16:
7778 Opcode = AArch64::LDRBui;
7779 break;
7780 default:
7782 "Got unsupported number of lanes in machine-combiner gather pattern");
7783 }
7784 // Immediate offset load
7785 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7786 .addReg(OffsetReg)
7787 .addImm(0);
7788 };
7789
7790 // Load the remaining lanes into register 0.
7791 auto LanesToLoadToReg0 =
7792 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7793 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7794 Register PrevReg = SubregToReg->getOperand(0).getReg();
7795 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7796 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7797 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7798 OffsetRegOperand.getReg(),
7799 OffsetRegOperand.isKill());
7800 DelInstrs.push_back(LoadInstr);
7801 }
7802 Register LastLoadReg0 = PrevReg;
7803
7804 // First load into register 1. Perform an integer load to zero out the upper
7805 // lanes in a single instruction.
7806 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7807 MachineInstr *OriginalSplitLoad =
7808 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7809 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7810 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7811
7812 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7813 OriginalSplitLoad->getOperand(3);
7814 MachineInstrBuilder MiddleIndexLoadInstr =
7815 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7816 OriginalSplitToLoadOffsetOperand.getReg(),
7817 OriginalSplitToLoadOffsetOperand.isKill());
7818
7819 InstrIdxForVirtReg.insert(
7820 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7821 InsInstrs.push_back(MiddleIndexLoadInstr);
7822 DelInstrs.push_back(OriginalSplitLoad);
7823
7824 // Subreg To Reg instruction for register 1.
7825 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7826 unsigned SubregType;
7827 switch (NumLanes) {
7828 case 4:
7829 SubregType = AArch64::ssub;
7830 break;
7831 case 8:
7832 SubregType = AArch64::hsub;
7833 break;
7834 case 16:
7835 SubregType = AArch64::bsub;
7836 break;
7837 default:
7839 "Got invalid NumLanes for machine-combiner gather pattern");
7840 }
7841
7842 auto SubRegToRegInstr =
7843 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7844 DestRegForSubregToReg)
7845 .addImm(0)
7846 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7847 .addImm(SubregType);
7848 InstrIdxForVirtReg.insert(
7849 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7850 InsInstrs.push_back(SubRegToRegInstr);
7851
7852 // Load remaining lanes into register 1.
7853 auto LanesToLoadToReg1 =
7854 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7855 LoadToLaneInstrsAscending.end());
7856 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7857 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7858 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7859 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7860 OffsetRegOperand.getReg(),
7861 OffsetRegOperand.isKill());
7862
7863    // Do not add the last load to DelInstrs - it will be removed later.
7864 if (Index == NumLanes / 2 - 2) {
7865 break;
7866 }
7867 DelInstrs.push_back(LoadInstr);
7868 }
7869 Register LastLoadReg1 = PrevReg;
7870
7871 // Create the final zip instruction to combine the results.
7872 MachineInstrBuilder ZipInstr =
7873 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7874 Root.getOperand(0).getReg())
7875 .addReg(LastLoadReg0)
7876 .addReg(LastLoadReg1);
7877 InsInstrs.push_back(ZipInstr);
7878}
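// NOTE: Illustrative sketch, not part of the upstream source; register names
// are hypothetical. For a 4-lane (LD1i32) gather chain the rewrite above turns
//   %v   = <32-bit scalar load> from %p0
//   %q0  = SUBREG_TO_REG 0, %v, ssub
//   %q1  = LD1i32 %q0, 1, %p1
//   %q2  = LD1i32 %q1, 2, %p2
//   %q3  = LD1i32 %q2, 3, %p3        ; Root
// into two independent two-lane chains that can issue in parallel:
//   %a1  = LD1i32 %q0, 1, %p1        ; lanes 0-1 of the original vector
//   %s   = LDRSui %p2, 0             ; plain scalar load zeroes the upper lanes
//   %b0  = SUBREG_TO_REG 0, %s, ssub
//   %b1  = LD1i32 %b0, 1, %p3        ; lanes 2-3 of the original vector
//   %q3  = ZIP1v2i64 %a1, %b1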
7879
7893
7894/// Return true when there is potentially a faster code sequence for an
7895/// instruction chain ending in \p Root. All potential patterns are listed in
7896/// the \p Pattern vector. Pattern should be sorted in priority order since the
7897/// pattern evaluator stops checking as soon as it finds a faster sequence.
7898
7899bool AArch64InstrInfo::getMachineCombinerPatterns(
7900 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7901 bool DoRegPressureReduce) const {
7902 // Integer patterns
7903 if (getMaddPatterns(Root, Patterns))
7904 return true;
7905 // Floating point patterns
7906 if (getFMULPatterns(Root, Patterns))
7907 return true;
7908 if (getFMAPatterns(Root, Patterns))
7909 return true;
7910 if (getFNEGPatterns(Root, Patterns))
7911 return true;
7912
7913 // Other patterns
7914 if (getMiscPatterns(Root, Patterns))
7915 return true;
7916
7917 // Load patterns
7918 if (getLoadPatterns(Root, Patterns))
7919 return true;
7920
7921 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7922 DoRegPressureReduce);
7923}
7924
7926/// genFusedMultiply - Generate fused multiply instructions.
7927/// This function supports both integer and floating point instructions.
7928/// A typical example:
7929/// F|MUL I=A,B,0
7930/// F|ADD R,I,C
7931/// ==> F|MADD R,A,B,C
7932/// \param MF Containing MachineFunction
7933/// \param MRI Register information
7934/// \param TII Target information
7935/// \param Root is the F|ADD instruction
7936/// \param [out] InsInstrs is a vector of machine instructions and will
7937/// contain the generated madd instruction
7938/// \param IdxMulOpd is index of operand in Root that is the result of
7939/// the F|MUL. In the example above IdxMulOpd is 1.
7940/// \param MaddOpc the opcode of the f|madd instruction
7941/// \param RC Register class of operands
7942/// \param kind Kind of fma instruction (addressing mode) to be generated
7943/// \param ReplacedAddend is the result register from the instruction
7944/// replacing the non-combined operand, if any.
7945static MachineInstr *
7947 const TargetInstrInfo *TII, MachineInstr &Root,
7948 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7949 unsigned MaddOpc, const TargetRegisterClass *RC,
7951 const Register *ReplacedAddend = nullptr) {
7952 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7953
7954 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7955 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7956 Register ResultReg = Root.getOperand(0).getReg();
7957 Register SrcReg0 = MUL->getOperand(1).getReg();
7958 bool Src0IsKill = MUL->getOperand(1).isKill();
7959 Register SrcReg1 = MUL->getOperand(2).getReg();
7960 bool Src1IsKill = MUL->getOperand(2).isKill();
7961
7962 Register SrcReg2;
7963 bool Src2IsKill;
7964 if (ReplacedAddend) {
7965    // If we just generated a new addend, we must be its only use.
7966 SrcReg2 = *ReplacedAddend;
7967 Src2IsKill = true;
7968 } else {
7969 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7970 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7971 }
7972
7973 if (ResultReg.isVirtual())
7974 MRI.constrainRegClass(ResultReg, RC);
7975 if (SrcReg0.isVirtual())
7976 MRI.constrainRegClass(SrcReg0, RC);
7977 if (SrcReg1.isVirtual())
7978 MRI.constrainRegClass(SrcReg1, RC);
7979 if (SrcReg2.isVirtual())
7980 MRI.constrainRegClass(SrcReg2, RC);
7981
7983 if (kind == FMAInstKind::Default)
7984 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7985 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7986 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7987 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7988 else if (kind == FMAInstKind::Indexed)
7989 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7990 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7991 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7992 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7993 .addImm(MUL->getOperand(3).getImm());
7994 else if (kind == FMAInstKind::Accumulator)
7995 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7996 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7997 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7998 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7999 else
8000 assert(false && "Invalid FMA instruction kind \n");
8001  // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
8002 InsInstrs.push_back(MIB);
8003 return MUL;
8004}
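// NOTE: Descriptive comment, not part of the upstream source. The three
// FMAInstKind forms above emit operands in the order expected by MaddOpc:
//   Default:     MADD/FMADD style          (mul-op0, mul-op1, addend)
//   Indexed:     FMLA/FMLS element form    (accumulator, mul-op0, mul-op1, lane)
//   Accumulator: MLA/MLS, FMLA/FMLS vector (accumulator, mul-op0, mul-op1)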
8005
8006static MachineInstr *
8008 const TargetInstrInfo *TII, MachineInstr &Root,
8010 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8011
8012 unsigned Opc = 0;
8013 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8014 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8015 Opc = AArch64::FNMADDSrrr;
8016 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8017 Opc = AArch64::FNMADDDrrr;
8018 else
8019 return nullptr;
8020
8021 Register ResultReg = Root.getOperand(0).getReg();
8022 Register SrcReg0 = MAD->getOperand(1).getReg();
8023 Register SrcReg1 = MAD->getOperand(2).getReg();
8024 Register SrcReg2 = MAD->getOperand(3).getReg();
8025 bool Src0IsKill = MAD->getOperand(1).isKill();
8026 bool Src1IsKill = MAD->getOperand(2).isKill();
8027 bool Src2IsKill = MAD->getOperand(3).isKill();
8028 if (ResultReg.isVirtual())
8029 MRI.constrainRegClass(ResultReg, RC);
8030 if (SrcReg0.isVirtual())
8031 MRI.constrainRegClass(SrcReg0, RC);
8032 if (SrcReg1.isVirtual())
8033 MRI.constrainRegClass(SrcReg1, RC);
8034 if (SrcReg2.isVirtual())
8035 MRI.constrainRegClass(SrcReg2, RC);
8036
8038 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8039 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8040 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8041 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8042 InsInstrs.push_back(MIB);
8043
8044 return MAD;
8045}
8046
8047/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8048static MachineInstr *
8051 unsigned IdxDupOp, unsigned MulOpc,
8053 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8054 "Invalid index of FMUL operand");
8055
8056 MachineFunction &MF = *Root.getMF();
8058
8059 MachineInstr *Dup =
8060 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8061
8062 if (Dup->getOpcode() == TargetOpcode::COPY)
8063 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8064
8065 Register DupSrcReg = Dup->getOperand(1).getReg();
8066 MRI.clearKillFlags(DupSrcReg);
8067 MRI.constrainRegClass(DupSrcReg, RC);
8068
8069 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8070
8071 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8072 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8073
8074 Register ResultReg = Root.getOperand(0).getReg();
8075
8077 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8078 .add(MulOp)
8079 .addReg(DupSrcReg)
8080 .addImm(DupSrcLane);
8081
8082 InsInstrs.push_back(MIB);
8083 return &Root;
8084}
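// NOTE: Illustrative sketch, not part of the upstream source; register names
// are hypothetical. With IdxDupOp == 2 the fold above turns
//   %d = DUPv4i32lane %y, lane
//   %r = FMULv4f32 %x, %d
// into
//   %r = FMULv4i32_indexed %x, %y, lane
// leaving the DUP (and any skipped COPY) dead when it has no other users.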
8085
8086/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8087/// instructions.
8088///
8089/// \see genFusedMultiply
8093 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8094 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8096}
8097
8098/// genNeg - Helper to generate an intermediate negation of the second operand
8099/// of Root
8101 const TargetInstrInfo *TII, MachineInstr &Root,
8103 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8104 unsigned MnegOpc, const TargetRegisterClass *RC) {
8105 Register NewVR = MRI.createVirtualRegister(RC);
8107 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8108 .add(Root.getOperand(2));
8109 InsInstrs.push_back(MIB);
8110
8111 assert(InstrIdxForVirtReg.empty());
8112 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8113
8114 return NewVR;
8115}
8116
8117/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8118/// instructions with an additional negation of the accumulator
8122 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8123 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8124 assert(IdxMulOpd == 1);
8125
8126 Register NewVR =
8127 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8128 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8129 FMAInstKind::Accumulator, &NewVR);
8130}
8131
8132/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8133/// instructions.
8134///
8135/// \see genFusedMultiply
8139 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8140 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8142}
8143
8144/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
8145/// instructions with an additional negation of the accumulator
8149 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8150 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8151 assert(IdxMulOpd == 1);
8152
8153 Register NewVR =
8154 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8155
8156 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8157 FMAInstKind::Indexed, &NewVR);
8158}
8159
8160/// genMaddR - Generate madd instruction and combine mul and add using
8161/// an extra virtual register
8162/// Example - an ADD intermediate needs to be stored in a register:
8163/// MUL I=A,B,0
8164/// ADD R,I,Imm
8165/// ==> ORR V, ZR, Imm
8166/// ==> MADD R,A,B,V
8167/// \param MF Containing MachineFunction
8168/// \param MRI Register information
8169/// \param TII Target information
8170/// \param Root is the ADD instruction
8171/// \param [out] InsInstrs is a vector of machine instructions and will
8172/// contain the generated madd instruction
8173/// \param IdxMulOpd is index of operand in Root that is the result of
8174/// the MUL. In the example above IdxMulOpd is 1.
8175/// \param MaddOpc the opcode of the madd instruction
8176/// \param VR is a virtual register that holds the value of an ADD operand
8177/// (V in the example above).
8178/// \param RC Register class of operands
8180 const TargetInstrInfo *TII, MachineInstr &Root,
8182 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8183 const TargetRegisterClass *RC) {
8184 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8185
8186 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8187 Register ResultReg = Root.getOperand(0).getReg();
8188 Register SrcReg0 = MUL->getOperand(1).getReg();
8189 bool Src0IsKill = MUL->getOperand(1).isKill();
8190 Register SrcReg1 = MUL->getOperand(2).getReg();
8191 bool Src1IsKill = MUL->getOperand(2).isKill();
8192
8193 if (ResultReg.isVirtual())
8194 MRI.constrainRegClass(ResultReg, RC);
8195 if (SrcReg0.isVirtual())
8196 MRI.constrainRegClass(SrcReg0, RC);
8197 if (SrcReg1.isVirtual())
8198 MRI.constrainRegClass(SrcReg1, RC);
8200 MRI.constrainRegClass(VR, RC);
8201
8203 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8204 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8205 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8206 .addReg(VR);
8207 // Insert the MADD
8208 InsInstrs.push_back(MIB);
8209 return MUL;
8210}
8211
8212/// Do the following transformation
8213/// A - (B + C) ==> (A - B) - C
8214/// A - (B + C) ==> (A - C) - B
8216 const TargetInstrInfo *TII, MachineInstr &Root,
8219 unsigned IdxOpd1,
8220 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8221 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8222 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8223 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8224
8225 Register ResultReg = Root.getOperand(0).getReg();
8226 Register RegA = Root.getOperand(1).getReg();
8227 bool RegAIsKill = Root.getOperand(1).isKill();
8228 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8229 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8230 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8231 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8232 Register NewVR =
8233 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8234
8235 unsigned Opcode = Root.getOpcode();
8236 if (Opcode == AArch64::SUBSWrr)
8237 Opcode = AArch64::SUBWrr;
8238 else if (Opcode == AArch64::SUBSXrr)
8239 Opcode = AArch64::SUBXrr;
8240 else
8241 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8242 "Unexpected instruction opcode.");
8243
8244 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8245 Flags &= ~MachineInstr::NoSWrap;
8246 Flags &= ~MachineInstr::NoUWrap;
8247
8248 MachineInstrBuilder MIB1 =
8249 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8250 .addReg(RegA, getKillRegState(RegAIsKill))
8251 .addReg(RegB, getKillRegState(RegBIsKill))
8252 .setMIFlags(Flags);
8253 MachineInstrBuilder MIB2 =
8254 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8255 .addReg(NewVR, getKillRegState(true))
8256 .addReg(RegC, getKillRegState(RegCIsKill))
8257 .setMIFlags(Flags);
8258
8259 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8260 InsInstrs.push_back(MIB1);
8261 InsInstrs.push_back(MIB2);
8262 DelInstrs.push_back(AddMI);
8263 DelInstrs.push_back(&Root);
8264}
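// NOTE: Illustrative sketch, not part of the upstream source; register names
// are hypothetical. With IdxOpd1 == 1 the rewrite above turns
//   %t = ADDWrr %b, %c
//   %r = SUBWrr %a, %t               ; r = a - (b + c)
// into
//   %n = SUBWrr %a, %b
//   %r = SUBWrr %n, %c               ; r = (a - b) - c
// The nsw/nuw flags are cleared because the intermediate a - b may wrap even
// when the original expression does not.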
8265
8266unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8267 unsigned int AccumulatorOpCode) const {
8268 switch (AccumulatorOpCode) {
8269 case AArch64::UABALB_ZZZ_D:
8270 case AArch64::SABALB_ZZZ_D:
8271 case AArch64::UABALT_ZZZ_D:
8272 case AArch64::SABALT_ZZZ_D:
8273 return AArch64::ADD_ZZZ_D;
8274 case AArch64::UABALB_ZZZ_H:
8275 case AArch64::SABALB_ZZZ_H:
8276 case AArch64::UABALT_ZZZ_H:
8277 case AArch64::SABALT_ZZZ_H:
8278 return AArch64::ADD_ZZZ_H;
8279 case AArch64::UABALB_ZZZ_S:
8280 case AArch64::SABALB_ZZZ_S:
8281 case AArch64::UABALT_ZZZ_S:
8282 case AArch64::SABALT_ZZZ_S:
8283 return AArch64::ADD_ZZZ_S;
8284 case AArch64::UABALv16i8_v8i16:
8285 case AArch64::SABALv8i8_v8i16:
8286 case AArch64::SABAv8i16:
8287 case AArch64::UABAv8i16:
8288 return AArch64::ADDv8i16;
8289 case AArch64::SABALv2i32_v2i64:
8290 case AArch64::UABALv2i32_v2i64:
8291 case AArch64::SABALv4i32_v2i64:
8292 return AArch64::ADDv2i64;
8293 case AArch64::UABALv4i16_v4i32:
8294 case AArch64::SABALv4i16_v4i32:
8295 case AArch64::SABALv8i16_v4i32:
8296 case AArch64::SABAv4i32:
8297 case AArch64::UABAv4i32:
8298 return AArch64::ADDv4i32;
8299 case AArch64::UABALv4i32_v2i64:
8300 return AArch64::ADDv2i64;
8301 case AArch64::UABALv8i16_v4i32:
8302 return AArch64::ADDv4i32;
8303 case AArch64::UABALv8i8_v8i16:
8304 case AArch64::SABALv16i8_v8i16:
8305 return AArch64::ADDv8i16;
8306 case AArch64::UABAv16i8:
8307 case AArch64::SABAv16i8:
8308 return AArch64::ADDv16i8;
8309 case AArch64::UABAv4i16:
8310 case AArch64::SABAv4i16:
8311 return AArch64::ADDv4i16;
8312 case AArch64::UABAv2i32:
8313 case AArch64::SABAv2i32:
8314 return AArch64::ADDv2i32;
8315 case AArch64::UABAv8i8:
8316 case AArch64::SABAv8i8:
8317 return AArch64::ADDv8i8;
8318 default:
8319 llvm_unreachable("Unknown accumulator opcode");
8320 }
8321}
8322
8323/// When getMachineCombinerPatterns() finds potential patterns,
8324/// this function generates the instructions that could replace the
8325/// original code sequence
8326void AArch64InstrInfo::genAlternativeCodeSequence(
8327 MachineInstr &Root, unsigned Pattern,
8330 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8331 MachineBasicBlock &MBB = *Root.getParent();
8332 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8333 MachineFunction &MF = *MBB.getParent();
8334 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8335
8336 MachineInstr *MUL = nullptr;
8337 const TargetRegisterClass *RC;
8338 unsigned Opc;
8339 switch (Pattern) {
8340 default:
8341 // Reassociate instructions.
8342 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8343 DelInstrs, InstrIdxForVirtReg);
8344 return;
8346 // A - (B + C)
8347 // ==> (A - B) - C
8348 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8349 InstrIdxForVirtReg);
8350 return;
8352 // A - (B + C)
8353 // ==> (A - C) - B
8354 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8355 InstrIdxForVirtReg);
8356 return;
8359 // MUL I=A,B,0
8360 // ADD R,I,C
8361 // ==> MADD R,A,B,C
8362 // --- Create(MADD);
8364 Opc = AArch64::MADDWrrr;
8365 RC = &AArch64::GPR32RegClass;
8366 } else {
8367 Opc = AArch64::MADDXrrr;
8368 RC = &AArch64::GPR64RegClass;
8369 }
8370 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8371 break;
8374 // MUL I=A,B,0
8375 // ADD R,C,I
8376 // ==> MADD R,A,B,C
8377 // --- Create(MADD);
8379 Opc = AArch64::MADDWrrr;
8380 RC = &AArch64::GPR32RegClass;
8381 } else {
8382 Opc = AArch64::MADDXrrr;
8383 RC = &AArch64::GPR64RegClass;
8384 }
8385 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8386 break;
8391 // MUL I=A,B,0
8392 // ADD/SUB R,I,Imm
8393 // ==> MOV V, Imm/-Imm
8394 // ==> MADD R,A,B,V
8395 // --- Create(MADD);
8396 const TargetRegisterClass *RC;
8397 unsigned BitSize, MovImm;
8400 MovImm = AArch64::MOVi32imm;
8401 RC = &AArch64::GPR32spRegClass;
8402 BitSize = 32;
8403 Opc = AArch64::MADDWrrr;
8404 RC = &AArch64::GPR32RegClass;
8405 } else {
8406 MovImm = AArch64::MOVi64imm;
8407 RC = &AArch64::GPR64spRegClass;
8408 BitSize = 64;
8409 Opc = AArch64::MADDXrrr;
8410 RC = &AArch64::GPR64RegClass;
8411 }
8412 Register NewVR = MRI.createVirtualRegister(RC);
8413 uint64_t Imm = Root.getOperand(2).getImm();
8414
8415 if (Root.getOperand(3).isImm()) {
8416 unsigned Val = Root.getOperand(3).getImm();
8417 Imm = Imm << Val;
8418 }
8419 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8421 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8422 // Check that the immediate can be composed via a single instruction.
8424 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8425 if (Insn.size() != 1)
8426 return;
8427 MachineInstrBuilder MIB1 =
8428 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8429 .addImm(IsSub ? -Imm : Imm);
8430 InsInstrs.push_back(MIB1);
8431 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8432 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8433 break;
8434 }
8437 // MUL I=A,B,0
8438 // SUB R,I, C
8439 // ==> SUB V, 0, C
8440 // ==> MADD R,A,B,V // = -C + A*B
8441 // --- Create(MADD);
8442 const TargetRegisterClass *SubRC;
8443 unsigned SubOpc, ZeroReg;
8445 SubOpc = AArch64::SUBWrr;
8446 SubRC = &AArch64::GPR32spRegClass;
8447 ZeroReg = AArch64::WZR;
8448 Opc = AArch64::MADDWrrr;
8449 RC = &AArch64::GPR32RegClass;
8450 } else {
8451 SubOpc = AArch64::SUBXrr;
8452 SubRC = &AArch64::GPR64spRegClass;
8453 ZeroReg = AArch64::XZR;
8454 Opc = AArch64::MADDXrrr;
8455 RC = &AArch64::GPR64RegClass;
8456 }
8457 Register NewVR = MRI.createVirtualRegister(SubRC);
8458 // SUB NewVR, 0, C
8459 MachineInstrBuilder MIB1 =
8460 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8461 .addReg(ZeroReg)
8462 .add(Root.getOperand(2));
8463 InsInstrs.push_back(MIB1);
8464 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8465 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8466 break;
8467 }
8470 // MUL I=A,B,0
8471 // SUB R,C,I
8472 // ==> MSUB R,A,B,C (computes C - A*B)
8473 // --- Create(MSUB);
8475 Opc = AArch64::MSUBWrrr;
8476 RC = &AArch64::GPR32RegClass;
8477 } else {
8478 Opc = AArch64::MSUBXrrr;
8479 RC = &AArch64::GPR64RegClass;
8480 }
8481 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8482 break;
8484 Opc = AArch64::MLAv8i8;
8485 RC = &AArch64::FPR64RegClass;
8486 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8487 break;
8489 Opc = AArch64::MLAv8i8;
8490 RC = &AArch64::FPR64RegClass;
8491 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8492 break;
8494 Opc = AArch64::MLAv16i8;
8495 RC = &AArch64::FPR128RegClass;
8496 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8497 break;
8499 Opc = AArch64::MLAv16i8;
8500 RC = &AArch64::FPR128RegClass;
8501 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8502 break;
8504 Opc = AArch64::MLAv4i16;
8505 RC = &AArch64::FPR64RegClass;
8506 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8507 break;
8509 Opc = AArch64::MLAv4i16;
8510 RC = &AArch64::FPR64RegClass;
8511 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8512 break;
8514 Opc = AArch64::MLAv8i16;
8515 RC = &AArch64::FPR128RegClass;
8516 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8517 break;
8519 Opc = AArch64::MLAv8i16;
8520 RC = &AArch64::FPR128RegClass;
8521 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8522 break;
8524 Opc = AArch64::MLAv2i32;
8525 RC = &AArch64::FPR64RegClass;
8526 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8527 break;
8529 Opc = AArch64::MLAv2i32;
8530 RC = &AArch64::FPR64RegClass;
8531 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8532 break;
8534 Opc = AArch64::MLAv4i32;
8535 RC = &AArch64::FPR128RegClass;
8536 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8537 break;
8539 Opc = AArch64::MLAv4i32;
8540 RC = &AArch64::FPR128RegClass;
8541 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8542 break;
8543
8545 Opc = AArch64::MLAv8i8;
8546 RC = &AArch64::FPR64RegClass;
8547 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8548 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8549 RC);
8550 break;
8552 Opc = AArch64::MLSv8i8;
8553 RC = &AArch64::FPR64RegClass;
8554 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8555 break;
8557 Opc = AArch64::MLAv16i8;
8558 RC = &AArch64::FPR128RegClass;
8559 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8560 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8561 RC);
8562 break;
8564 Opc = AArch64::MLSv16i8;
8565 RC = &AArch64::FPR128RegClass;
8566 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8567 break;
8569 Opc = AArch64::MLAv4i16;
8570 RC = &AArch64::FPR64RegClass;
8571 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8572 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8573 RC);
8574 break;
8576 Opc = AArch64::MLSv4i16;
8577 RC = &AArch64::FPR64RegClass;
8578 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8579 break;
8581 Opc = AArch64::MLAv8i16;
8582 RC = &AArch64::FPR128RegClass;
8583 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8584 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8585 RC);
8586 break;
8588 Opc = AArch64::MLSv8i16;
8589 RC = &AArch64::FPR128RegClass;
8590 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8591 break;
8593 Opc = AArch64::MLAv2i32;
8594 RC = &AArch64::FPR64RegClass;
8595 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8596 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8597 RC);
8598 break;
8600 Opc = AArch64::MLSv2i32;
8601 RC = &AArch64::FPR64RegClass;
8602 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8603 break;
8605 Opc = AArch64::MLAv4i32;
8606 RC = &AArch64::FPR128RegClass;
8607 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8608 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8609 RC);
8610 break;
8612 Opc = AArch64::MLSv4i32;
8613 RC = &AArch64::FPR128RegClass;
8614 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8615 break;
8616
8618 Opc = AArch64::MLAv4i16_indexed;
8619 RC = &AArch64::FPR64RegClass;
8620 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8621 break;
8623 Opc = AArch64::MLAv4i16_indexed;
8624 RC = &AArch64::FPR64RegClass;
8625 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8626 break;
8628 Opc = AArch64::MLAv8i16_indexed;
8629 RC = &AArch64::FPR128RegClass;
8630 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8631 break;
8633 Opc = AArch64::MLAv8i16_indexed;
8634 RC = &AArch64::FPR128RegClass;
8635 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8636 break;
8638 Opc = AArch64::MLAv2i32_indexed;
8639 RC = &AArch64::FPR64RegClass;
8640 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8641 break;
8643 Opc = AArch64::MLAv2i32_indexed;
8644 RC = &AArch64::FPR64RegClass;
8645 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8646 break;
8648 Opc = AArch64::MLAv4i32_indexed;
8649 RC = &AArch64::FPR128RegClass;
8650 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8651 break;
8653 Opc = AArch64::MLAv4i32_indexed;
8654 RC = &AArch64::FPR128RegClass;
8655 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8656 break;
8657
8659 Opc = AArch64::MLAv4i16_indexed;
8660 RC = &AArch64::FPR64RegClass;
8661 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8662 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8663 RC);
8664 break;
8666 Opc = AArch64::MLSv4i16_indexed;
8667 RC = &AArch64::FPR64RegClass;
8668 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8669 break;
8671 Opc = AArch64::MLAv8i16_indexed;
8672 RC = &AArch64::FPR128RegClass;
8673 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8674 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8675 RC);
8676 break;
8678 Opc = AArch64::MLSv8i16_indexed;
8679 RC = &AArch64::FPR128RegClass;
8680 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8681 break;
8683 Opc = AArch64::MLAv2i32_indexed;
8684 RC = &AArch64::FPR64RegClass;
8685 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8686 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8687 RC);
8688 break;
8690 Opc = AArch64::MLSv2i32_indexed;
8691 RC = &AArch64::FPR64RegClass;
8692 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8693 break;
8695 Opc = AArch64::MLAv4i32_indexed;
8696 RC = &AArch64::FPR128RegClass;
8697 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8698 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8699 RC);
8700 break;
8702 Opc = AArch64::MLSv4i32_indexed;
8703 RC = &AArch64::FPR128RegClass;
8704 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8705 break;
8706
8707 // Floating Point Support
8709 Opc = AArch64::FMADDHrrr;
8710 RC = &AArch64::FPR16RegClass;
8711 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8712 break;
8714 Opc = AArch64::FMADDSrrr;
8715 RC = &AArch64::FPR32RegClass;
8716 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8717 break;
8719 Opc = AArch64::FMADDDrrr;
8720 RC = &AArch64::FPR64RegClass;
8721 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8722 break;
8723
8725 Opc = AArch64::FMADDHrrr;
8726 RC = &AArch64::FPR16RegClass;
8727 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8728 break;
8730 Opc = AArch64::FMADDSrrr;
8731 RC = &AArch64::FPR32RegClass;
8732 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8733 break;
8735 Opc = AArch64::FMADDDrrr;
8736 RC = &AArch64::FPR64RegClass;
8737 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8738 break;
8739
8741 Opc = AArch64::FMLAv1i32_indexed;
8742 RC = &AArch64::FPR32RegClass;
8743 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8745 break;
8747 Opc = AArch64::FMLAv1i32_indexed;
8748 RC = &AArch64::FPR32RegClass;
8749 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8751 break;
8752
8754 Opc = AArch64::FMLAv1i64_indexed;
8755 RC = &AArch64::FPR64RegClass;
8756 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8758 break;
8760 Opc = AArch64::FMLAv1i64_indexed;
8761 RC = &AArch64::FPR64RegClass;
8762 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8764 break;
8765
8767 RC = &AArch64::FPR64RegClass;
8768 Opc = AArch64::FMLAv4i16_indexed;
8769 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8771 break;
8773 RC = &AArch64::FPR64RegClass;
8774 Opc = AArch64::FMLAv4f16;
8775 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8777 break;
8779 RC = &AArch64::FPR64RegClass;
8780 Opc = AArch64::FMLAv4i16_indexed;
8781 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8783 break;
8785 RC = &AArch64::FPR64RegClass;
8786 Opc = AArch64::FMLAv4f16;
8787 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8789 break;
8790
8793 RC = &AArch64::FPR64RegClass;
8795 Opc = AArch64::FMLAv2i32_indexed;
8796 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8798 } else {
8799 Opc = AArch64::FMLAv2f32;
8800 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8802 }
8803 break;
8806 RC = &AArch64::FPR64RegClass;
8808 Opc = AArch64::FMLAv2i32_indexed;
8809 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8811 } else {
8812 Opc = AArch64::FMLAv2f32;
8813 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8815 }
8816 break;
8817
8819 RC = &AArch64::FPR128RegClass;
8820 Opc = AArch64::FMLAv8i16_indexed;
8821 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8823 break;
8825 RC = &AArch64::FPR128RegClass;
8826 Opc = AArch64::FMLAv8f16;
8827 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8829 break;
8831 RC = &AArch64::FPR128RegClass;
8832 Opc = AArch64::FMLAv8i16_indexed;
8833 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8835 break;
8837 RC = &AArch64::FPR128RegClass;
8838 Opc = AArch64::FMLAv8f16;
8839 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8841 break;
8842
8845 RC = &AArch64::FPR128RegClass;
8847 Opc = AArch64::FMLAv2i64_indexed;
8848 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8850 } else {
8851 Opc = AArch64::FMLAv2f64;
8852 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8854 }
8855 break;
8858 RC = &AArch64::FPR128RegClass;
8860 Opc = AArch64::FMLAv2i64_indexed;
8861 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8863 } else {
8864 Opc = AArch64::FMLAv2f64;
8865 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8867 }
8868 break;
8869
8872 RC = &AArch64::FPR128RegClass;
8874 Opc = AArch64::FMLAv4i32_indexed;
8875 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8877 } else {
8878 Opc = AArch64::FMLAv4f32;
8879 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8881 }
8882 break;
8883
8886 RC = &AArch64::FPR128RegClass;
8888 Opc = AArch64::FMLAv4i32_indexed;
8889 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8891 } else {
8892 Opc = AArch64::FMLAv4f32;
8893 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8895 }
8896 break;
8897
8899 Opc = AArch64::FNMSUBHrrr;
8900 RC = &AArch64::FPR16RegClass;
8901 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8902 break;
8904 Opc = AArch64::FNMSUBSrrr;
8905 RC = &AArch64::FPR32RegClass;
8906 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8907 break;
8909 Opc = AArch64::FNMSUBDrrr;
8910 RC = &AArch64::FPR64RegClass;
8911 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8912 break;
8913
8915 Opc = AArch64::FNMADDHrrr;
8916 RC = &AArch64::FPR16RegClass;
8917 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8918 break;
8920 Opc = AArch64::FNMADDSrrr;
8921 RC = &AArch64::FPR32RegClass;
8922 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8923 break;
8925 Opc = AArch64::FNMADDDrrr;
8926 RC = &AArch64::FPR64RegClass;
8927 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8928 break;
8929
8931 Opc = AArch64::FMSUBHrrr;
8932 RC = &AArch64::FPR16RegClass;
8933 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8934 break;
8936 Opc = AArch64::FMSUBSrrr;
8937 RC = &AArch64::FPR32RegClass;
8938 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8939 break;
8941 Opc = AArch64::FMSUBDrrr;
8942 RC = &AArch64::FPR64RegClass;
8943 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8944 break;
8945
8947 Opc = AArch64::FMLSv1i32_indexed;
8948 RC = &AArch64::FPR32RegClass;
8949 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8951 break;
8952
8954 Opc = AArch64::FMLSv1i64_indexed;
8955 RC = &AArch64::FPR64RegClass;
8956 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8958 break;
8959
8962 RC = &AArch64::FPR64RegClass;
8963 Register NewVR = MRI.createVirtualRegister(RC);
8964 MachineInstrBuilder MIB1 =
8965 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8966 .add(Root.getOperand(2));
8967 InsInstrs.push_back(MIB1);
8968 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8970 Opc = AArch64::FMLAv4f16;
8971 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8972 FMAInstKind::Accumulator, &NewVR);
8973 } else {
8974 Opc = AArch64::FMLAv4i16_indexed;
8975 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8976 FMAInstKind::Indexed, &NewVR);
8977 }
8978 break;
8979 }
8981 RC = &AArch64::FPR64RegClass;
8982 Opc = AArch64::FMLSv4f16;
8983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8985 break;
8987 RC = &AArch64::FPR64RegClass;
8988 Opc = AArch64::FMLSv4i16_indexed;
8989 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8991 break;
8992
8995 RC = &AArch64::FPR64RegClass;
8997 Opc = AArch64::FMLSv2i32_indexed;
8998 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9000 } else {
9001 Opc = AArch64::FMLSv2f32;
9002 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9004 }
9005 break;
9006
9009 RC = &AArch64::FPR128RegClass;
9010 Register NewVR = MRI.createVirtualRegister(RC);
9011 MachineInstrBuilder MIB1 =
9012 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9013 .add(Root.getOperand(2));
9014 InsInstrs.push_back(MIB1);
9015 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9017 Opc = AArch64::FMLAv8f16;
9018 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9019 FMAInstKind::Accumulator, &NewVR);
9020 } else {
9021 Opc = AArch64::FMLAv8i16_indexed;
9022 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9023 FMAInstKind::Indexed, &NewVR);
9024 }
9025 break;
9026 }
9028 RC = &AArch64::FPR128RegClass;
9029 Opc = AArch64::FMLSv8f16;
9030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9032 break;
9034 RC = &AArch64::FPR128RegClass;
9035 Opc = AArch64::FMLSv8i16_indexed;
9036 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9038 break;
9039
9042 RC = &AArch64::FPR128RegClass;
9044 Opc = AArch64::FMLSv2i64_indexed;
9045 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9047 } else {
9048 Opc = AArch64::FMLSv2f64;
9049 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9051 }
9052 break;
9053
9056 RC = &AArch64::FPR128RegClass;
9058 Opc = AArch64::FMLSv4i32_indexed;
9059 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9061 } else {
9062 Opc = AArch64::FMLSv4f32;
9063 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9065 }
9066 break;
9069 RC = &AArch64::FPR64RegClass;
9070 Register NewVR = MRI.createVirtualRegister(RC);
9071 MachineInstrBuilder MIB1 =
9072 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9073 .add(Root.getOperand(2));
9074 InsInstrs.push_back(MIB1);
9075 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9077 Opc = AArch64::FMLAv2i32_indexed;
9078 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9079 FMAInstKind::Indexed, &NewVR);
9080 } else {
9081 Opc = AArch64::FMLAv2f32;
9082 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9083 FMAInstKind::Accumulator, &NewVR);
9084 }
9085 break;
9086 }
9089 RC = &AArch64::FPR128RegClass;
9090 Register NewVR = MRI.createVirtualRegister(RC);
9091 MachineInstrBuilder MIB1 =
9092 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9093 .add(Root.getOperand(2));
9094 InsInstrs.push_back(MIB1);
9095 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9097 Opc = AArch64::FMLAv4i32_indexed;
9098 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9099 FMAInstKind::Indexed, &NewVR);
9100 } else {
9101 Opc = AArch64::FMLAv4f32;
9102 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9103 FMAInstKind::Accumulator, &NewVR);
9104 }
9105 break;
9106 }
9109 RC = &AArch64::FPR128RegClass;
9110 Register NewVR = MRI.createVirtualRegister(RC);
9111 MachineInstrBuilder MIB1 =
9112 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9113 .add(Root.getOperand(2));
9114 InsInstrs.push_back(MIB1);
9115 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9117 Opc = AArch64::FMLAv2i64_indexed;
9118 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9119 FMAInstKind::Indexed, &NewVR);
9120 } else {
9121 Opc = AArch64::FMLAv2f64;
9122 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9123 FMAInstKind::Accumulator, &NewVR);
9124 }
9125 break;
9126 }
9129 unsigned IdxDupOp =
9131 : 2;
9132 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9133 &AArch64::FPR128RegClass, MRI);
9134 break;
9135 }
9138 unsigned IdxDupOp =
9140 : 2;
9141 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9142 &AArch64::FPR128RegClass, MRI);
9143 break;
9144 }
9147 unsigned IdxDupOp =
9149 : 2;
9150 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9151 &AArch64::FPR128_loRegClass, MRI);
9152 break;
9153 }
9156 unsigned IdxDupOp =
9158 : 2;
9159 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9160 &AArch64::FPR128RegClass, MRI);
9161 break;
9162 }
9165 unsigned IdxDupOp =
9167 : 2;
9168 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9169 &AArch64::FPR128_loRegClass, MRI);
9170 break;
9171 }
9173 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9174 break;
9175 }
9177 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9178 Pattern, 4);
9179 break;
9180 }
9182 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9183 Pattern, 8);
9184 break;
9185 }
9187 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9188 Pattern, 16);
9189 break;
9190 }
9191
9192 } // end switch (Pattern)
9193 // Record MUL and ADD/SUB for deletion
9194 if (MUL)
9195 DelInstrs.push_back(MUL);
9196 DelInstrs.push_back(&Root);
9197
9198 // Set the flags on the inserted instructions to be the merged flags of the
9199 // instructions that we have combined.
9200 uint32_t Flags = Root.getFlags();
9201 if (MUL)
9202 Flags = Root.mergeFlagsWith(*MUL);
9203 for (auto *MI : InsInstrs)
9204 MI->setFlags(Flags);
9205}
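// Illustrative sketch (not part of the upstream source; register names chosen
// arbitrarily): the switch above rewrites a floating-point multiply feeding an
// add/sub into a single fused multiply-accumulate. For an FMLAv2f32 pattern,
//
//   fmul v0.2s, v1.2s, v2.2s
//   fadd v3.2s, v3.2s, v0.2s
//
// becomes
//
//   fmla v3.2s, v1.2s, v2.2s
//
// and the original fmul/fadd pair is recorded in DelInstrs for deletion below.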
9206
9207/// Replace csincr-branch sequence by simple conditional branch
9208///
9209/// Examples:
9210/// 1. \code
9211/// csinc w9, wzr, wzr, <condition code>
9212/// tbnz w9, #0, 0x44
9213/// \endcode
9214/// to
9215/// \code
9216/// b.<inverted condition code>
9217/// \endcode
9218///
9219/// 2. \code
9220/// csinc w9, wzr, wzr, <condition code>
9221/// tbz w9, #0, 0x44
9222/// \endcode
9223/// to
9224/// \code
9225/// b.<condition code>
9226/// \endcode
9227///
9228/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9229/// compare's constant operand is power of 2.
9230///
9231/// Examples:
9232/// \code
9233/// and w8, w8, #0x400
9234/// cbnz w8, L1
9235/// \endcode
9236/// to
9237/// \code
9238/// tbnz w8, #10, L1
9239/// \endcode
9240///
9241/// \param MI Conditional Branch
9242/// \return True when the simple conditional branch is generated
9243///
9244bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9245 bool IsNegativeBranch = false;
9246 bool IsTestAndBranch = false;
9247 unsigned TargetBBInMI = 0;
9248 switch (MI.getOpcode()) {
9249 default:
9250 llvm_unreachable("Unknown branch instruction?");
9251 case AArch64::Bcc:
9252 case AArch64::CBWPri:
9253 case AArch64::CBXPri:
9254 case AArch64::CBWPrr:
9255 case AArch64::CBXPrr:
9256 return false;
9257 case AArch64::CBZW:
9258 case AArch64::CBZX:
9259 TargetBBInMI = 1;
9260 break;
9261 case AArch64::CBNZW:
9262 case AArch64::CBNZX:
9263 TargetBBInMI = 1;
9264 IsNegativeBranch = true;
9265 break;
9266 case AArch64::TBZW:
9267 case AArch64::TBZX:
9268 TargetBBInMI = 2;
9269 IsTestAndBranch = true;
9270 break;
9271 case AArch64::TBNZW:
9272 case AArch64::TBNZX:
9273 TargetBBInMI = 2;
9274 IsNegativeBranch = true;
9275 IsTestAndBranch = true;
9276 break;
9277 }
9278 // So we increment a zero register and test for bits other
9279 // than bit 0? Conservatively bail out in case the verifier
9280 // missed this case.
9281 if (IsTestAndBranch && MI.getOperand(1).getImm())
9282 return false;
9283
9284 // Find Definition.
9285 assert(MI.getParent() && "Incomplete machine instruction\n");
9286 MachineBasicBlock *MBB = MI.getParent();
9287 MachineFunction *MF = MBB->getParent();
9288 MachineRegisterInfo *MRI = &MF->getRegInfo();
9289 Register VReg = MI.getOperand(0).getReg();
9290 if (!VReg.isVirtual())
9291 return false;
9292
9293 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9294
9295 // Look through COPY instructions to find definition.
9296 while (DefMI->isCopy()) {
9297 Register CopyVReg = DefMI->getOperand(1).getReg();
9298 if (!MRI->hasOneNonDBGUse(CopyVReg))
9299 return false;
9300 if (!MRI->hasOneDef(CopyVReg))
9301 return false;
9302 DefMI = MRI->getVRegDef(CopyVReg);
9303 }
9304
9305 switch (DefMI->getOpcode()) {
9306 default:
9307 return false;
9308 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9309 case AArch64::ANDWri:
9310 case AArch64::ANDXri: {
9311 if (IsTestAndBranch)
9312 return false;
9313 if (DefMI->getParent() != MBB)
9314 return false;
9315 if (!MRI->hasOneNonDBGUse(VReg))
9316 return false;
9317
9318 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9319 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9320 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9321 if (!isPowerOf2_64(Mask))
9322 return false;
9323
9324 MachineOperand &MO = DefMI->getOperand(1);
9325 Register NewReg = MO.getReg();
9326 if (!NewReg.isVirtual())
9327 return false;
9328
9329 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9330
9331 MachineBasicBlock &RefToMBB = *MBB;
9332 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9333 DebugLoc DL = MI.getDebugLoc();
9334 unsigned Imm = Log2_64(Mask);
9335 unsigned Opc = (Imm < 32)
9336 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9337 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9338 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9339 .addReg(NewReg)
9340 .addImm(Imm)
9341 .addMBB(TBB);
9342 // Register lives on to the TBZ/TBNZ now.
9343 MO.setIsKill(false);
9344
9345 // For immediates smaller than 32, we need to use the 32-bit
9346 // variant (W) in all cases; the 64-bit variant does not
9347 // allow encoding them.
9348 // Therefore, if the input register is 64-bit, we need to take the
9349 // 32-bit sub-part.
9350 if (!Is32Bit && Imm < 32)
9351 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9352 MI.eraseFromParent();
9353 return true;
9354 }
9355 // Look for CSINC
9356 case AArch64::CSINCWr:
9357 case AArch64::CSINCXr: {
9358 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9359 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9360 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9361 DefMI->getOperand(2).getReg() == AArch64::XZR))
9362 return false;
9363
9364 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9365 true) != -1)
9366 return false;
9367
9368 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9369 // Convert only when the condition code is not modified between
9370 // the CSINC and the branch. The CC may be used by other
9371 // instructions in between.
9373 return false;
9374 MachineBasicBlock &RefToMBB = *MBB;
9375 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9376 DebugLoc DL = MI.getDebugLoc();
9377 if (IsNegativeBranch)
9378 CC = AArch64CC::getInvertedCondCode(CC);
9379 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9380 MI.eraseFromParent();
9381 return true;
9382 }
9383 }
9384}
9385
9386std::pair<unsigned, unsigned>
9387AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9388 const unsigned Mask = AArch64II::MO_FRAGMENT;
9389 return std::make_pair(TF & Mask, TF & ~Mask);
9390}
9391
9392ArrayRef<std::pair<unsigned, const char *>>
9393AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9394 using namespace AArch64II;
9395
9396 static const std::pair<unsigned, const char *> TargetFlags[] = {
9397 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9398 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9399 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9400 {MO_HI12, "aarch64-hi12"}};
9401 return ArrayRef(TargetFlags);
9402}
9403
9404ArrayRef<std::pair<unsigned, const char *>>
9405AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9406 using namespace AArch64II;
9407
9408 static const std::pair<unsigned, const char *> TargetFlags[] = {
9409 {MO_COFFSTUB, "aarch64-coffstub"},
9410 {MO_GOT, "aarch64-got"},
9411 {MO_NC, "aarch64-nc"},
9412 {MO_S, "aarch64-s"},
9413 {MO_TLS, "aarch64-tls"},
9414 {MO_DLLIMPORT, "aarch64-dllimport"},
9415 {MO_PREL, "aarch64-prel"},
9416 {MO_TAGGED, "aarch64-tagged"},
9417 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9418 };
9419 return ArrayRef(TargetFlags);
9420}
9421
9422ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
9423AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9424 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9425 {{MOSuppressPair, "aarch64-suppress-pair"},
9426 {MOStridedAccess, "aarch64-strided-access"}};
9427 return ArrayRef(TargetFlags);
9428}
9429
9430/// Constants defining how certain sequences should be outlined.
9431/// This encompasses how an outlined function should be called, and what kind of
9432/// frame should be emitted for that outlined function.
9433///
9434/// \p MachineOutlinerDefault implies that the function should be called with
9435/// a save and restore of LR to the stack.
9436///
9437/// That is,
9438///
9439/// I1 Save LR OUTLINED_FUNCTION:
9440/// I2 --> BL OUTLINED_FUNCTION I1
9441/// I3 Restore LR I2
9442/// I3
9443/// RET
9444///
9445/// * Call construction overhead: 3 (save + BL + restore)
9446/// * Frame construction overhead: 1 (ret)
9447/// * Requires stack fixups? Yes
9448///
9449/// \p MachineOutlinerTailCall implies that the function is being created from
9450/// a sequence of instructions ending in a return.
9451///
9452/// That is,
9453///
9454/// I1 OUTLINED_FUNCTION:
9455/// I2 --> B OUTLINED_FUNCTION I1
9456/// RET I2
9457/// RET
9458///
9459/// * Call construction overhead: 1 (B)
9460/// * Frame construction overhead: 0 (Return included in sequence)
9461/// * Requires stack fixups? No
9462///
9463/// \p MachineOutlinerNoLRSave implies that the function should be called using
9464/// a BL instruction, but doesn't require LR to be saved and restored. This
9465/// happens when LR is known to be dead.
9466///
9467/// That is,
9468///
9469/// I1 OUTLINED_FUNCTION:
9470/// I2 --> BL OUTLINED_FUNCTION I1
9471/// I3 I2
9472/// I3
9473/// RET
9474///
9475/// * Call construction overhead: 1 (BL)
9476/// * Frame construction overhead: 1 (RET)
9477/// * Requires stack fixups? No
9478///
9479/// \p MachineOutlinerThunk implies that the function is being created from
9480/// a sequence of instructions ending in a call. The outlined function is
9481/// called with a BL instruction, and the outlined function tail-calls the
9482/// original call destination.
9483///
9484/// That is,
9485///
9486/// I1 OUTLINED_FUNCTION:
9487/// I2 --> BL OUTLINED_FUNCTION I1
9488/// BL f I2
9489/// B f
9490/// * Call construction overhead: 1 (BL)
9491/// * Frame construction overhead: 0
9492/// * Requires stack fixups? No
9493///
9494/// \p MachineOutlinerRegSave implies that the function should be called with a
9495/// save and restore of LR to an available register. This allows us to avoid
9496/// stack fixups. Note that this outlining variant is compatible with the
9497/// NoLRSave case.
9498///
9499/// That is,
9500///
9501/// I1 Save LR OUTLINED_FUNCTION:
9502/// I2 --> BL OUTLINED_FUNCTION I1
9503/// I3 Restore LR I2
9504/// I3
9505/// RET
9506///
9507/// * Call construction overhead: 3 (save + BL + restore)
9508/// * Frame construction overhead: 1 (ret)
9509/// * Requires stack fixups? No
9510enum MachineOutlinerClass {
9511 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9512 MachineOutlinerTailCall, /// Only emit a branch.
9513 MachineOutlinerNoLRSave, /// Emit a call and return.
9514 MachineOutlinerThunk, /// Emit a call and tail-call.
9515 MachineOutlinerRegSave /// Same as default, but save to a register.
9516};
9517
9518enum MachineOutlinerMBBFlags {
9519 LRUnavailableSomewhere = 0x2,
9520 HasCalls = 0x4,
9521 UnsafeRegsDead = 0x8
9522};
9523
9524Register
9525AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9526 MachineFunction *MF = C.getMF();
9527 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9528 const AArch64RegisterInfo *ARI =
9529 static_cast<const AArch64RegisterInfo *>(&TRI);
9530 // Check if there is an available register across the sequence that we can
9531 // use.
9532 for (unsigned Reg : AArch64::GPR64RegClass) {
9533 if (!ARI->isReservedReg(*MF, Reg) &&
9534 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9535 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9536 Reg != AArch64::X17 && // Ditto for X17.
9537 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9538 C.isAvailableInsideSeq(Reg, TRI))
9539 return Reg;
9540 }
9541 return Register();
9542}
9543
9544static bool
9545outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
9546 const outliner::Candidate &b) {
9547 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9548 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9549
9550 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9551 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9552}
9553
9554static bool
9555outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
9556 const outliner::Candidate &b) {
9557 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9558 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9559
9560 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9561}
9562
9563static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
9564 const outliner::Candidate &b) {
9565 const AArch64Subtarget &SubtargetA =
9566 a.getMF()->getSubtarget<AArch64Subtarget>();
9567 const AArch64Subtarget &SubtargetB =
9568 b.getMF()->getSubtarget<AArch64Subtarget>();
9569 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9570}
9571
9572std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9573AArch64InstrInfo::getOutliningCandidateInfo(
9574 const MachineModuleInfo &MMI,
9575 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9576 unsigned MinRepeats) const {
9577 unsigned SequenceSize = 0;
9578 for (auto &MI : RepeatedSequenceLocs[0])
9579 SequenceSize += getInstSizeInBytes(MI);
9580
9581 unsigned NumBytesToCreateFrame = 0;
9582
9583 // We only allow outlining for functions having exactly matching return
9584 // address signing attributes, i.e., all share the same value for the
9585 // attribute "sign-return-address" and all share the same type of key they
9586 // are signed with.
9587 // Additionally we require all functions to simultaneously either support
9588 // v8.3a features or not. Otherwise an outlined function could get signed
9589 // using dedicated v8.3 instructions and a call from a function that doesn't
9590 // support v8.3 instructions would therefore be invalid.
9591 if (std::adjacent_find(
9592 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9593 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9594 // Return true if a and b are non-equal w.r.t. return address
9595 // signing or support of v8.3a features
9596 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9597 outliningCandidatesSigningKeyConsensus(a, b) &&
9598 outliningCandidatesV8_3OpsConsensus(a, b)) {
9599 return false;
9600 }
9601 return true;
9602 }) != RepeatedSequenceLocs.end()) {
9603 return std::nullopt;
9604 }
9605
9606 // Since at this point all candidates agree on their return address signing,
9607 // picking just one is fine. If the candidate functions potentially sign their
9608 // return addresses, the outlined function should do the same. Note that in
9609 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9610 // not certainly true that the outlined function will have to sign its return
9611 // address but this decision is made later, when the decision to outline
9612 // has already been made.
9613 // The same holds for the number of additional instructions we need: On
9614 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9615 // necessary. However, at this point we don't know if the outlined function
9616 // will have a RET instruction so we assume the worst.
9617 const TargetRegisterInfo &TRI = getRegisterInfo();
9618 // Performing a tail call may require extra checks when PAuth is enabled.
9619 // If PAuth is disabled, set it to zero for uniformity.
9620 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9621 if (RepeatedSequenceLocs[0]
9622 .getMF()
9623 ->getInfo<AArch64FunctionInfo>()
9624 ->shouldSignReturnAddress(true)) {
9625 // One PAC and one AUT instructions
9626 NumBytesToCreateFrame += 8;
9627
9628 // PAuth is enabled - set extra tail call cost, if any.
9629 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9630 *RepeatedSequenceLocs[0].getMF());
9631 NumBytesToCheckLRInTCEpilogue =
9632 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
9633 // Checking the authenticated LR value may significantly impact
9634 // SequenceSize, so account for it for more precise results.
9635 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9636 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9637
9638 // We have to check if SP-modifying instructions would get outlined.
9639 // If so, we only allow outlining if SP is unchanged overall; matching
9640 // sub and add instructions are okay to outline, but all other SP
9641 // modifications are not.
9642 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9643 int SPValue = 0;
9644 for (auto &MI : C) {
9645 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9646 switch (MI.getOpcode()) {
9647 case AArch64::ADDXri:
9648 case AArch64::ADDWri:
9649 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9650 assert(MI.getOperand(2).isImm() &&
9651 "Expected operand to be immediate");
9652 assert(MI.getOperand(1).isReg() &&
9653 "Expected operand to be a register");
9654 // Check if the add just increments sp. If so, we search for
9655 // matching sub instructions that decrement sp. If not, the
9656 // modification is illegal
9657 if (MI.getOperand(1).getReg() == AArch64::SP)
9658 SPValue += MI.getOperand(2).getImm();
9659 else
9660 return true;
9661 break;
9662 case AArch64::SUBXri:
9663 case AArch64::SUBWri:
9664 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9665 assert(MI.getOperand(2).isImm() &&
9666 "Expected operand to be immediate");
9667 assert(MI.getOperand(1).isReg() &&
9668 "Expected operand to be a register");
9669 // Check if the sub just decrements sp. If so, we search for
9670 // matching add instructions that increment sp. If not, the
9671 // modification is illegal
9672 if (MI.getOperand(1).getReg() == AArch64::SP)
9673 SPValue -= MI.getOperand(2).getImm();
9674 else
9675 return true;
9676 break;
9677 default:
9678 return true;
9679 }
9680 }
9681 }
9682 if (SPValue)
9683 return true;
9684 return false;
9685 };
9686 // Remove candidates with illegal stack modifying instructions
9687 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9688
9689 // If the sequence doesn't have enough candidates left, then we're done.
9690 if (RepeatedSequenceLocs.size() < MinRepeats)
9691 return std::nullopt;
9692 }
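// Illustrative sketch (not part of the upstream source): the SP check above
// accepts candidates whose net SP adjustment is zero, e.g.
//
//   sub sp, sp, #16
//   ...
//   add sp, sp, #16      ; SPValue returns to 0, OK to outline
//
// but drops candidates that leave SP changed, or that write SP from anything
// other than an SP-relative add/sub immediate (e.g. a lone "sub sp, sp, #16").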
9693
9694 // Properties about candidate MBBs that hold for all of them.
9695 unsigned FlagsSetInAll = 0xF;
9696
9697 // Compute liveness information for each candidate, and set FlagsSetInAll.
9698 for (outliner::Candidate &C : RepeatedSequenceLocs)
9699 FlagsSetInAll &= C.Flags;
9700
9701 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9702
9703 // Helper lambda which sets call information for every candidate.
9704 auto SetCandidateCallInfo =
9705 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9706 for (outliner::Candidate &C : RepeatedSequenceLocs)
9707 C.setCallInfo(CallID, NumBytesForCall);
9708 };
9709
9710 unsigned FrameID = MachineOutlinerDefault;
9711 NumBytesToCreateFrame += 4;
9712
9713 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9714 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9715 });
9716
9717 // We check to see if CFI Instructions are present, and if they are
9718 // we find the number of CFI Instructions in the candidates.
9719 unsigned CFICount = 0;
9720 for (auto &I : RepeatedSequenceLocs[0]) {
9721 if (I.isCFIInstruction())
9722 CFICount++;
9723 }
9724
9725 // We compare the number of found CFI Instructions to the number of CFI
9726 // instructions in the parent function for each candidate. We must check this
9727 // since if we outline one of the CFI instructions in a function, we have to
9728 // outline them all for correctness. If we do not, the address offsets will be
9729 // incorrect between the two sections of the program.
9730 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9731 std::vector<MCCFIInstruction> CFIInstructions =
9732 C.getMF()->getFrameInstructions();
9733
9734 if (CFICount > 0 && CFICount != CFIInstructions.size())
9735 return std::nullopt;
9736 }
9737
9738 // Returns true if an instruction is safe to fix up, false otherwise.
9739 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9740 if (MI.isCall())
9741 return true;
9742
9743 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9744 !MI.readsRegister(AArch64::SP, &TRI))
9745 return true;
9746
9747 // Any modification of SP will break our code to save/restore LR.
9748 // FIXME: We could handle some instructions which add a constant
9749 // offset to SP, with a bit more work.
9750 if (MI.modifiesRegister(AArch64::SP, &TRI))
9751 return false;
9752
9753 // At this point, we have a stack instruction that we might need to
9754 // fix up. We'll handle it if it's a load or store.
9755 if (MI.mayLoadOrStore()) {
9756 const MachineOperand *Base; // Filled with the base operand of MI.
9757 int64_t Offset; // Filled with the offset of MI.
9758 bool OffsetIsScalable;
9759
9760 // Does it allow us to offset the base operand and is the base the
9761 // register SP?
9762 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9763 !Base->isReg() || Base->getReg() != AArch64::SP)
9764 return false;
9765
9766 // Fix-up code below assumes byte offsets.
9767 if (OffsetIsScalable)
9768 return false;
9769
9770 // Find the minimum/maximum offset for this instruction and check
9771 // if fixing it up would be in range.
9772 int64_t MinOffset,
9773 MaxOffset; // Unscaled offsets for the instruction.
9774 // The scale to multiply the offsets by.
9775 TypeSize Scale(0U, false), DummyWidth(0U, false);
9776 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9777
9778 Offset += 16; // Update the offset to what it would be if we outlined.
9779 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9780 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9781 return false;
9782
9783 // It's in range, so we can outline it.
9784 return true;
9785 }
9786
9787 // FIXME: Add handling for instructions like "add x0, sp, #8".
9788
9789 // We can't fix it up, so don't outline it.
9790 return false;
9791 };
9792
9793 // True if it's possible to fix up each stack instruction in this sequence.
9794 // Important for frames/call variants that modify the stack.
9795 bool AllStackInstrsSafe =
9796 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9797
9798 // If the last instruction in any candidate is a terminator, then we should
9799 // tail call all of the candidates.
9800 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9801 FrameID = MachineOutlinerTailCall;
9802 NumBytesToCreateFrame = 0;
9803 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9804 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9805 }
9806
9807 else if (LastInstrOpcode == AArch64::BL ||
9808 ((LastInstrOpcode == AArch64::BLR ||
9809 LastInstrOpcode == AArch64::BLRNoIP) &&
9810 !HasBTI)) {
9811 // FIXME: Do we need to check if the code after this uses the value of LR?
9812 FrameID = MachineOutlinerThunk;
9813 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9814 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9815 }
9816
9817 else {
9818 // We need to decide how to emit calls + frames. We can always emit the same
9819 // frame if we don't need to save to the stack. If we have to save to the
9820 // stack, then we need a different frame.
9821 unsigned NumBytesNoStackCalls = 0;
9822 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9823
9824 // Check if we have to save LR.
9825 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9826 bool LRAvailable =
9827 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9828 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9829 : true;
9830 // If we have a noreturn caller, then we're going to be conservative and
9831 // say that we have to save LR. If we don't have a ret at the end of the
9832 // block, then we can't reason about liveness accurately.
9833 //
9834 // FIXME: We can probably do better than always disabling this in
9835 // noreturn functions by fixing up the liveness info.
9836 bool IsNoReturn =
9837 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9838
9839 // Is LR available? If so, we don't need a save.
9840 if (LRAvailable && !IsNoReturn) {
9841 NumBytesNoStackCalls += 4;
9842 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9843 CandidatesWithoutStackFixups.push_back(C);
9844 }
9845
9846 // Is an unused register available? If so, we won't modify the stack, so
9847 // we can outline with the same frame type as those that don't save LR.
9848 else if (findRegisterToSaveLRTo(C)) {
9849 NumBytesNoStackCalls += 12;
9850 C.setCallInfo(MachineOutlinerRegSave, 12);
9851 CandidatesWithoutStackFixups.push_back(C);
9852 }
9853
9854 // Is SP used in the sequence at all? If not, we don't have to modify
9855 // the stack, so we are guaranteed to get the same frame.
9856 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9857 NumBytesNoStackCalls += 12;
9858 C.setCallInfo(MachineOutlinerDefault, 12);
9859 CandidatesWithoutStackFixups.push_back(C);
9860 }
9861
9862 // If we outline this, we need to modify the stack. Pretend we don't
9863 // outline this by saving all of its bytes.
9864 else {
9865 NumBytesNoStackCalls += SequenceSize;
9866 }
9867 }
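// Rough cost model used above (illustrative summary, not part of the upstream
// source); AArch64 instructions are 4 bytes each:
//
//   MachineOutlinerNoLRSave:  bl              -> 4 bytes per call site
//   MachineOutlinerRegSave:   mov + bl + mov  -> 12 bytes per call site
//   MachineOutlinerDefault:   str + bl + ldr  -> 12 bytes per call site
//
// NumBytesNoStackCalls accumulates these so the check below can compare
// against simply giving every candidate the default (stack-saving) call type.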
9868
9869 // If there are no places where we have to save LR, then note that we
9870 // don't have to update the stack. Otherwise, give every candidate the
9871 // default call type, as long as it's safe to do so.
9872 if (!AllStackInstrsSafe ||
9873 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9874 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9875 FrameID = MachineOutlinerNoLRSave;
9876 if (RepeatedSequenceLocs.size() < MinRepeats)
9877 return std::nullopt;
9878 } else {
9879 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9880
9881 // Bugzilla ID: 46767
9882 // TODO: Check if fixing up the stack more than once is safe so we can
9883 // outline these.
9884 //
9885 // An outline resulting in a caller that requires stack fixups at the
9886 // callsite to a callee that also requires stack fixups can happen when
9887 // there are no available registers at the candidate callsite for a
9888 // candidate that itself also has calls.
9889 //
9890 // In other words if function_containing_sequence in the following pseudo
9891 // assembly requires that we save LR at the point of the call, but there
9892 // are no available registers: in this case we save using SP and as a
9893 // result the SP offsets requires stack fixups by multiples of 16.
9894 //
9895 // function_containing_sequence:
9896 // ...
9897 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9898 // call OUTLINED_FUNCTION_N
9899 // restore LR from SP
9900 // ...
9901 //
9902 // OUTLINED_FUNCTION_N:
9903 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9904 // ...
9905 // bl foo
9906 // restore LR from SP
9907 // ret
9908 //
9909 // Because the code to handle more than one stack fixup does not
9910 // currently have the proper checks for legality, these cases will assert
9911 // in the AArch64 MachineOutliner. This is because the code to do this
9912 // needs more hardening, testing, better checks that generated code is
9913 // legal, etc and because it is only verified to handle a single pass of
9914 // stack fixup.
9915 //
9916 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9917 // these cases until they are known to be handled. Bugzilla 46767 is
9918 // referenced in comments at the assert site.
9919 //
9920 // To avoid asserting (or generating non-legal code on noassert builds)
9921 // we remove all candidates which would need more than one stack fixup by
9922 // pruning the cases where the candidate has calls while also having no
9923 // available LR and having no available general purpose registers to copy
9924 // LR to (ie one extra stack save/restore).
9925 //
9926 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9927 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9928 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9929 return (llvm::any_of(C, IsCall)) &&
9930 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9931 !findRegisterToSaveLRTo(C));
9932 });
9933 }
9934 }
9935
9936 // If we dropped all of the candidates, bail out here.
9937 if (RepeatedSequenceLocs.size() < MinRepeats)
9938 return std::nullopt;
9939 }
9940
9941 // Does every candidate's MBB contain a call? If so, then we might have a call
9942 // in the range.
9943 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9944 // Check if the range contains a call. These require a save + restore of the
9945 // link register.
9946 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9947 bool ModStackToSaveLR = false;
9948 if (any_of(drop_end(FirstCand),
9949 [](const MachineInstr &MI) { return MI.isCall(); }))
9950 ModStackToSaveLR = true;
9951
9952 // Handle the last instruction separately. If this is a tail call, then the
9953 // last instruction is a call. We don't want to save + restore in this case.
9954 // However, it could be possible that the last instruction is a call without
9955 // it being valid to tail call this sequence. We should consider this as
9956 // well.
9957 else if (FrameID != MachineOutlinerThunk &&
9958 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9959 ModStackToSaveLR = true;
9960
9961 if (ModStackToSaveLR) {
9962 // We can't fix up the stack. Bail out.
9963 if (!AllStackInstrsSafe)
9964 return std::nullopt;
9965
9966 // Save + restore LR.
9967 NumBytesToCreateFrame += 8;
9968 }
9969 }
9970
9971 // If we have CFI instructions, we can only outline if the outlined section
9972 // can be a tail call
9973 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9974 return std::nullopt;
9975
9976 return std::make_unique<outliner::OutlinedFunction>(
9977 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9978}
9979
9980void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9981 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9982 // If a bunch of candidates reach this point, they must agree on their return
9983 // address signing. It is therefore enough to just consider the signing
9984 // behaviour of one of them.
9985 const auto &CFn = Candidates.front().getMF()->getFunction();
9986
9987 if (CFn.hasFnAttribute("ptrauth-returns"))
9988 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9989 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9990 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9991 // Since all candidates belong to the same module, just copy the
9992 // function-level attributes of an arbitrary function.
9993 if (CFn.hasFnAttribute("sign-return-address"))
9994 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9995 if (CFn.hasFnAttribute("sign-return-address-key"))
9996 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9997
9998 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9999}
10000
10001bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10002 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10003 const Function &F = MF.getFunction();
10004
10005 // Can F be deduplicated by the linker? If it can, don't outline from it.
10006 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10007 return false;
10008
10009 // Don't outline from functions with section markings; the program could
10010 // expect that all the code is in the named section.
10011 // FIXME: Allow outlining from multiple functions with the same section
10012 // marking.
10013 if (F.hasSection())
10014 return false;
10015
10016 // Outlining from functions with redzones is unsafe since the outliner may
10017 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10018 // outline from it.
10019 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10020 if (!AFI || AFI->hasRedZone().value_or(true))
10021 return false;
10022
10023 // FIXME: Determine whether it is safe to outline from functions which contain
10024 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10025 // outlined together and ensure it is safe to outline with async unwind info,
10026 // required for saving & restoring VG around calls.
10027 if (AFI->hasStreamingModeChanges())
10028 return false;
10029
10030 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10031 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
10032 return false;
10033
10034 // It's safe to outline from MF.
10035 return true;
10036}
10037
10038SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10039AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10040 unsigned &Flags) const {
10041 assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
10042 "Must track liveness!");
10043 SmallVector<
10044 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10045 Ranges;
10046 // According to the AArch64 Procedure Call Standard, the following are
10047 // undefined on entry/exit from a function call:
10048 //
10049 // * Registers x16, x17, (and thus w16, w17)
10050 // * Condition codes (and thus the NZCV register)
10051 //
10052 // If any of these registers are used inside or live across an outlined
10053 // function, then they may be modified later, either by the compiler or
10054 // some other tool (like the linker).
10055 //
10056 // To avoid outlining in these situations, partition each block into ranges
10057 // where these registers are dead. We will only outline from those ranges.
10058 LiveRegUnits LRU(getRegisterInfo());
10059 auto AreAllUnsafeRegsDead = [&LRU]() {
10060 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10061 LRU.available(AArch64::NZCV);
10062 };
10063
10064 // We need to know if LR is live across an outlining boundary later on in
10065 // order to decide how we'll create the outlined call, frame, etc.
10066 //
10067 // It's pretty expensive to check this for *every candidate* within a block.
10068 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10069 // to compute liveness from the end of the block for O(n) candidates within
10070 // the block.
10071 //
10072 // So, to improve the average case, let's keep track of liveness from the end
10073 // of the block to the beginning of *every outlinable range*. If we know that
10074 // LR is available in every range we could outline from, then we know that
10075 // we don't need to check liveness for any candidate within that range.
10076 bool LRAvailableEverywhere = true;
10077 // Compute liveness bottom-up.
10078 LRU.addLiveOuts(MBB);
10079 // Update flags that require info about the entire MBB.
10080 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10081 if (MI.isCall() && !MI.isTerminator())
10082 Flags |= MachineOutlinerMBBFlags::HasCalls;
10083 };
10084 // Range: [RangeBegin, RangeEnd)
10085 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10086 unsigned RangeLen;
10087 auto CreateNewRangeStartingAt =
10088 [&RangeBegin, &RangeEnd,
10089 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10090 RangeBegin = NewBegin;
10091 RangeEnd = std::next(RangeBegin);
10092 RangeLen = 0;
10093 };
10094 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10095 // At least one unsafe register is not dead. We do not want to outline at
10096 // this point. If it is long enough to outline from and does not cross a
10097 // bundle boundary, save the range [RangeBegin, RangeEnd).
10098 if (RangeLen <= 1)
10099 return;
10100 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10101 return;
10102 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10103 return;
10104 Ranges.emplace_back(RangeBegin, RangeEnd);
10105 };
10106 // Find the first point where all unsafe registers are dead.
10107 // FIND: <safe instr> <-- end of first potential range
10108 // SKIP: <unsafe def>
10109 // SKIP: ... everything between ...
10110 // SKIP: <unsafe use>
10111 auto FirstPossibleEndPt = MBB.instr_rbegin();
10112 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10113 LRU.stepBackward(*FirstPossibleEndPt);
10114 // Update flags that impact how we outline across the entire block,
10115 // regardless of safety.
10116 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10117 if (AreAllUnsafeRegsDead())
10118 break;
10119 }
10120 // If we exhausted the entire block, we have no safe ranges to outline.
10121 if (FirstPossibleEndPt == MBB.instr_rend())
10122 return Ranges;
10123 // Current range.
10124 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10125 // StartPt points to the first place where all unsafe registers
10126 // are dead (if there is any such point). Begin partitioning the MBB into
10127 // ranges.
10128 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10129 LRU.stepBackward(MI);
10130 UpdateWholeMBBFlags(MI);
10131 if (!AreAllUnsafeRegsDead()) {
10132 SaveRangeIfNonEmpty();
10133 CreateNewRangeStartingAt(MI.getIterator());
10134 continue;
10135 }
10136 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10137 RangeBegin = MI.getIterator();
10138 ++RangeLen;
10139 }
10140 // Above loop misses the last (or only) range. If we are still safe, then
10141 // let's save the range.
10142 if (AreAllUnsafeRegsDead())
10143 SaveRangeIfNonEmpty();
10144 if (Ranges.empty())
10145 return Ranges;
10146 // We found the ranges bottom-up, but the mapping expects them top-down.
10147 // Reverse the order.
10148 std::reverse(Ranges.begin(), Ranges.end());
10149 // If there is at least one outlinable range where LR is unavailable
10150 // somewhere, remember that.
10151 if (!LRAvailableEverywhere)
10152 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
10153 return Ranges;
10154}
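// Illustrative sketch (not part of the upstream source): if a block
// materializes a value in x16, e.g.
//
//   mov x16, #1
//   add x0, x0, x16
//
// then x16 is live between those two instructions, so they cannot belong to
// any returned range; the block is partitioned into ranges on either side
// where x16, x17 and NZCV are all dead.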
10155
10157AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10158 MachineBasicBlock::iterator &MIT,
10159 unsigned Flags) const {
10160 MachineInstr &MI = *MIT;
10161
10162 // Don't outline anything used for return address signing. The outlined
10163 // function will get signed later if needed
10164 switch (MI.getOpcode()) {
10165 case AArch64::PACM:
10166 case AArch64::PACIASP:
10167 case AArch64::PACIBSP:
10168 case AArch64::PACIASPPC:
10169 case AArch64::PACIBSPPC:
10170 case AArch64::AUTIASP:
10171 case AArch64::AUTIBSP:
10172 case AArch64::AUTIASPPCi:
10173 case AArch64::AUTIASPPCr:
10174 case AArch64::AUTIBSPPCi:
10175 case AArch64::AUTIBSPPCr:
10176 case AArch64::RETAA:
10177 case AArch64::RETAB:
10178 case AArch64::RETAASPPCi:
10179 case AArch64::RETAASPPCr:
10180 case AArch64::RETABSPPCi:
10181 case AArch64::RETABSPPCr:
10182 case AArch64::EMITBKEY:
10183 case AArch64::PAUTH_PROLOGUE:
10184 case AArch64::PAUTH_EPILOGUE:
10185 return outliner::InstrType::Illegal;
10186 }
10187
10188 // We can only outline these if we will tail call the outlined function, or
10189 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10190 // in a tail call.
10191 //
10192 // FIXME: If the proper fixups for the offset are implemented, this should be
10193 // possible.
10194 if (MI.isCFIInstruction())
10195 return outliner::InstrType::Legal;
10196
10197 // Is this a terminator for a basic block?
10198 if (MI.isTerminator())
10199 // TargetInstrInfo::getOutliningType has already filtered out anything
10200 // that would break this, so we can allow it here.
10201 return outliner::InstrType::Legal;
10202
10203 // Make sure none of the operands are un-outlinable.
10204 for (const MachineOperand &MOP : MI.operands()) {
10205 // A check preventing CFI indices was here before, but only CFI
10206 // instructions should have those.
10207 assert(!MOP.isCFIIndex());
10208
10209 // If it uses LR or W30 explicitly, then don't touch it.
10210 if (MOP.isReg() && !MOP.isImplicit() &&
10211 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10212 return outliner::InstrType::Illegal;
10213 }
10214
10215 // Special cases for instructions that can always be outlined, but will fail
10216 // the later tests, e.g. ADRPs, which are PC-relative, use LR, but can always
10217 // be outlined because they don't require a *specific* value to be in LR.
10218 if (MI.getOpcode() == AArch64::ADRP)
10219 return outliner::InstrType::Legal;
10220
10221 // If MI is a call we might be able to outline it. We don't want to outline
10222 // any calls that rely on the position of items on the stack. When we outline
10223 // something containing a call, we have to emit a save and restore of LR in
10224 // the outlined function. Currently, this always happens by saving LR to the
10225 // stack. Thus, if we outline, say, half the parameters for a function call
10226 // plus the call, then we'll break the callee's expectations for the layout
10227 // of the stack.
10228 //
10229 // FIXME: Allow calls to functions which construct a stack frame, as long
10230 // as they don't access arguments on the stack.
10231 // FIXME: Figure out some way to analyze functions defined in other modules.
10232 // We should be able to compute the memory usage based on the IR calling
10233 // convention, even if we can't see the definition.
10234 if (MI.isCall()) {
10235 // Get the function associated with the call. Look at each operand and find
10236 // the one that represents the callee and get its name.
10237 const Function *Callee = nullptr;
10238 for (const MachineOperand &MOP : MI.operands()) {
10239 if (MOP.isGlobal()) {
10240 Callee = dyn_cast<Function>(MOP.getGlobal());
10241 break;
10242 }
10243 }
10244
10245 // Never outline calls to mcount. There isn't any rule that would require
10246 // this, but the Linux kernel's "ftrace" feature depends on it.
10247 if (Callee && Callee->getName() == "\01_mcount")
10248 return outliner::InstrType::Illegal;
10249
10250 // If we don't know anything about the callee, assume it depends on the
10251 // stack layout of the caller. In that case, it's only legal to outline
10252 // as a tail-call. Explicitly list the call instructions we know about so we
10253 // don't get unexpected results with call pseudo-instructions.
10254 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10255 if (MI.getOpcode() == AArch64::BLR ||
10256 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10257 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10258
10259 if (!Callee)
10260 return UnknownCallOutlineType;
10261
10262 // We have a function we have information about. Check if it's something
10263 // we can safely outline.
10264 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10265
10266 // We don't know what's going on with the callee at all. Don't touch it.
10267 if (!CalleeMF)
10268 return UnknownCallOutlineType;
10269
10270 // Check if we know anything about the callee saves on the function. If we
10271 // don't, then don't touch it, since that implies that we haven't
10272 // computed anything about its stack frame yet.
10273 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10274 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10275 MFI.getNumObjects() > 0)
10276 return UnknownCallOutlineType;
10277
10278 // At this point, we can say that CalleeMF ought to not pass anything on the
10279 // stack. Therefore, we can outline it.
10280 return outliner::InstrType::Legal;
10281 }
10282
10283 // Don't touch the link register or W30.
10284 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10285 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10286 return outliner::InstrType::Illegal;
10287
10288 // Don't outline BTI instructions, because that will prevent the outlining
10289 // site from being indirectly callable.
10290 if (hasBTISemantics(MI))
10291 return outliner::InstrType::Illegal;
10292
10294}
10295
10296void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10297 for (MachineInstr &MI : MBB) {
10298 const MachineOperand *Base;
10299 TypeSize Width(0, false);
10300 int64_t Offset;
10301 bool OffsetIsScalable;
10302
10303 // Is this a load or store with an immediate offset with SP as the base?
10304 if (!MI.mayLoadOrStore() ||
10305 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10306 &RI) ||
10307 (Base->isReg() && Base->getReg() != AArch64::SP))
10308 continue;
10309
10310 // It is, so we have to fix it up.
10311 TypeSize Scale(0U, false);
10312 int64_t Dummy1, Dummy2;
10313
10314 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10315 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10316 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10317 assert(Scale != 0 && "Unexpected opcode!");
10318 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10319
10320 // We've pushed the return address to the stack, so add 16 to the offset.
10321 // This is safe, since we already checked if it would overflow when we
10322 // checked if this instruction was legal to outline.
10323 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10324 StackOffsetOperand.setImm(NewImm);
10325 }
10326}
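// Illustrative sketch (not part of the upstream source): with LR spilled by
// "str x30, [sp, #-16]!" in the outlined prologue, an outlined access such as
//
//   ldr x0, [sp, #8]
//
// has to become
//
//   ldr x0, [sp, #24]
//
// i.e. the byte offset grows by 16 and is re-encoded in units of the
// instruction's scale, which is exactly what the loop above does.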
10327
10328static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10329 const AArch64InstrInfo *TII,
10330 bool ShouldSignReturnAddr) {
10331 if (!ShouldSignReturnAddr)
10332 return;
10333
10334 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10335 .setMIFlag(MachineInstr::FrameSetup);
10336 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10337 TII->get(AArch64::PAUTH_EPILOGUE))
10338 .setMIFlag(MachineInstr::FrameDestroy);
10339}
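// Hedged sketch (not part of the upstream source): PAUTH_PROLOGUE and
// PAUTH_EPILOGUE are pseudo-instructions; later pointer-authentication
// lowering expands them into the actual sign/authenticate sequence, e.g.
//
//   paciasp      ; at the entry of the outlined function
//   ...
//   autiasp      ; before the return
//
// (or the B-key variants, depending on the candidates' signing scheme).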
10340
10341void AArch64InstrInfo::buildOutlinedFrame(
10342 MachineBasicBlock &MBB, MachineFunction &MF,
10343 const outliner::OutlinedFunction &OF) const {
10344
10345 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10346
10347 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10348 FI->setOutliningStyle("Tail Call");
10349 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10350 // For thunk outlining, rewrite the last instruction from a call to a
10351 // tail-call.
10352 MachineInstr *Call = &*--MBB.instr_end();
10353 unsigned TailOpcode;
10354 if (Call->getOpcode() == AArch64::BL) {
10355 TailOpcode = AArch64::TCRETURNdi;
10356 } else {
10357 assert(Call->getOpcode() == AArch64::BLR ||
10358 Call->getOpcode() == AArch64::BLRNoIP);
10359 TailOpcode = AArch64::TCRETURNriALL;
10360 }
10361 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10362 .add(Call->getOperand(0))
10363 .addImm(0);
10364 MBB.insert(MBB.end(), TC);
10365 Call->eraseFromParent();
10366
10367 FI->setOutliningStyle("Thunk");
10368 }
10369
10370 bool IsLeafFunction = true;
10371
10372 // Is there a call in the outlined range?
10373 auto IsNonTailCall = [](const MachineInstr &MI) {
10374 return MI.isCall() && !MI.isReturn();
10375 };
10376
10377 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10378 // Fix up the instructions in the range, since we're going to modify the
10379 // stack.
10380
10381 // Bugzilla ID: 46767
10382 // TODO: Check if fixing up twice is safe so we can outline these.
10383 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10384 "Can only fix up stack references once");
10385 fixupPostOutline(MBB);
10386
10387 IsLeafFunction = false;
10388
10389 // LR has to be a live in so that we can save it.
10390 if (!MBB.isLiveIn(AArch64::LR))
10391 MBB.addLiveIn(AArch64::LR);
10392
10393 MachineBasicBlock::iterator It = MBB.begin();
10394 MachineBasicBlock::iterator Et = MBB.end();
10395
10396 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10397 OF.FrameConstructionID == MachineOutlinerThunk)
10398 Et = std::prev(MBB.end());
10399
10400 // Insert a save before the outlined region
10401 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10402 .addReg(AArch64::SP, RegState::Define)
10403 .addReg(AArch64::LR)
10404 .addReg(AArch64::SP)
10405 .addImm(-16);
10406 It = MBB.insert(It, STRXpre);
10407
10408 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10409 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10410
10411 // Add a CFI saying the stack was moved 16 B down.
10412 CFIBuilder.buildDefCFAOffset(16);
10413
10414 // Add a CFI saying that the LR that we want to find is now 16 B higher
10415 // than before.
10416 CFIBuilder.buildOffset(AArch64::LR, -16);
10417 }
10418
10419 // Insert a restore before the terminator for the function.
10420 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10421 .addReg(AArch64::SP, RegState::Define)
10422 .addReg(AArch64::LR, RegState::Define)
10423 .addReg(AArch64::SP)
10424 .addImm(16);
10425 Et = MBB.insert(Et, LDRXpost);
10426 }
10427
10428 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10429
10430 // If this is a tail call outlined function, then there's already a return.
10431 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10432 OF.FrameConstructionID == MachineOutlinerThunk) {
10433 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10434 return;
10435 }
10436
10437 // It's not a tail call, so we have to insert the return ourselves.
10438
10439 // LR has to be a live in so that we can return to it.
10440 if (!MBB.isLiveIn(AArch64::LR))
10441 MBB.addLiveIn(AArch64::LR);
10442
10443 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10444 .addReg(AArch64::LR);
10445 MBB.insert(MBB.end(), ret);
10446
10447 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10448
10449 FI->setOutliningStyle("Function");
10450
10451 // Did we have to modify the stack by saving the link register?
10452 if (OF.FrameConstructionID != MachineOutlinerDefault)
10453 return;
10454
10455 // We modified the stack.
10456 // Walk over the basic block and fix up all the stack accesses.
10457 fixupPostOutline(MBB);
10458}
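// Illustrative sketch (not part of the upstream source): a default-frame
// outlined function built here looks roughly like
//
//   OUTLINED_FUNCTION_N:
//     str x30, [sp, #-16]!    ; STRXpre, save LR
//     ...                     ; outlined body, SP offsets fixed up by +16
//     ldr x30, [sp], #16      ; LDRXpost, restore LR
//     ret
//
// with CFI for the 16-byte SP adjustment emitted when DWARF unwind info is
// needed, and sign/authenticate pseudos added when return address signing is
// enabled.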
10459
10460MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10461 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10462 MachineFunction &MF, outliner::Candidate &C) const {
10463
10464 // Are we tail calling?
10465 if (C.CallConstructionID == MachineOutlinerTailCall) {
10466 // If yes, then we can just branch to the label.
10467 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10468 .addGlobalAddress(M.getNamedValue(MF.getName()))
10469 .addImm(0));
10470 return It;
10471 }
10472
10473 // Are we saving the link register?
10474 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10475 C.CallConstructionID == MachineOutlinerThunk) {
10476 // No, so just insert the call.
10477 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10478 .addGlobalAddress(M.getNamedValue(MF.getName())));
10479 return It;
10480 }
10481
10482 // We want to return the spot where we inserted the call.
10483 MachineBasicBlock::iterator CallPt;
10484
10485 // Instructions for saving and restoring LR around the call instruction we're
10486 // going to insert.
10487 MachineInstr *Save;
10488 MachineInstr *Restore;
10489 // Can we save to a register?
10490 if (C.CallConstructionID == MachineOutlinerRegSave) {
10491 // FIXME: This logic should be sunk into a target-specific interface so that
10492 // we don't have to recompute the register.
10493 Register Reg = findRegisterToSaveLRTo(C);
10494 assert(Reg && "No callee-saved register available?");
10495
10496 // LR has to be a live in so that we can save it.
10497 if (!MBB.isLiveIn(AArch64::LR))
10498 MBB.addLiveIn(AArch64::LR);
10499
10500 // Save and restore LR from Reg.
10501 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10502 .addReg(AArch64::XZR)
10503 .addReg(AArch64::LR)
10504 .addImm(0);
10505 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10506 .addReg(AArch64::XZR)
10507 .addReg(Reg)
10508 .addImm(0);
10509 } else {
10510 // We have the default case. Save and restore from SP.
10511 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10512 .addReg(AArch64::SP, RegState::Define)
10513 .addReg(AArch64::LR)
10514 .addReg(AArch64::SP)
10515 .addImm(-16);
10516 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10517 .addReg(AArch64::SP, RegState::Define)
10518 .addReg(AArch64::LR, RegState::Define)
10519 .addReg(AArch64::SP)
10520 .addImm(16);
10521 }
10522
10523 It = MBB.insert(It, Save);
10524 It++;
10525
10526 // Insert the call.
10527 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10528 .addGlobalAddress(M.getNamedValue(MF.getName())));
10529 CallPt = It;
10530 It++;
10531
10532 It = MBB.insert(It, Restore);
10533 return CallPt;
10534}
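// Illustrative summary (not part of the upstream source; x9 stands in for
// whatever register findRegisterToSaveLRTo picks): depending on
// C.CallConstructionID, the sequence inserted above is one of
//
//   b   OUTLINED_FUNCTION                                 ; TailCall (TCRETURNdi)
//   bl  OUTLINED_FUNCTION                                 ; NoLRSave / Thunk
//   mov x9, lr ; bl OUTLINED_FUNCTION ; mov lr, x9        ; RegSave
//   str x30, [sp, #-16]! ; bl OUTLINED_FUNCTION ; ldr x30, [sp], #16   ; Default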
10535
10536bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10537 MachineFunction &MF) const {
10538 return MF.getFunction().hasMinSize();
10539}
10540
10541void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10542                                          MachineBasicBlock::iterator Iter,
10543                                          DebugLoc &DL,
10544 bool AllowSideEffects) const {
10545 const MachineFunction &MF = *MBB.getParent();
10546 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10547 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10548
10549 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10550 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10551 } else if (STI.isSVEorStreamingSVEAvailable()) {
10552 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10553 .addImm(0)
10554 .addImm(0);
10555 } else if (STI.isNeonAvailable()) {
10556 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10557 .addImm(0);
10558 } else {
10559 // This is a streaming-compatible function without SVE. We don't have full
10560 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10561    // Since `movi v..` would be illegal here, use `fmov d..` instead.
10562 assert(STI.hasNEON() && "Expected to have NEON.");
10563 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10564 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10565 }
10566}
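// Roughly, the zeroing instruction chosen above per register class is:
//   GPR:                        movz xN, #0       (MOVZXi)
//   SVE / streaming SVE:        mov  zN.d, #0     (DUP_ZI_D)
//   NEON:                       movi vN.2d, #0    (MOVIv2d_ns)
//   streaming-compatible FPR:   the FMOVD0 zeroing pseudo on the dN sub-register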
10567
10568std::optional<DestSourcePair>
10569AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10570
10571 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
10572  // and zero immediate operands are used as an alias for a mov instruction.
10573 if (((MI.getOpcode() == AArch64::ORRWrs &&
10574 MI.getOperand(1).getReg() == AArch64::WZR &&
10575 MI.getOperand(3).getImm() == 0x0) ||
10576 (MI.getOpcode() == AArch64::ORRWrr &&
10577 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10578 // Check that the w->w move is not a zero-extending w->x mov.
10579 (!MI.getOperand(0).getReg().isVirtual() ||
10580 MI.getOperand(0).getSubReg() == 0) &&
10581 (!MI.getOperand(0).getReg().isPhysical() ||
10582 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10583 /*TRI=*/nullptr) == -1))
10584 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10585
10586 if (MI.getOpcode() == AArch64::ORRXrs &&
10587 MI.getOperand(1).getReg() == AArch64::XZR &&
10588 MI.getOperand(3).getImm() == 0x0)
10589 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10590
10591 return std::nullopt;
10592}
10593
10594std::optional<DestSourcePair>
10595AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10596  if ((MI.getOpcode() == AArch64::ORRWrs &&
10597 MI.getOperand(1).getReg() == AArch64::WZR &&
10598 MI.getOperand(3).getImm() == 0x0) ||
10599 (MI.getOpcode() == AArch64::ORRWrr &&
10600 MI.getOperand(1).getReg() == AArch64::WZR))
10601 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10602 return std::nullopt;
10603}
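// For example, "orr w0, wzr, w1" is always treated as a copy-like w1 -> w0
// here, whereas isCopyInstrImpl above additionally rejects the case where the
// instruction also defines the x0 super-register (a zero-extending w->x mov).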
10604
10605std::optional<RegImmPair>
10606AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10607 int Sign = 1;
10608 int64_t Offset = 0;
10609
10610 // TODO: Handle cases where Reg is a super- or sub-register of the
10611 // destination register.
10612 const MachineOperand &Op0 = MI.getOperand(0);
10613 if (!Op0.isReg() || Reg != Op0.getReg())
10614 return std::nullopt;
10615
10616 switch (MI.getOpcode()) {
10617 default:
10618 return std::nullopt;
10619 case AArch64::SUBWri:
10620 case AArch64::SUBXri:
10621 case AArch64::SUBSWri:
10622 case AArch64::SUBSXri:
10623 Sign *= -1;
10624 [[fallthrough]];
10625 case AArch64::ADDSWri:
10626 case AArch64::ADDSXri:
10627 case AArch64::ADDWri:
10628 case AArch64::ADDXri: {
10629 // TODO: Third operand can be global address (usually some string).
10630 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10631 !MI.getOperand(2).isImm())
10632 return std::nullopt;
10633 int Shift = MI.getOperand(3).getImm();
10634 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10635 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10636 }
10637 }
10638 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10639}
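// For example, "add x0, x1, #3, lsl #12" is reported as {x1, +12288} and
// "subs w0, w1, #16" as {w1, -16}.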
10640
10641/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10642/// the destination register then, if possible, describe the value in terms of
10643/// the source register.
10644static std::optional<ParamLoadedValue>
10645describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10646                       const TargetInstrInfo *TII,
10647 const TargetRegisterInfo *TRI) {
10648 auto DestSrc = TII->isCopyLikeInstr(MI);
10649 if (!DestSrc)
10650 return std::nullopt;
10651
10652 Register DestReg = DestSrc->Destination->getReg();
10653 Register SrcReg = DestSrc->Source->getReg();
10654
10655 if (!DestReg.isValid() || !SrcReg.isValid())
10656 return std::nullopt;
10657
10658 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10659
10660 // If the described register is the destination, just return the source.
10661 if (DestReg == DescribedReg)
10662 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10663
10664 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10665 if (MI.getOpcode() == AArch64::ORRWrs &&
10666 TRI->isSuperRegister(DestReg, DescribedReg))
10667 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10668
10669 // We may need to describe the lower part of a ORRXrs move.
10670 if (MI.getOpcode() == AArch64::ORRXrs &&
10671 TRI->isSubRegister(DestReg, DescribedReg)) {
10672 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10673 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10674 }
10675
10676 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10677 "Unhandled ORR[XW]rs copy case");
10678
10679 return std::nullopt;
10680}
10681
10682bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10683 // Functions cannot be split to different sections on AArch64 if they have
10684 // a red zone. This is because relaxing a cross-section branch may require
10685 // incrementing the stack pointer to spill a register, which would overwrite
10686 // the red zone.
10687 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10688 return false;
10689
10690  return TargetInstrInfo::isFunctionSafeToSplit(MF);
10691}
10692
10693bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10694 const MachineBasicBlock &MBB) const {
10695 // Asm Goto blocks can contain conditional branches to goto labels, which can
10696 // get moved out of range of the branch instruction.
10697 auto isAsmGoto = [](const MachineInstr &MI) {
10698 return MI.getOpcode() == AArch64::INLINEASM_BR;
10699 };
10700 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10701 return false;
10702
10703 // Because jump tables are label-relative instead of table-relative, they all
10704 // must be in the same section or relocation fixup handling will fail.
10705
10706 // Check if MBB is a jump table target
10707 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10708 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10709 return llvm::is_contained(JTE.MBBs, &MBB);
10710 };
10711 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10712 return false;
10713
10714 // Check if MBB contains a jump table lookup
10715 for (const MachineInstr &MI : MBB) {
10716 switch (MI.getOpcode()) {
10717 case TargetOpcode::G_BRJT:
10718 case AArch64::JumpTableDest32:
10719 case AArch64::JumpTableDest16:
10720 case AArch64::JumpTableDest8:
10721 return false;
10722 default:
10723 continue;
10724 }
10725 }
10726
10727 // MBB isn't a special case, so it's safe to be split to the cold section.
10728 return true;
10729}
10730
10731std::optional<ParamLoadedValue>
10732AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10733 Register Reg) const {
10734 const MachineFunction *MF = MI.getMF();
10735 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10736 switch (MI.getOpcode()) {
10737 case AArch64::MOVZWi:
10738 case AArch64::MOVZXi: {
10739 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10740 // 64-bit parameters, so we need to consider super-registers.
10741 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10742 return std::nullopt;
10743
10744 if (!MI.getOperand(1).isImm())
10745 return std::nullopt;
10746 int64_t Immediate = MI.getOperand(1).getImm();
10747 int Shift = MI.getOperand(2).getImm();
10748 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10749 nullptr);
10750 }
10751 case AArch64::ORRWrs:
10752 case AArch64::ORRXrs:
10753 return describeORRLoadedValue(MI, Reg, this, TRI);
10754 }
10755
10756  return TargetInstrInfo::describeLoadedValue(MI, Reg);
10757}
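// For example, "movz x0, #1, lsl #16" describes the value 65536 in x0, and a
// register move implemented as ORRWrs/ORRXrs is described in terms of its
// source register via describeORRLoadedValue above.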
10758
10759bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10760 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10761 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10762 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10763 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10764
10765 // Anyexts are nops.
10766 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10767 return true;
10768
10769 Register DefReg = ExtMI.getOperand(0).getReg();
10770 if (!MRI.hasOneNonDBGUse(DefReg))
10771 return false;
10772
10773 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10774 // addressing mode.
10775 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10776 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10777}
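// For example, a G_SEXT whose only user is the offset of a G_PTR_ADD will
// likely end up as an extended register offset such as
//   ldr x0, [x1, w2, sxtw]
// once the address computation is folded into the load/store.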
10778
10779uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10780 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10781}
10782
10783bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10784 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10785}
10786
10787bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10788 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10789}
10790
10791unsigned int
10792AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10793 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10794}
10795
10796bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10797 unsigned Scale) const {
10798 if (Offset && Scale)
10799 return false;
10800
10801 // Check Reg + Imm
10802 if (!Scale) {
10803 // 9-bit signed offset
10804 if (isInt<9>(Offset))
10805 return true;
10806
10807 // 12-bit unsigned offset
10808 unsigned Shift = Log2_64(NumBytes);
10809 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10810 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10811 (Offset >> Shift) << Shift == Offset)
10812 return true;
10813 return false;
10814 }
10815
10816 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10817 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10818}
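// For example, with NumBytes == 8: the unscaled form (Scale == 0) accepts
// signed offsets in [-256, 255] or positive multiples of 8 up to 8 * 4095,
// while the register-offset form requires Scale == 1 or Scale == 8.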
10819
10820unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10821  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10822 return AArch64::BLRNoIP;
10823 else
10824 return AArch64::BLR;
10825}
10826
10827MachineBasicBlock::iterator
10828AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10829                                   Register TargetReg, bool FrameSetup) const {
10830 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10831
10832 MachineBasicBlock &MBB = *MBBI->getParent();
10833 MachineFunction &MF = *MBB.getParent();
10834 const AArch64InstrInfo *TII =
10835 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10836 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10837 DebugLoc DL = MBB.findDebugLoc(MBBI);
10838
10839 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10840 MachineBasicBlock *LoopTestMBB =
10841 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10842 MF.insert(MBBInsertPoint, LoopTestMBB);
10843 MachineBasicBlock *LoopBodyMBB =
10844 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10845 MF.insert(MBBInsertPoint, LoopBodyMBB);
10846 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10847 MF.insert(MBBInsertPoint, ExitMBB);
10848 MachineInstr::MIFlag Flags =
10849      FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10850
10851 // LoopTest:
10852 // SUB SP, SP, #ProbeSize
10853 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10854 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10855
10856 // CMP SP, TargetReg
10857 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10858 AArch64::XZR)
10859 .addReg(AArch64::SP)
10860 .addReg(TargetReg)
10861      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10862      .setMIFlags(Flags);
10863
10864 // B.<Cond> LoopExit
10865 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10866      .addImm(AArch64CC::LE)
10867      .addMBB(ExitMBB)
10868 .setMIFlags(Flags);
10869
10870 // STR XZR, [SP]
10871 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10872 .addReg(AArch64::XZR)
10873 .addReg(AArch64::SP)
10874 .addImm(0)
10875 .setMIFlags(Flags);
10876
10877 // B loop
10878 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10879 .addMBB(LoopTestMBB)
10880 .setMIFlags(Flags);
10881
10882 // LoopExit:
10883 // MOV SP, TargetReg
10884 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10885 .addReg(TargetReg)
10886 .addImm(0)
10887      .addImm(0)
10888      .setMIFlags(Flags);
10889
10890 // LDR XZR, [SP]
10891 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10892 .addReg(AArch64::XZR, RegState::Define)
10893 .addReg(AArch64::SP)
10894 .addImm(0)
10895 .setMIFlags(Flags);
10896
10897 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10898  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10899
10900 LoopTestMBB->addSuccessor(ExitMBB);
10901 LoopTestMBB->addSuccessor(LoopBodyMBB);
10902 LoopBodyMBB->addSuccessor(LoopTestMBB);
10903 MBB.addSuccessor(LoopTestMBB);
10904
10905 // Update liveins.
10906 if (MF.getRegInfo().reservedRegsFrozen())
10907 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10908
10909 return ExitMBB->begin();
10910}
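// Putting the pieces together, the emitted probing sequence looks roughly like:
//   LoopTest: sub  sp, sp, #ProbeSize
//             cmp  sp, <TargetReg>
//             b.le Exit
//   LoopBody: str  xzr, [sp]
//             b    LoopTest
//   Exit:     mov  sp, <TargetReg>
//             ldr  xzr, [sp]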
10911
10912namespace {
10913class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10914 MachineFunction *MF;
10915 const TargetInstrInfo *TII;
10916 const TargetRegisterInfo *TRI;
10917  MachineRegisterInfo &MRI;
10918
10919 /// The block of the loop
10920 MachineBasicBlock *LoopBB;
10921 /// The conditional branch of the loop
10922 MachineInstr *CondBranch;
10923 /// The compare instruction for loop control
10924 MachineInstr *Comp;
10925 /// The number of the operand of the loop counter value in Comp
10926 unsigned CompCounterOprNum;
10927 /// The instruction that updates the loop counter value
10928 MachineInstr *Update;
10929 /// The number of the operand of the loop counter value in Update
10930 unsigned UpdateCounterOprNum;
10931 /// The initial value of the loop counter
10932 Register Init;
10933 /// True iff Update is a predecessor of Comp
10934 bool IsUpdatePriorComp;
10935
10936 /// The normalized condition used by createTripCountGreaterCondition()
10937  SmallVector<MachineOperand, 4> Cond;
10938
10939public:
10940 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10941 MachineInstr *Comp, unsigned CompCounterOprNum,
10942 MachineInstr *Update, unsigned UpdateCounterOprNum,
10943 Register Init, bool IsUpdatePriorComp,
10944                           const SmallVectorImpl<MachineOperand> &Cond)
10945      : MF(Comp->getParent()->getParent()),
10946 TII(MF->getSubtarget().getInstrInfo()),
10947 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10948 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10949 CompCounterOprNum(CompCounterOprNum), Update(Update),
10950 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10951 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10952
10953 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10954 // Make the instructions for loop control be placed in stage 0.
10955 // The predecessors of Comp are considered by the caller.
10956 return MI == Comp;
10957 }
10958
10959 std::optional<bool> createTripCountGreaterCondition(
10960 int TC, MachineBasicBlock &MBB,
10961 SmallVectorImpl<MachineOperand> &CondParam) override {
10962 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10963 // Cond is normalized for such use.
10964 // The predecessors of the branch are assumed to have already been inserted.
10965 CondParam = Cond;
10966 return {};
10967 }
10968
10969 void createRemainingIterationsGreaterCondition(
10970 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10971 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10972
10973 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10974
10975 void adjustTripCount(int TripCountAdjust) override {}
10976
10977 bool isMVEExpanderSupported() override { return true; }
10978};
10979} // namespace
10980
10981/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10982/// is replaced by ReplaceReg. The output register is newly created.
10983/// The other operands are unchanged from MI.
10984static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10985 Register ReplaceReg, MachineBasicBlock &MBB,
10986 MachineBasicBlock::iterator InsertTo) {
10987 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10988 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10989 const TargetRegisterInfo *TRI =
10990 MBB.getParent()->getSubtarget().getRegisterInfo();
10991 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
10992 Register Result = 0;
10993 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10994 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10995 Result = MRI.createVirtualRegister(
10996 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10997 NewMI->getOperand(I).setReg(Result);
10998 } else if (I == ReplaceOprNum) {
10999 MRI.constrainRegClass(ReplaceReg,
11000 TII->getRegClass(NewMI->getDesc(), I, TRI));
11001 NewMI->getOperand(I).setReg(ReplaceReg);
11002 }
11003 }
11004 MBB.insert(InsertTo, NewMI);
11005 return Result;
11006}
11007
11008void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11009    int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11010    DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11011  // Create and accumulate conditions for the next TC iterations.
11012 // Example:
11013 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11014 // # iteration of the kernel
11015 //
11016 // # insert the following instructions
11017 // cond = CSINCXr 0, 0, C, implicit $nzcv
11018 // counter = ADDXri counter, 1 # clone from this->Update
11019 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11020 // cond = CSINCXr cond, cond, C, implicit $nzcv
11021 // ... (repeat TC times)
11022 // SUBSXri cond, 0, implicit-def $nzcv
11023
11024 assert(CondBranch->getOpcode() == AArch64::Bcc);
11025 // CondCode to exit the loop
11026  AArch64CC::CondCode CC =
11027      (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11028 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11029    CC = AArch64CC::getInvertedCondCode(CC);
11030
11031 // Accumulate conditions to exit the loop
11032 Register AccCond = AArch64::XZR;
11033
11034 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11035 auto AccumulateCond = [&](Register CurCond,
11036                            AArch64CC::CondCode CC) {
11037    Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11038 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11039 .addReg(NewCond, RegState::Define)
11040 .addReg(CurCond)
11041 .addReg(CurCond)
11042        .addImm(AArch64CC::getInvertedCondCode(CC));
11043    return NewCond;
11044 };
11045
11046 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11047    // Update and Comp for I == 0 already exist in MBB
11048 // (MBB is an unrolled kernel)
11049 Register Counter;
11050 for (int I = 0; I <= TC; ++I) {
11051 Register NextCounter;
11052 if (I != 0)
11053 NextCounter =
11054 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11055
11056 AccCond = AccumulateCond(AccCond, CC);
11057
11058 if (I != TC) {
11059 if (I == 0) {
11060 if (Update != Comp && IsUpdatePriorComp) {
11061 Counter =
11062 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11063 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11064 MBB.end());
11065 } else {
11066          // We can use the already-calculated value.
11067 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11068 }
11069 } else if (Update != Comp) {
11070 NextCounter =
11071 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11072 }
11073 }
11074 Counter = NextCounter;
11075 }
11076 } else {
11077 Register Counter;
11078 if (LastStage0Insts.empty()) {
11079      // Use the initial counter value (this tests whether the trip count is
11080      // large enough for the pipelined code to execute).
11081 Counter = Init;
11082 if (IsUpdatePriorComp)
11083 Counter =
11084 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11085 } else {
11086 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11087 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11088 }
11089
11090 for (int I = 0; I <= TC; ++I) {
11091 Register NextCounter;
11092 NextCounter =
11093 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11094 AccCond = AccumulateCond(AccCond, CC);
11095 if (I != TC && Update != Comp)
11096 NextCounter =
11097 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11098 Counter = NextCounter;
11099 }
11100 }
11101
11102 // If AccCond == 0, the remainder is greater than TC.
11103 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11104 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11105 .addReg(AccCond)
11106 .addImm(0)
11107 .addImm(0);
11108 Cond.clear();
11109  Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11110}
11111
11112static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11113 Register &RegMBB, Register &RegOther) {
11114 assert(Phi.getNumOperands() == 5);
11115 if (Phi.getOperand(2).getMBB() == MBB) {
11116 RegMBB = Phi.getOperand(1).getReg();
11117 RegOther = Phi.getOperand(3).getReg();
11118 } else {
11119 assert(Phi.getOperand(4).getMBB() == MBB);
11120 RegMBB = Phi.getOperand(3).getReg();
11121 RegOther = Phi.getOperand(1).getReg();
11122 }
11123}
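// A machine PHI over two predecessors has the operand layout
//   %dst = PHI %val0, %bb0, %val1, %bb1
// i.e. operands 1 and 3 are the incoming values and operands 2 and 4 the
// corresponding blocks, which is what the asserts above rely on.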
11124
11125static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11126  if (!Reg.isVirtual())
11127 return false;
11128 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11129 return MRI.getVRegDef(Reg)->getParent() != BB;
11130}
11131
11132/// If Reg is an induction variable, return true and set some parameters
11133static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11134 MachineInstr *&UpdateInst,
11135 unsigned &UpdateCounterOprNum, Register &InitReg,
11136 bool &IsUpdatePriorComp) {
11137 // Example:
11138 //
11139 // Preheader:
11140 // InitReg = ...
11141 // LoopBB:
11142 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11143 // Reg = COPY Reg0 ; COPY is ignored.
11144 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11145 // ; Reg is the value calculated in the previous
11146 // ; iteration, so IsUpdatePriorComp == false.
11147
11148 if (LoopBB->pred_size() != 2)
11149 return false;
11150 if (!Reg.isVirtual())
11151 return false;
11152 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11153 UpdateInst = nullptr;
11154 UpdateCounterOprNum = 0;
11155 InitReg = 0;
11156 IsUpdatePriorComp = true;
11157 Register CurReg = Reg;
11158 while (true) {
11159 MachineInstr *Def = MRI.getVRegDef(CurReg);
11160 if (Def->getParent() != LoopBB)
11161 return false;
11162 if (Def->isCopy()) {
11163 // Ignore copy instructions unless they contain subregisters
11164 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11165 return false;
11166 CurReg = Def->getOperand(1).getReg();
11167 } else if (Def->isPHI()) {
11168 if (InitReg != 0)
11169 return false;
11170 if (!UpdateInst)
11171 IsUpdatePriorComp = false;
11172 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11173 } else {
11174 if (UpdateInst)
11175 return false;
11176 switch (Def->getOpcode()) {
11177 case AArch64::ADDSXri:
11178 case AArch64::ADDSWri:
11179 case AArch64::SUBSXri:
11180 case AArch64::SUBSWri:
11181 case AArch64::ADDXri:
11182 case AArch64::ADDWri:
11183 case AArch64::SUBXri:
11184 case AArch64::SUBWri:
11185 UpdateInst = Def;
11186 UpdateCounterOprNum = 1;
11187 break;
11188 case AArch64::ADDSXrr:
11189 case AArch64::ADDSWrr:
11190 case AArch64::SUBSXrr:
11191 case AArch64::SUBSWrr:
11192 case AArch64::ADDXrr:
11193 case AArch64::ADDWrr:
11194 case AArch64::SUBXrr:
11195 case AArch64::SUBWrr:
11196 UpdateInst = Def;
11197 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11198 UpdateCounterOprNum = 1;
11199 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11200 UpdateCounterOprNum = 2;
11201 else
11202 return false;
11203 break;
11204 default:
11205 return false;
11206 }
11207 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11208 }
11209
11210 if (!CurReg.isVirtual())
11211 return false;
11212 if (Reg == CurReg)
11213 break;
11214 }
11215
11216 if (!UpdateInst)
11217 return false;
11218
11219 return true;
11220}
11221
11222std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11223AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11224  // Accept loops that meet the following conditions
11225 // * The conditional branch is BCC
11226 // * The compare instruction is ADDS/SUBS/WHILEXX
11227 // * One operand of the compare is an induction variable and the other is a
11228 // loop invariant value
11229 // * The induction variable is incremented/decremented by a single instruction
11230 // * Does not contain CALL or instructions which have unmodeled side effects
11231
11232 for (MachineInstr &MI : *LoopBB)
11233 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11234 // This instruction may use NZCV, which interferes with the instruction to
11235 // be inserted for loop control.
11236 return nullptr;
11237
11238 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11239  SmallVector<MachineOperand, 4> Cond;
11240  if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11241 return nullptr;
11242
11243 // Infinite loops are not supported
11244 if (TBB == LoopBB && FBB == LoopBB)
11245 return nullptr;
11246
11247 // Must be conditional branch
11248 if (TBB != LoopBB && FBB == nullptr)
11249 return nullptr;
11250
11251 assert((TBB == LoopBB || FBB == LoopBB) &&
11252 "The Loop must be a single-basic-block loop");
11253
11254 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11255  const TargetRegisterInfo &TRI = getRegisterInfo();
11256
11257 if (CondBranch->getOpcode() != AArch64::Bcc)
11258 return nullptr;
11259
11260 // Normalization for createTripCountGreaterCondition()
11261 if (TBB == LoopBB)
11262    reverseBranchCondition(Cond);
11263
11264 MachineInstr *Comp = nullptr;
11265 unsigned CompCounterOprNum = 0;
11266 for (MachineInstr &MI : reverse(*LoopBB)) {
11267 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11268 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11269 // operands is a loop invariant value
11270
11271 switch (MI.getOpcode()) {
11272 case AArch64::SUBSXri:
11273 case AArch64::SUBSWri:
11274 case AArch64::ADDSXri:
11275 case AArch64::ADDSWri:
11276 Comp = &MI;
11277 CompCounterOprNum = 1;
11278 break;
11279 case AArch64::ADDSWrr:
11280 case AArch64::ADDSXrr:
11281 case AArch64::SUBSWrr:
11282 case AArch64::SUBSXrr:
11283 Comp = &MI;
11284 break;
11285 default:
11286 if (isWhileOpcode(MI.getOpcode())) {
11287 Comp = &MI;
11288 break;
11289 }
11290 return nullptr;
11291 }
11292
11293 if (CompCounterOprNum == 0) {
11294 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11295 CompCounterOprNum = 2;
11296 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11297 CompCounterOprNum = 1;
11298 else
11299 return nullptr;
11300 }
11301 break;
11302 }
11303 }
11304 if (!Comp)
11305 return nullptr;
11306
11307 MachineInstr *Update = nullptr;
11308 Register Init;
11309 bool IsUpdatePriorComp;
11310 unsigned UpdateCounterOprNum;
11311 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11312 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11313 return nullptr;
11314
11315 return std::make_unique<AArch64PipelinerLoopInfo>(
11316 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11317 Init, IsUpdatePriorComp, Cond);
11318}
11319
11320/// verifyInstruction - Perform target specific instruction verification.
11321bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11322 StringRef &ErrInfo) const {
11323 // Verify that immediate offsets on load/store instructions are within range.
11324 // Stack objects with an FI operand are excluded as they can be fixed up
11325 // during PEI.
11326 TypeSize Scale(0U, false), Width(0U, false);
11327 int64_t MinOffset, MaxOffset;
11328 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11329 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11330 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11331 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11332 if (Imm < MinOffset || Imm > MaxOffset) {
11333 ErrInfo = "Unexpected immediate on load/store instruction";
11334 return false;
11335 }
11336 }
11337 }
11338
11339 const MCInstrDesc &MCID = MI.getDesc();
11340 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11341 const MachineOperand &MO = MI.getOperand(Op);
11342 switch (MCID.operands()[Op].OperandType) {
11343    case AArch64::OPERAND_IMPLICIT_IMM_0:
11344      if (!MO.isImm() || MO.getImm() != 0) {
11345 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11346 return false;
11347 }
11348 break;
11349    case AArch64::OPERAND_SHIFT_MSL:
11350      if (!MO.isImm() ||
11351          AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11352          (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11353 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11354 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11355 return false;
11356 }
11357 break;
11358 default:
11359 break;
11360 }
11361 }
11362 return true;
11363}
11364
11365#define GET_INSTRINFO_HELPERS
11366#define GET_INSTRMAP_INFO
11367#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
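A minimal sketch of the usual block-splitting idiom built from the MachineBasicBlock members above together with MachineFunction::CreateMachineBasicBlock and insert (documented further below); splitBlockAt is a hypothetical helper, and the range form of splice plus getBasicBlock() are standard members assumed in addition to the single-instruction splice listed here.

static MachineBasicBlock *splitBlockAt(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator SplitPt) {
  MachineFunction &MF = *MBB.getParent();
  MachineBasicBlock *NewMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
  MF.insert(std::next(MBB.getIterator()), NewMBB);
  // Move [SplitPt, end) into the new block, then rewire the CFG and PHIs.
  NewMBB->splice(NewMBB->begin(), &MBB, SplitPt, MBB.end());
  NewMBB->transferSuccessorsAndUpdatePHIs(&MBB);
  MBB.addSuccessor(NewMBB);
  return NewMBB;
}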
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
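A minimal sketch of iterating the frame objects described by these MachineFrameInfo accessors; dumpFrameObjects is a hypothetical helper, and getObjectIndexBegin/getObjectIndexEnd/isDeadObjectIndex are standard MachineFrameInfo members not listed on this page.

static void dumpFrameObjects(const MachineFunction &MF) {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  for (int FI = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd(); FI != E;
       ++FI) {
    if (MFI.isDeadObjectIndex(FI))
      continue;
    errs() << "FI#" << FI << ": size " << MFI.getObjectSize(FI) << ", offset "
           << MFI.getObjectOffset(FI) << ", align "
           << MFI.getObjectAlign(FI).value()
           << (MFI.isFixedObjectIndex(FI) ? " (fixed)\n" : "\n");
  }
}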
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
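A minimal sketch of chaining these MachineInstrBuilder calls through BuildMI (documented further below) to materialize "add xD, xS, #imm"; buildAddImmediate is a hypothetical helper, and the trailing zero assumes the ADDXri operand layout (dst, src, imm12, shift).

static MachineInstr *buildAddImmediate(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const DebugLoc &DL,
                                       const TargetInstrInfo *TII,
                                       Register DestReg, Register SrcReg,
                                       unsigned Imm) {
  return BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
      .addReg(SrcReg)
      .addImm(Imm) // unsigned 12-bit immediate
      .addImm(0)   // shift: LSL #0
      .getInstr();
}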
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
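A minimal sketch combining several of the MachineInstr queries above; definesRegWithoutMemAccess is a hypothetical helper used only to show how the accessors compose.

static bool definesRegWithoutMemAccess(const MachineInstr &MI, Register Reg,
                                       const TargetRegisterInfo *TRI) {
  if (MI.isCall() || MI.mayLoadOrStore() || MI.hasUnmodeledSideEffects())
    return false;
  // Require a live (non-dead) definition of Reg.
  return MI.definesRegister(Reg, TRI) && !MI.registerDefIsDead(Reg, TRI);
}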
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated with the IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
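A minimal sketch of rewriting operands in place with the MachineOperand setters above; retargetFirstUse is a hypothetical helper, and isDef() is a standard MachineOperand query not listed on this page.

static void retargetFirstUse(MachineInstr &MI, Register OldReg,
                             Register NewReg) {
  for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
    MachineOperand &MO = MI.getOperand(I);
    if (MO.isReg() && !MO.isDef() && MO.getReg() == OldReg) {
      MO.setReg(NewReg);
      MO.setIsKill(false); // Liveness of NewReg is unknown here.
      break;
    }
  }
}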
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
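A minimal sketch of the def-chasing pattern these MachineRegisterInfo members support; getVRegDefOfClass is a hypothetical helper.

static MachineInstr *getVRegDefOfClass(MachineRegisterInfo &MRI, Register Reg,
                                       const TargetRegisterClass *RC) {
  if (!Reg.isVirtual())
    return nullptr;
  if (!MRI.constrainRegClass(Reg, RC))
    return nullptr; // The requested class constraint cannot be satisfied.
  return MRI.getUniqueVRegDef(Reg);
}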
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or fewer elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
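A minimal sketch of round-tripping a logical immediate and building a shifter operand with the AArch64_AM helpers above; exampleImmediateEncodings is a hypothetical helper, and 0x00FF00FF00FF00FF is chosen because it is a valid 64-bit logical immediate (a replicated 16-bit element with 8 contiguous set bits).

static void exampleImmediateEncodings() {
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00FF00FF00FF00FFULL, 64);
  assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == 0x00FF00FF00FF00FFULL);
  // LSL #12, the form used by shifted arithmetic immediates.
  unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
  assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL &&
         AArch64_AM::getShiftValue(Shifter) == 12);
}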
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
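A minimal sketch of querying the immediate expander without emitting any code; countMoveImmInsns is a hypothetical helper, and the AArch64_IMM namespace and ImmInsnModel element type are assumed from AArch64ExpandImm.h.

static unsigned countMoveImmInsns(uint64_t Imm) {
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, /*BitSize=*/64, Insn);
  return Insn.size(); // e.g. one MOVZ plus up to three MOVKs for 64-bit values.
}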
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
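A minimal sketch of checking the status bits returned by isAArch64FrameOffsetLegal (declared above); canFoldFrameOffset is a hypothetical helper and Bytes is a placeholder offset.

static bool canFoldFrameOffset(const MachineInstr &MI, int64_t Bytes) {
  StackOffset Off = StackOffset::getFixed(Bytes);
  bool UseUnscaledOp = false;
  unsigned UnscaledOp = 0;
  int64_t Emittable = 0;
  int Status = isAArch64FrameOffsetLegal(MI, Off, &UseUnscaledOp, &UnscaledOp,
                                         &Emittable);
  // AArch64FrameOffsetCanUpdate means the offset can be applied at least
  // partially; here we only accept a fully legal offset.
  return (Status & AArch64FrameOffsetIsLegal) != 0;
}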
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
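A minimal sketch of the usual stack-adjustment call through emitFrameOffset; allocateStackSpace is a hypothetical helper and the 32-byte size is a placeholder.

static void allocateStackSpace(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, const TargetInstrInfo *TII) {
  // sub sp, sp, #32, tagged as part of the prologue.
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-32), TII, MachineInstr::FrameSetup);
}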
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.