1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// getInstSizeInBytes - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // The size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
136 // The specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
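// Worked example for the cases above: a PATCHABLE_FUNCTION_ENTER in a
// function carrying "patchable-function-entry"="2" reports 2 * 4 = 8 bytes of
// NOPs, while the default of 9 NOPs yields 36 bytes, matching the size of the
// other XRay sleds handled in this switch.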
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
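// Summary of the Cond encodings produced above (and consumed by
// instantiateCondBranch and insertSelect further down):
//   Bcc:               { CC }
//   CB[N]ZW/CB[N]ZX:   { -1, Opcode, Reg }
//   TB[N]ZW/TB[N]ZX:   { -1, Opcode, Reg, BitImm }
//   CBWPri/CBXPri/
//   CBWPrr/CBXPrr:     { -1, Opcode, CC, Op0, Op1 }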
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump "
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
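// Since the check above divides the byte offset by the 4-byte instruction
// size, the default displacement bits correspond roughly to: TB[N]Z (14 bits)
// about +/-32 KiB, CB[N]Z and Bcc (19 bits) about +/-1 MiB, and B (26 bits)
// about +/-128 MiB of reachable range.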
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
324 report_fatal_error(
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
360 report_fatal_error(
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
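// Illustrative expansion of the paths above (register names are examples
// only). buildIndirectBranch emits:
//   adrp x16, <NewDestBB page>
//   add  x16, x16, <NewDestBB pageoff>
//   br   x16
// while the spill path parks X16 on the stack around the linker thunk:
//   str x16, [sp, #-16]!    // in MBB, followed by "b RestoreBB"
//   ldr x16, [sp], #16      // in RestoreBB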
378
379// Branch analysis.
380bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
381 MachineBasicBlock *&TBB,
382 MachineBasicBlock *&FBB,
383 SmallVectorImpl<MachineOperand> &Cond,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now; the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fallthrough, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
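// As with other analyzeBranch implementations, returning false means the
// terminators were understood: TBB/FBB receive the taken and fall-through
// destinations and Cond receives the encoding documented after
// parseCondBranch above; returning true means the block could not be
// analyzed.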
495
496bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 }
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4-bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use addOperand instead of addReg to keep the flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcReg = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::SUBREG_TO_REG:
714 // Check for the following way to define a 64-bit immediate:
715 // %0:gpr32 = MOVi32imm 1
716 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
717 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
718 return 0;
719 if (!DefMI->getOperand(2).isReg())
720 return 0;
721 if (!DefMI->getOperand(3).isImm() ||
722 DefMI->getOperand(3).getImm() != AArch64::sub_32)
723 return 0;
724 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
725 if (DefMI->getOpcode() != AArch64::MOVi32imm)
726 return 0;
727 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
728 return 0;
729 assert(Is64Bit);
730 SrcReg = AArch64::XZR;
731 Opc = AArch64::CSINCXr;
732 break;
733
734 case AArch64::MOVi32imm:
735 case AArch64::MOVi64imm:
736 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
737 return 0;
738 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
739 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
740 break;
741
742 case AArch64::ADDSXri:
743 case AArch64::ADDSWri:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to ADDXri and ADDWri.
749 [[fallthrough]];
750 case AArch64::ADDXri:
751 case AArch64::ADDWri:
752 // add x, 1 -> csinc.
753 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
754 DefMI->getOperand(3).getImm() != 0)
755 return 0;
756 SrcReg = DefMI->getOperand(1).getReg();
757 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
758 break;
759
760 case AArch64::ORNXrr:
761 case AArch64::ORNWrr: {
762 // not x -> csinv, represented as orn dst, xzr, src.
763 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
764 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
765 return 0;
766 SrcReg = DefMI->getOperand(2).getReg();
767 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
768 break;
769 }
770
771 case AArch64::SUBSXrr:
772 case AArch64::SUBSWrr:
773 // if NZCV is used, do not fold.
774 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
775 true) == -1)
776 return 0;
777 // fall-through to SUBXrr and SUBWrr.
778 [[fallthrough]];
779 case AArch64::SUBXrr:
780 case AArch64::SUBWrr: {
781 // neg x -> csneg, represented as sub dst, xzr, src.
782 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
783 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
784 return 0;
785 SrcReg = DefMI->getOperand(2).getReg();
786 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
787 break;
788 }
789 default:
790 return 0;
791 }
792 assert(Opc && SrcReg && "Missing parameters");
793
794 if (NewReg)
795 *NewReg = SrcReg;
796 return Opc;
797}
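// Examples of the folds recognized above (pseudo-assembly, for illustration
// only):
//   mov w1, #1         -> CSINCWr (select increments WZR, i.e. produces 1)
//   add x1, x2, #1     -> CSINCXr with x2 as the folded operand
//   orn x1, xzr, x2    -> CSINVXr with x2 as the folded operand (~x2)
//   sub x1, xzr, x2    -> CSNEGXr with x2 as the folded operand (-x2)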
798
799bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
800 ArrayRef<MachineOperand> Cond,
801 Register DstReg, Register TrueReg,
802 Register FalseReg, int &CondCycles,
803 int &TrueCycles,
804 int &FalseCycles) const {
805 // Check register classes.
806 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
807 const TargetRegisterClass *RC =
808 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
809 if (!RC)
810 return false;
811
812 // Also need to check the dest regclass, in case we're trying to optimize
813 // something like:
814 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
815 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
816 return false;
817
818 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
819 unsigned ExtraCondLat = Cond.size() != 1;
820
821 // GPRs are handled by csel.
822 // FIXME: Fold in x+1, -x, and ~x when applicable.
823 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
824 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
825 // Single-cycle csel, csinc, csinv, and csneg.
826 CondCycles = 1 + ExtraCondLat;
827 TrueCycles = FalseCycles = 1;
828 if (canFoldIntoCSel(MRI, TrueReg))
829 TrueCycles = 0;
830 else if (canFoldIntoCSel(MRI, FalseReg))
831 FalseCycles = 0;
832 return true;
833 }
834
835 // Scalar floating point is handled by fcsel.
836 // FIXME: Form fabs, fmin, and fmax when applicable.
837 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
838 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
839 CondCycles = 5 + ExtraCondLat;
840 TrueCycles = FalseCycles = 2;
841 return true;
842 }
843
844 // Can't do vectors.
845 return false;
846}
847
848void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
849 MachineBasicBlock::iterator I,
850 const DebugLoc &DL, Register DstReg,
851 ArrayRef<MachineOperand> Cond,
852 Register TrueReg, Register FalseReg) const {
853 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
854
855 // Parse the condition code, see parseCondBranch() above.
856 AArch64CC::CondCode CC;
857 switch (Cond.size()) {
858 default:
859 llvm_unreachable("Unknown condition opcode in Cond");
860 case 1: // b.cc
861 CC = AArch64CC::CondCode(Cond[0].getImm());
862 break;
863 case 3: { // cbz/cbnz
864 // We must insert a compare against 0.
865 bool Is64Bit;
866 switch (Cond[1].getImm()) {
867 default:
868 llvm_unreachable("Unknown branch opcode in Cond");
869 case AArch64::CBZW:
870 Is64Bit = false;
871 CC = AArch64CC::EQ;
872 break;
873 case AArch64::CBZX:
874 Is64Bit = true;
875 CC = AArch64CC::EQ;
876 break;
877 case AArch64::CBNZW:
878 Is64Bit = false;
879 CC = AArch64CC::NE;
880 break;
881 case AArch64::CBNZX:
882 Is64Bit = true;
883 CC = AArch64CC::NE;
884 break;
885 }
886 Register SrcReg = Cond[2].getReg();
887 if (Is64Bit) {
888 // cmp reg, #0 is actually subs xzr, reg, #0.
889 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
890 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
891 .addReg(SrcReg)
892 .addImm(0)
893 .addImm(0);
894 } else {
895 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
896 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
897 .addReg(SrcReg)
898 .addImm(0)
899 .addImm(0);
900 }
901 break;
902 }
903 case 4: { // tbz/tbnz
904 // We must insert a tst instruction.
905 switch (Cond[1].getImm()) {
906 default:
907 llvm_unreachable("Unknown branch opcode in Cond");
908 case AArch64::TBZW:
909 case AArch64::TBZX:
910 CC = AArch64CC::EQ;
911 break;
912 case AArch64::TBNZW:
913 case AArch64::TBNZX:
914 CC = AArch64CC::NE;
915 break;
916 }
917 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
918 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
919 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
920 .addReg(Cond[2].getReg())
921 .addImm(
922 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
923 else
924 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
925 .addReg(Cond[2].getReg())
926 .addImm(
927 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
928 break;
929 }
930 case 5: { // cb
931 // We must insert a cmp, that is a subs
932 // 0 1 2 3 4
933 // Cond is { -1, Opcode, CC, Op0, Op1 }
934 unsigned SUBSOpC, SUBSDestReg;
935 bool IsImm = false;
936 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
937 switch (Cond[1].getImm()) {
938 default:
939 llvm_unreachable("Unknown branch opcode in Cond");
940 case AArch64::CBWPri:
941 SUBSOpC = AArch64::SUBSWri;
942 SUBSDestReg = AArch64::WZR;
943 IsImm = true;
944 break;
945 case AArch64::CBXPri:
946 SUBSOpC = AArch64::SUBSXri;
947 SUBSDestReg = AArch64::XZR;
948 IsImm = true;
949 break;
950 case AArch64::CBWPrr:
951 SUBSOpC = AArch64::SUBSWrr;
952 SUBSDestReg = AArch64::WZR;
953 IsImm = false;
954 break;
955 case AArch64::CBXPrr:
956 SUBSOpC = AArch64::SUBSXrr;
957 SUBSDestReg = AArch64::XZR;
958 IsImm = false;
959 break;
960 }
961
962 if (IsImm)
963 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
964 .addReg(Cond[3].getReg())
965 .addImm(Cond[4].getImm())
966 .addImm(0);
967 else
968 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
969 .addReg(Cond[3].getReg())
970 .addReg(Cond[4].getReg());
971 }
972 }
973
974 unsigned Opc = 0;
975 const TargetRegisterClass *RC = nullptr;
976 bool TryFold = false;
977 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
978 RC = &AArch64::GPR64RegClass;
979 Opc = AArch64::CSELXr;
980 TryFold = true;
981 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
982 RC = &AArch64::GPR32RegClass;
983 Opc = AArch64::CSELWr;
984 TryFold = true;
985 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
986 RC = &AArch64::FPR64RegClass;
987 Opc = AArch64::FCSELDrrr;
988 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
989 RC = &AArch64::FPR32RegClass;
990 Opc = AArch64::FCSELSrrr;
991 }
992 assert(RC && "Unsupported regclass");
993
994 // Try folding simple instructions into the csel.
995 if (TryFold) {
996 unsigned NewReg = 0;
997 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
998 if (FoldedOpc) {
999 // The folded opcodes csinc, csinv and csneg apply the operation to
1000 // FalseReg, so we need to invert the condition.
1001 CC = AArch64CC::getInvertedCondCode(CC);
1002 TrueReg = FalseReg;
1003 } else
1004 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1005
1006 // Fold the operation. Leave any dead instructions for DCE to clean up.
1007 if (FoldedOpc) {
1008 FalseReg = NewReg;
1009 Opc = FoldedOpc;
1010 // Extend the live range of NewReg.
1011 MRI.clearKillFlags(NewReg);
1012 }
1013 }
1014
1015 // Pull all virtual registers into the appropriate class.
1016 MRI.constrainRegClass(TrueReg, RC);
1017 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1018 assert(
1019 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1020 FalseReg == AArch64::XZR) &&
1021 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1022 if (FalseReg.isVirtual())
1023 MRI.constrainRegClass(FalseReg, RC);
1024
1025 // Insert the csel.
1026 BuildMI(MBB, I, DL, get(Opc), DstReg)
1027 .addReg(TrueReg)
1028 .addReg(FalseReg)
1029 .addImm(CC);
1030}
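// Illustrative output for a Cond vector that came from "cbz w0, bb" (register
// names are placeholders): the compare against zero is rematerialized and the
// select keys off its flags:
//   subs wzr, w0, #0
//   csel wdst, wtrue, wfalse, eq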
1031
1032// Return true if Imm can be loaded into a register by a "cheap" sequence of
1033// instructions. For now, "cheap" means at most two instructions.
1034static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1035 if (BitSize == 32)
1036 return true;
1037
1038 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1039 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1040 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1041 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1042
1043 return Is.size() <= 2;
1044}
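// For example, 0x0000123400005678 expands to a MOVZ plus one MOVK (two
// instructions, still cheap), whereas an arbitrary constant such as
// 0x123456789abcdef0 needs a MOVZ plus three MOVKs and is therefore not
// treated as cheap; 32-bit immediates are always considered cheap above.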
1045
1046// FIXME: this implementation should be micro-architecture dependent, so a
1047// micro-architecture target hook should be introduced here in future.
1048bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1049 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1050 if (isExynosCheapAsMove(MI))
1051 return true;
1052 return MI.isAsCheapAsAMove();
1053 }
1054
1055 switch (MI.getOpcode()) {
1056 default:
1057 return MI.isAsCheapAsAMove();
1058
1059 case AArch64::ADDWrs:
1060 case AArch64::ADDXrs:
1061 case AArch64::SUBWrs:
1062 case AArch64::SUBXrs:
1063 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1064
1065 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1066 // ORRXri, it is as cheap as MOV.
1067 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1068 case AArch64::MOVi32imm:
1069 return isCheapImmediate(MI, 32);
1070 case AArch64::MOVi64imm:
1071 return isCheapImmediate(MI, 64);
1072 }
1073}
1074
1075bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1076 switch (MI.getOpcode()) {
1077 default:
1078 return false;
1079
1080 case AArch64::ADDWrs:
1081 case AArch64::ADDXrs:
1082 case AArch64::ADDSWrs:
1083 case AArch64::ADDSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 if (ShiftVal == 0)
1087 return true;
1088 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1089 }
1090
1091 case AArch64::ADDWrx:
1092 case AArch64::ADDXrx:
1093 case AArch64::ADDXrx64:
1094 case AArch64::ADDSWrx:
1095 case AArch64::ADDSXrx:
1096 case AArch64::ADDSXrx64: {
1097 unsigned Imm = MI.getOperand(3).getImm();
1098 switch (AArch64_AM::getArithExtendType(Imm)) {
1099 default:
1100 return false;
1101 case AArch64_AM::UXTB:
1102 case AArch64_AM::UXTH:
1103 case AArch64_AM::UXTW:
1104 case AArch64_AM::UXTX:
1105 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1106 }
1107 }
1108
1109 case AArch64::SUBWrs:
1110 case AArch64::SUBSWrs: {
1111 unsigned Imm = MI.getOperand(3).getImm();
1112 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1113 return ShiftVal == 0 ||
1114 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1115 }
1116
1117 case AArch64::SUBXrs:
1118 case AArch64::SUBSXrs: {
1119 unsigned Imm = MI.getOperand(3).getImm();
1120 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1121 return ShiftVal == 0 ||
1122 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1123 }
1124
1125 case AArch64::SUBWrx:
1126 case AArch64::SUBXrx:
1127 case AArch64::SUBXrx64:
1128 case AArch64::SUBSWrx:
1129 case AArch64::SUBSXrx:
1130 case AArch64::SUBSXrx64: {
1131 unsigned Imm = MI.getOperand(3).getImm();
1132 switch (AArch64_AM::getArithExtendType(Imm)) {
1133 default:
1134 return false;
1135 case AArch64_AM::UXTB:
1136 case AArch64_AM::UXTH:
1137 case AArch64_AM::UXTW:
1138 case AArch64_AM::UXTX:
1139 return AArch64_AM::getArithShiftValue(Imm) == 0;
1140 }
1141 }
1142
1143 case AArch64::LDRBBroW:
1144 case AArch64::LDRBBroX:
1145 case AArch64::LDRBroW:
1146 case AArch64::LDRBroX:
1147 case AArch64::LDRDroW:
1148 case AArch64::LDRDroX:
1149 case AArch64::LDRHHroW:
1150 case AArch64::LDRHHroX:
1151 case AArch64::LDRHroW:
1152 case AArch64::LDRHroX:
1153 case AArch64::LDRQroW:
1154 case AArch64::LDRQroX:
1155 case AArch64::LDRSBWroW:
1156 case AArch64::LDRSBWroX:
1157 case AArch64::LDRSBXroW:
1158 case AArch64::LDRSBXroX:
1159 case AArch64::LDRSHWroW:
1160 case AArch64::LDRSHWroX:
1161 case AArch64::LDRSHXroW:
1162 case AArch64::LDRSHXroX:
1163 case AArch64::LDRSWroW:
1164 case AArch64::LDRSWroX:
1165 case AArch64::LDRSroW:
1166 case AArch64::LDRSroX:
1167 case AArch64::LDRWroW:
1168 case AArch64::LDRWroX:
1169 case AArch64::LDRXroW:
1170 case AArch64::LDRXroX:
1171 case AArch64::PRFMroW:
1172 case AArch64::PRFMroX:
1173 case AArch64::STRBBroW:
1174 case AArch64::STRBBroX:
1175 case AArch64::STRBroW:
1176 case AArch64::STRBroX:
1177 case AArch64::STRDroW:
1178 case AArch64::STRDroX:
1179 case AArch64::STRHHroW:
1180 case AArch64::STRHHroX:
1181 case AArch64::STRHroW:
1182 case AArch64::STRHroX:
1183 case AArch64::STRQroW:
1184 case AArch64::STRQroX:
1185 case AArch64::STRSroW:
1186 case AArch64::STRSroX:
1187 case AArch64::STRWroW:
1188 case AArch64::STRWroX:
1189 case AArch64::STRXroW:
1190 case AArch64::STRXroX: {
1191 unsigned IsSigned = MI.getOperand(3).getImm();
1192 return !IsSigned;
1193 }
1194 }
1195}
1196
1197bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1198 unsigned Opc = MI.getOpcode();
1199 switch (Opc) {
1200 default:
1201 return false;
1202 case AArch64::SEH_StackAlloc:
1203 case AArch64::SEH_SaveFPLR:
1204 case AArch64::SEH_SaveFPLR_X:
1205 case AArch64::SEH_SaveReg:
1206 case AArch64::SEH_SaveReg_X:
1207 case AArch64::SEH_SaveRegP:
1208 case AArch64::SEH_SaveRegP_X:
1209 case AArch64::SEH_SaveFReg:
1210 case AArch64::SEH_SaveFReg_X:
1211 case AArch64::SEH_SaveFRegP:
1212 case AArch64::SEH_SaveFRegP_X:
1213 case AArch64::SEH_SetFP:
1214 case AArch64::SEH_AddFP:
1215 case AArch64::SEH_Nop:
1216 case AArch64::SEH_PrologEnd:
1217 case AArch64::SEH_EpilogStart:
1218 case AArch64::SEH_EpilogEnd:
1219 case AArch64::SEH_PACSignLR:
1220 case AArch64::SEH_SaveAnyRegQP:
1221 case AArch64::SEH_SaveAnyRegQPX:
1222 case AArch64::SEH_AllocZ:
1223 case AArch64::SEH_SaveZReg:
1224 case AArch64::SEH_SavePReg:
1225 return true;
1226 }
1227}
1228
1229bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1230 Register &SrcReg, Register &DstReg,
1231 unsigned &SubIdx) const {
1232 switch (MI.getOpcode()) {
1233 default:
1234 return false;
1235 case AArch64::SBFMXri: // aka sxtw
1236 case AArch64::UBFMXri: // aka uxtw
1237 // Check for the 32 -> 64 bit extension case, these instructions can do
1238 // much more.
1239 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1240 return false;
1241 // This is a signed or unsigned 32 -> 64 bit extension.
1242 SrcReg = MI.getOperand(1).getReg();
1243 DstReg = MI.getOperand(0).getReg();
1244 SubIdx = AArch64::sub_32;
1245 return true;
1246 }
1247}
1248
1249bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1250 const MachineInstr &MIa, const MachineInstr &MIb) const {
1251 const TargetRegisterInfo *TRI = &getRegisterInfo();
1252 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1253 int64_t OffsetA = 0, OffsetB = 0;
1254 TypeSize WidthA(0, false), WidthB(0, false);
1255 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1256
1257 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1258 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1259
1260 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1261 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1262 return false;
1263
1264 // Retrieve the base, the offset from the base, and the width. The width
1265 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1266 // the bases are identical, and the offset of the lower memory access plus
1267 // its width doesn't overlap the offset of the higher memory access,
1268 // then the memory accesses are different.
1269 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1270 // are assumed to have the same scale (vscale).
1271 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1272 WidthA, TRI) &&
1273 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1274 WidthB, TRI)) {
1275 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1276 OffsetAIsScalable == OffsetBIsScalable) {
1277 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1278 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1279 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1280 if (LowWidth.isScalable() == OffsetAIsScalable &&
1281 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1282 return true;
1283 }
1284 }
1285 return false;
1286}
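// Example of the disjointness check above: for "ldr x0, [x1, #8]" (offset 8,
// width 8) and "str x2, [x1, #16]" (offset 16, width 8), the base operands
// match and 8 + 8 <= 16, so the accesses cannot overlap and the function
// returns true.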
1287
1288bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1289 const MachineBasicBlock *MBB,
1290 const MachineFunction &MF) const {
1291 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1292 return true;
1293
1294 // Do not move an instruction that can be recognized as a branch target.
1295 if (hasBTISemantics(MI))
1296 return true;
1297
1298 switch (MI.getOpcode()) {
1299 case AArch64::HINT:
1300 // CSDB hints are scheduling barriers.
1301 if (MI.getOperand(0).getImm() == 0x14)
1302 return true;
1303 break;
1304 case AArch64::DSB:
1305 case AArch64::ISB:
1306 // DSB and ISB also are scheduling barriers.
1307 return true;
1308 case AArch64::MSRpstatesvcrImm1:
1309 // SMSTART and SMSTOP are also scheduling barriers.
1310 return true;
1311 default:;
1312 }
1313 if (isSEHInstruction(MI))
1314 return true;
1315 auto Next = std::next(MI.getIterator());
1316 return Next != MBB->end() && Next->isCFIInstruction();
1317}
1318
1319/// analyzeCompare - For a comparison instruction, return the source registers
1320/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1321/// Return true if the comparison instruction can be analyzed.
1322bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1323 Register &SrcReg2, int64_t &CmpMask,
1324 int64_t &CmpValue) const {
1325 // The first operand can be a frame index where we'd normally expect a
1326 // register.
1327 // FIXME: Pass subregisters out of analyzeCompare
1328 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1329 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1330 return false;
1331
1332 switch (MI.getOpcode()) {
1333 default:
1334 break;
1335 case AArch64::PTEST_PP:
1336 case AArch64::PTEST_PP_ANY:
1337 case AArch64::PTEST_PP_FIRST:
1338 SrcReg = MI.getOperand(0).getReg();
1339 SrcReg2 = MI.getOperand(1).getReg();
1340 if (MI.getOperand(2).getSubReg())
1341 return false;
1342
1343 // Not sure about the mask and value for now...
1344 CmpMask = ~0;
1345 CmpValue = 0;
1346 return true;
1347 case AArch64::SUBSWrr:
1348 case AArch64::SUBSWrs:
1349 case AArch64::SUBSWrx:
1350 case AArch64::SUBSXrr:
1351 case AArch64::SUBSXrs:
1352 case AArch64::SUBSXrx:
1353 case AArch64::ADDSWrr:
1354 case AArch64::ADDSWrs:
1355 case AArch64::ADDSWrx:
1356 case AArch64::ADDSXrr:
1357 case AArch64::ADDSXrs:
1358 case AArch64::ADDSXrx:
1359 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1360 SrcReg = MI.getOperand(1).getReg();
1361 SrcReg2 = MI.getOperand(2).getReg();
1362
1363 // FIXME: Pass subregisters out of analyzeCompare
1364 if (MI.getOperand(2).getSubReg())
1365 return false;
1366
1367 CmpMask = ~0;
1368 CmpValue = 0;
1369 return true;
1370 case AArch64::SUBSWri:
1371 case AArch64::ADDSWri:
1372 case AArch64::SUBSXri:
1373 case AArch64::ADDSXri:
1374 SrcReg = MI.getOperand(1).getReg();
1375 SrcReg2 = 0;
1376 CmpMask = ~0;
1377 CmpValue = MI.getOperand(2).getImm();
1378 return true;
1379 case AArch64::ANDSWri:
1380 case AArch64::ANDSXri:
1381 // ANDS does not use the same encoding scheme as the other xxxS
1382 // instructions.
1383 SrcReg = MI.getOperand(1).getReg();
1384 SrcReg2 = 0;
1385 CmpMask = ~0;
1386 CmpValue = AArch64_AM::decodeLogicalImmediate(
1387 MI.getOperand(2).getImm(),
1388 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1389 return true;
1390 }
1391
1392 return false;
1393}
1394
1395static bool UpdateOperandRegClass(MachineInstr &Instr) {
1396 MachineBasicBlock *MBB = Instr.getParent();
1397 assert(MBB && "Can't get MachineBasicBlock here");
1398 MachineFunction *MF = MBB->getParent();
1399 assert(MF && "Can't get MachineFunction here");
1400 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1401 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1402 MachineRegisterInfo *MRI = &MF->getRegInfo();
1403
1404 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1405 ++OpIdx) {
1406 MachineOperand &MO = Instr.getOperand(OpIdx);
1407 const TargetRegisterClass *OpRegCstraints =
1408 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1409
1410 // If there's no constraint, there's nothing to do.
1411 if (!OpRegCstraints)
1412 continue;
1413 // If the operand is a frame index, there's nothing to do here.
1414 // A frame index operand will resolve correctly during PEI.
1415 if (MO.isFI())
1416 continue;
1417
1418 assert(MO.isReg() &&
1419 "Operand has register constraints without being a register!");
1420
1421 Register Reg = MO.getReg();
1422 if (Reg.isPhysical()) {
1423 if (!OpRegCstraints->contains(Reg))
1424 return false;
1425 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1426 !MRI->constrainRegClass(Reg, OpRegCstraints))
1427 return false;
1428 }
1429
1430 return true;
1431}
1432
1433/// Return the opcode that does not set flags when possible - otherwise
1434/// return the original opcode. The caller is responsible for doing the actual
1435/// substitution and legality checking.
1436unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1437 // Don't convert all compare instructions, because for some the zero register
1438 // encoding becomes the sp register.
1439 bool MIDefinesZeroReg = false;
1440 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1441 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1442 MIDefinesZeroReg = true;
1443
1444 switch (MI.getOpcode()) {
1445 default:
1446 return MI.getOpcode();
1447 case AArch64::ADDSWrr:
1448 return AArch64::ADDWrr;
1449 case AArch64::ADDSWri:
1450 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1451 case AArch64::ADDSWrs:
1452 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1453 case AArch64::ADDSWrx:
1454 return AArch64::ADDWrx;
1455 case AArch64::ADDSXrr:
1456 return AArch64::ADDXrr;
1457 case AArch64::ADDSXri:
1458 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1459 case AArch64::ADDSXrs:
1460 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1461 case AArch64::ADDSXrx:
1462 return AArch64::ADDXrx;
1463 case AArch64::SUBSWrr:
1464 return AArch64::SUBWrr;
1465 case AArch64::SUBSWri:
1466 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1467 case AArch64::SUBSWrs:
1468 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1469 case AArch64::SUBSWrx:
1470 return AArch64::SUBWrx;
1471 case AArch64::SUBSXrr:
1472 return AArch64::SUBXrr;
1473 case AArch64::SUBSXri:
1474 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1475 case AArch64::SUBSXrs:
1476 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1477 case AArch64::SUBSXrx:
1478 return AArch64::SUBXrx;
1479 }
1480}
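// For example, a SUBSWri whose destination is an ordinary GPR becomes SUBWri,
// but a SUBSWri that defines WZR keeps its flag-setting opcode: in the
// non-flag-setting encoding, register 31 is interpreted as WSP rather than
// WZR, which is what the MIDefinesZeroReg check above guards against.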
1481
1482enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1483
1484/// True when condition flags are accessed (either by writing or reading)
1485/// on the instruction trace starting at From and ending at To.
1486///
1487/// Note: If From and To are from different blocks it's assumed the condition
1488/// flags are accessed on the path.
1489static bool areCFlagsAccessedBetweenInstrs(
1490 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1491 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1492 // Early exit if To is at the beginning of the BB.
1493 if (To == To->getParent()->begin())
1494 return true;
1495
1496 // Check whether the instructions are in the same basic block
1497 // If not, assume the condition flags might get modified somewhere.
1498 if (To->getParent() != From->getParent())
1499 return true;
1500
1501 // From must be above To.
1502 assert(std::any_of(
1503 ++To.getReverse(), To->getParent()->rend(),
1504 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1505
1506 // We iterate backward starting at \p To until we hit \p From.
1507 for (const MachineInstr &Instr :
1508 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1509 if (((AccessToCheck & AK_Write) &&
1510 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1511 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1512 return true;
1513 }
1514 return false;
1515}
1516
1517std::optional<unsigned>
1518AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1519 MachineInstr *Pred,
1520 const MachineRegisterInfo *MRI) const {
1521 unsigned MaskOpcode = Mask->getOpcode();
1522 unsigned PredOpcode = Pred->getOpcode();
1523 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1524 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1525
1526 if (PredIsWhileLike) {
1527 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1528 // instruction and the condition is "any" since WHILEcc does an implicit
1529 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1530 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1531 return PredOpcode;
1532
1533 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1534 // redundant since WHILE performs an implicit PTEST with an all active
1535 // mask.
1536 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1537 getElementSizeForOpcode(MaskOpcode) ==
1538 getElementSizeForOpcode(PredOpcode))
1539 return PredOpcode;
1540
1541 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1542 // WHILEcc performs an implicit PTEST with an all active mask, setting
1543 // the N flag as the PTEST_FIRST would.
1544 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1545 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1546 return PredOpcode;
1547
1548 return {};
1549 }
1550
1551 if (PredIsPTestLike) {
1552 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1553 // instruction that sets the flags as PTEST would and the condition is
1554 // "any" since PG is always a subset of the governing predicate of the
1555 // ptest-like instruction.
1556 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1557 return PredOpcode;
1558
1559 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1560
1561 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1562 // to look through a copy and try again. This is because some instructions
1563 // take a predicate whose register class is a subset of its result class.
1564 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1565 PTestLikeMask->getOperand(1).getReg().isVirtual())
1566 PTestLikeMask =
1567 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1568
1569 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1570 // element size matches and either the PTEST_LIKE instruction uses
1571 // the same all active mask or the condition is "any".
1572 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1573 getElementSizeForOpcode(MaskOpcode) ==
1574 getElementSizeForOpcode(PredOpcode)) {
1575 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1576 return PredOpcode;
1577 }
1578
1579 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1580 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1581 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1582 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1583 // performed by the compare could consider fewer lanes for these element
1584 // sizes.
1585 //
1586 // For example, consider
1587 //
1588 // ptrue p0.b ; P0=1111-1111-1111-1111
1589 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1590 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1591 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1592 // ; ^ last active
1593 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1594 // ; ^ last active
1595 //
1596 // where the compare generates a canonical all active 32-bit predicate
1597 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1598 // active flag, whereas the PTEST instruction with the same mask doesn't.
1599 // For PTEST_ANY this doesn't apply as the flags in this case would be
1600 // identical regardless of element size.
1601 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1602 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1603 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1604 return PredOpcode;
1605
1606 return {};
1607 }
1608
1609 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1610 // opcode so the PTEST becomes redundant.
1611 switch (PredOpcode) {
1612 case AArch64::AND_PPzPP:
1613 case AArch64::BIC_PPzPP:
1614 case AArch64::EOR_PPzPP:
1615 case AArch64::NAND_PPzPP:
1616 case AArch64::NOR_PPzPP:
1617 case AArch64::ORN_PPzPP:
1618 case AArch64::ORR_PPzPP:
1619 case AArch64::BRKA_PPzP:
1620 case AArch64::BRKPA_PPzPP:
1621 case AArch64::BRKB_PPzP:
1622 case AArch64::BRKPB_PPzPP:
1623 case AArch64::RDFFR_PPz: {
1624 // Check to see if our mask is the same. If not the resulting flag bits
1625 // may be different and we can't remove the ptest.
1626 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1627 if (Mask != PredMask)
1628 return {};
1629 break;
1630 }
1631 case AArch64::BRKN_PPzP: {
1632 // BRKN uses an all active implicit mask to set flags unlike the other
1633 // flag-setting instructions.
1634 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1635 if ((MaskOpcode != AArch64::PTRUE_B) ||
1636 (Mask->getOperand(1).getImm() != 31))
1637 return {};
1638 break;
1639 }
1640 case AArch64::PTRUE_B:
1641 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1642 break;
1643 default:
1644 // Bail out if we don't recognize the input
1645 return {};
1646 }
1647
1648 return convertToFlagSettingOpc(PredOpcode);
1649}
1650
1651/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1652/// operation which could set the flags in an identical manner
1653bool AArch64InstrInfo::optimizePTestInstr(
1654 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1655 const MachineRegisterInfo *MRI) const {
1656 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1657 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1658
1659 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1660 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1661 // before the branch to extract each subregister.
1662 auto Op = Pred->getOperand(1);
1663 if (Op.isReg() && Op.getReg().isVirtual() &&
1664 Op.getSubReg() == AArch64::psub0)
1665 Pred = MRI->getUniqueVRegDef(Op.getReg());
1666 }
1667
1668 unsigned PredOpcode = Pred->getOpcode();
1669 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1670 if (!NewOp)
1671 return false;
1672
1673 const TargetRegisterInfo *TRI = &getRegisterInfo();
1674
1675 // If another instruction between Pred and PTest accesses flags, don't remove
1676 // the ptest or update the earlier instruction to modify them.
1677 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1678 return false;
1679
1680 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1681 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1682 // operand to be replaced with an equivalent instruction that also sets the
1683 // flags.
1684 PTest->eraseFromParent();
1685 if (*NewOp != PredOpcode) {
1686 Pred->setDesc(get(*NewOp));
1687 bool succeeded = UpdateOperandRegClass(*Pred);
1688 (void)succeeded;
1689 assert(succeeded && "Operands have incompatible register classes!");
1690 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1691 }
1692
1693 // Ensure that the flags def is live.
1694 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1695 unsigned i = 0, e = Pred->getNumOperands();
1696 for (; i != e; ++i) {
1697 MachineOperand &MO = Pred->getOperand(i);
1698 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1699 MO.setIsDead(false);
1700 break;
1701 }
1702 }
1703 }
1704 return true;
1705}
1706
1707/// Try to optimize a compare instruction. A compare instruction is an
1708/// instruction which produces AArch64::NZCV. It is truly a compare
1709/// instruction when there are no uses of its destination register.
1711///
1712/// The following steps are tried in order:
1713/// 1. Convert CmpInstr into an unconditional version.
1714/// 2. Remove CmpInstr if above there is an instruction producing a needed
1715/// condition code or an instruction which can be converted into such an
1716/// instruction.
1717/// Only comparison with zero is supported.
1718bool AArch64InstrInfo::optimizeCompareInstr(
1719 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1720 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1721 assert(CmpInstr.getParent());
1722 assert(MRI);
1723
1724 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1725 int DeadNZCVIdx =
1726 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1727 if (DeadNZCVIdx != -1) {
1728 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1729 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1730 CmpInstr.eraseFromParent();
1731 return true;
1732 }
1733 unsigned Opc = CmpInstr.getOpcode();
1734 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1735 if (NewOpc == Opc)
1736 return false;
1737 const MCInstrDesc &MCID = get(NewOpc);
1738 CmpInstr.setDesc(MCID);
1739 CmpInstr.removeOperand(DeadNZCVIdx);
1740 bool succeeded = UpdateOperandRegClass(CmpInstr);
1741 (void)succeeded;
1742 assert(succeeded && "Some operands reg class are incompatible!");
1743 return true;
1744 }
1745
1746 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1747 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1748 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1749 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1750
1751 if (SrcReg2 != 0)
1752 return false;
1753
1754 // CmpInstr is a Compare instruction if destination register is not used.
1755 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1756 return false;
1757
1758 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1759 return true;
1760 return (CmpValue == 0 || CmpValue == 1) &&
1761 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1762}
1763
1764/// Get the opcode of the S (flag-setting) version of Instr.
1765/// If Instr is already an S version, its opcode is returned.
1766/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1767/// version or we are not interested in it.
1768static unsigned sForm(MachineInstr &Instr) {
1769 switch (Instr.getOpcode()) {
1770 default:
1771 return AArch64::INSTRUCTION_LIST_END;
1772
1773 case AArch64::ADDSWrr:
1774 case AArch64::ADDSWri:
1775 case AArch64::ADDSXrr:
1776 case AArch64::ADDSXri:
1777 case AArch64::SUBSWrr:
1778 case AArch64::SUBSWri:
1779 case AArch64::SUBSXrr:
1780 case AArch64::SUBSXri:
1781 return Instr.getOpcode();
1782
1783 case AArch64::ADDWrr:
1784 return AArch64::ADDSWrr;
1785 case AArch64::ADDWri:
1786 return AArch64::ADDSWri;
1787 case AArch64::ADDXrr:
1788 return AArch64::ADDSXrr;
1789 case AArch64::ADDXri:
1790 return AArch64::ADDSXri;
1791 case AArch64::ADCWr:
1792 return AArch64::ADCSWr;
1793 case AArch64::ADCXr:
1794 return AArch64::ADCSXr;
1795 case AArch64::SUBWrr:
1796 return AArch64::SUBSWrr;
1797 case AArch64::SUBWri:
1798 return AArch64::SUBSWri;
1799 case AArch64::SUBXrr:
1800 return AArch64::SUBSXrr;
1801 case AArch64::SUBXri:
1802 return AArch64::SUBSXri;
1803 case AArch64::SBCWr:
1804 return AArch64::SBCSWr;
1805 case AArch64::SBCXr:
1806 return AArch64::SBCSXr;
1807 case AArch64::ANDWri:
1808 return AArch64::ANDSWri;
1809 case AArch64::ANDXri:
1810 return AArch64::ANDSXri;
1811 }
1812}
1813
1814/// Check if AArch64::NZCV should be alive in successors of MBB.
1815static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1816 for (auto *BB : MBB->successors())
1817 if (BB->isLiveIn(AArch64::NZCV))
1818 return true;
1819 return false;
1820}
1821
1822/// \returns The condition code operand index for \p Instr if it is a branch
1823/// or select and -1 otherwise.
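///
/// For example (illustrative), a Bcc yields index 0 (its condition-code
/// immediate), while a CSELWr yields index 3 (the condition operand that
/// follows the two source registers).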
1824static int
1825findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1826 switch (Instr.getOpcode()) {
1827 default:
1828 return -1;
1829
1830 case AArch64::Bcc: {
1831 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1832 assert(Idx >= 2);
1833 return Idx - 2;
1834 }
1835
1836 case AArch64::CSINVWr:
1837 case AArch64::CSINVXr:
1838 case AArch64::CSINCWr:
1839 case AArch64::CSINCXr:
1840 case AArch64::CSELWr:
1841 case AArch64::CSELXr:
1842 case AArch64::CSNEGWr:
1843 case AArch64::CSNEGXr:
1844 case AArch64::FCSELSrrr:
1845 case AArch64::FCSELDrrr: {
1846 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1847 assert(Idx >= 1);
1848 return Idx - 1;
1849 }
1850 }
1851}
1852
1853/// Find a condition code used by the instruction.
1854/// Returns AArch64CC::Invalid if either the instruction does not use condition
1855/// codes or we don't optimize CmpInstr in the presence of such instructions.
1856static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1857 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1858 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1859 Instr.getOperand(CCIdx).getImm())
1860 : AArch64CC::Invalid;
1861}
1862
1864UsedNZCV llvm::getUsedNZCV(AArch64CC::CondCode CC) {
1865 UsedNZCV UsedFlags;
1866 switch (CC) {
1867 default:
1868 break;
1869
1870 case AArch64CC::EQ: // Z set
1871 case AArch64CC::NE: // Z clear
1872 UsedFlags.Z = true;
1873 break;
1874
1875 case AArch64CC::HI: // Z clear and C set
1876 case AArch64CC::LS: // Z set or C clear
1877 UsedFlags.Z = true;
1878 [[fallthrough]];
1879 case AArch64CC::HS: // C set
1880 case AArch64CC::LO: // C clear
1881 UsedFlags.C = true;
1882 break;
1883
1884 case AArch64CC::MI: // N set
1885 case AArch64CC::PL: // N clear
1886 UsedFlags.N = true;
1887 break;
1888
1889 case AArch64CC::VS: // V set
1890 case AArch64CC::VC: // V clear
1891 UsedFlags.V = true;
1892 break;
1893
1894 case AArch64CC::GT: // Z clear, N and V the same
1895 case AArch64CC::LE: // Z set, N and V differ
1896 UsedFlags.Z = true;
1897 [[fallthrough]];
1898 case AArch64CC::GE: // N and V the same
1899 case AArch64CC::LT: // N and V differ
1900 UsedFlags.N = true;
1901 UsedFlags.V = true;
1902 break;
1903 }
1904 return UsedFlags;
1905}
1906
1907/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1908/// flags are not alive in the successors of the block containing both \p CmpInstr and \p MI.
1909/// \returns std::nullopt otherwise.
1910///
1911/// Collects the instructions using those flags in \p CCUseInstrs if provided.
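///
/// For example (illustrative only), given
/// \code
///   subs w8, w0, #1          <- CmpInstr
///   csel w9, w1, w2, eq      <- collected in CCUseInstrs, reads only Z
/// \endcode
/// the returned mask has just the Z flag set.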
1912std::optional<UsedNZCV>
1913llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1914 const TargetRegisterInfo &TRI,
1915 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1916 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1917 if (MI.getParent() != CmpParent)
1918 return std::nullopt;
1919
1920 if (areCFlagsAliveInSuccessors(CmpParent))
1921 return std::nullopt;
1922
1923 UsedNZCV NZCVUsedAfterCmp;
1924 for (MachineInstr &Instr : instructionsWithoutDebug(
1925 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1926 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1927 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1928 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1929 return std::nullopt;
1930 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1931 if (CCUseInstrs)
1932 CCUseInstrs->push_back(&Instr);
1933 }
1934 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1935 break;
1936 }
1937 return NZCVUsedAfterCmp;
1938}
1939
1940static bool isADDSRegImm(unsigned Opcode) {
1941 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1942}
1943
1944static bool isSUBSRegImm(unsigned Opcode) {
1945 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1946}
1947
1948/// Check if CmpInstr can be substituted by MI.
1949///
1950/// CmpInstr can be substituted:
1951/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1952/// - and, MI and CmpInstr are from the same MachineBB
1953/// - and, condition flags are not alive in successors of the CmpInstr parent
1954/// - and, if MI opcode is the S form there must be no defs of flags between
1955/// MI and CmpInstr
1956/// or if MI opcode is not the S form there must be neither defs of flags
1957/// nor uses of flags between MI and CmpInstr.
1958/// - and, the C and V flags are not used after CmpInstr,
1959///   or the V flag is used but MI produces a poison value if signed
1960///   overflow occurs.
1961static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1962 const TargetRegisterInfo &TRI) {
1963 // NOTE: this assertion guarantees that MI.getOpcode() is an add or
1964 // subtraction that may or may not set flags.
1965 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1966
1967 const unsigned CmpOpcode = CmpInstr.getOpcode();
1968 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1969 return false;
1970
1971 assert((CmpInstr.getOperand(2).isImm() &&
1972 CmpInstr.getOperand(2).getImm() == 0) &&
1973 "Caller guarantees that CmpInstr compares with constant 0");
1974
1975 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1976 if (!NZVCUsed || NZVCUsed->C)
1977 return false;
1978
1979 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1980 // '%vreg = add ...' or '%vreg = sub ...'.
1981 // Condition flag V is used to indicate signed overflow.
1982 // 1) MI and CmpInstr set N and V to the same value.
1983 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1984 // signed overflow occurs, so CmpInstr could still be simplified away.
1985 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1986 return false;
1987
1988 AccessKind AccessToCheck = AK_Write;
1989 if (sForm(MI) != MI.getOpcode())
1990 AccessToCheck = AK_All;
1991 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1992}
1993
1994/// Substitute an instruction comparing to zero with another instruction
1995/// which produces needed condition flags.
1996///
1997/// Return true on success.
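///
/// A sketch of the rewrite this enables (illustrative only):
/// \code
///   sub w8, w0, w1
///   cmp w8, #0
///   b.eq
/// \endcode
/// to
/// \code
///   subs w8, w0, w1
///   b.eq
/// \endcode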
1998bool AArch64InstrInfo::substituteCmpToZero(
1999 MachineInstr &CmpInstr, unsigned SrcReg,
2000 const MachineRegisterInfo &MRI) const {
2001 // Get the unique definition of SrcReg.
2002 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2003 if (!MI)
2004 return false;
2005
2006 const TargetRegisterInfo &TRI = getRegisterInfo();
2007
2008 unsigned NewOpc = sForm(*MI);
2009 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2010 return false;
2011
2012 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2013 return false;
2014
2015 // Update the instruction to set NZCV.
2016 MI->setDesc(get(NewOpc));
2017 CmpInstr.eraseFromParent();
2018 bool succeeded = UpdateOperandRegClass(*MI);
2019 (void)succeeded;
2020 assert(succeeded && "Some operands reg class are incompatible!");
2021 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2022 return true;
2023}
2024
2025/// \returns True if \p CmpInstr can be removed.
2026///
2027/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2028/// codes used in \p CCUseInstrs must be inverted.
2029static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
2030 int CmpValue, const TargetRegisterInfo &TRI,
2031 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
2032 bool &IsInvertCC) {
2033 assert((CmpValue == 0 || CmpValue == 1) &&
2034 "Only comparisons to 0 or 1 considered for removal!");
2035
2036 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2037 unsigned MIOpc = MI.getOpcode();
2038 if (MIOpc == AArch64::CSINCWr) {
2039 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2040 MI.getOperand(2).getReg() != AArch64::WZR)
2041 return false;
2042 } else if (MIOpc == AArch64::CSINCXr) {
2043 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2044 MI.getOperand(2).getReg() != AArch64::XZR)
2045 return false;
2046 } else {
2047 return false;
2048 }
2049 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2050 if (MICC == AArch64CC::Invalid)
2051 return false;
2052
2053 // NZCV needs to be defined
2054 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2055 return false;
2056
2057 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2058 const unsigned CmpOpcode = CmpInstr.getOpcode();
2059 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2060 if (CmpValue && !IsSubsRegImm)
2061 return false;
2062 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2063 return false;
2064
2065 // MI conditions allowed: eq, ne, mi, pl
2066 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2067 if (MIUsedNZCV.C || MIUsedNZCV.V)
2068 return false;
2069
2070 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2071 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2072 // Condition flags are not used in the successors of CmpInstr's basic block,
2073 // and only the Z or N flags are allowed to be used after CmpInstr within it.
2074 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2075 return false;
2076 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2077 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2078 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2079 return false;
2080 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2081 if (MIUsedNZCV.N && !CmpValue)
2082 return false;
2083
2084 // There must be no defs of flags between MI and CmpInstr
2085 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2086 return false;
2087
2088 // Condition code is inverted in the following cases:
2089 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2090 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2091 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2092 (!CmpValue && MICC == AArch64CC::NE);
2093 return true;
2094}
2095
2096/// Remove comparison in csinc-cmp sequence
2097///
2098/// Examples:
2099/// 1. \code
2100/// csinc w9, wzr, wzr, ne
2101/// cmp w9, #0
2102/// b.eq
2103/// \endcode
2104/// to
2105/// \code
2106/// csinc w9, wzr, wzr, ne
2107/// b.ne
2108/// \endcode
2109///
2110/// 2. \code
2111/// csinc x2, xzr, xzr, mi
2112/// cmp x2, #1
2113/// b.pl
2114/// \endcode
2115/// to
2116/// \code
2117/// csinc x2, xzr, xzr, mi
2118/// b.pl
2119/// \endcode
2120///
2121/// \param CmpInstr comparison instruction
2122/// \return True when comparison removed
2123bool AArch64InstrInfo::removeCmpToZeroOrOne(
2124 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2125 const MachineRegisterInfo &MRI) const {
2126 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2127 if (!MI)
2128 return false;
2129 const TargetRegisterInfo &TRI = getRegisterInfo();
2130 SmallVector<MachineInstr *, 4> CCUseInstrs;
2131 bool IsInvertCC = false;
2132 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2133 IsInvertCC))
2134 return false;
2135 // Make transformation
2136 CmpInstr.eraseFromParent();
2137 if (IsInvertCC) {
2138 // Invert condition codes in CmpInstr CC users
2139 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2140 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2141 assert(Idx >= 0 && "Unexpected instruction using CC.");
2142 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2143 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2144 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2145 CCOperand.setImm(CCUse);
2146 }
2147 }
2148 return true;
2149}
2150
2151bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2152 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2153 MI.getOpcode() != AArch64::CATCHRET)
2154 return false;
2155
2156 MachineBasicBlock &MBB = *MI.getParent();
2157 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2158 auto TRI = Subtarget.getRegisterInfo();
2159 DebugLoc DL = MI.getDebugLoc();
2160
2161 if (MI.getOpcode() == AArch64::CATCHRET) {
2162 // Skip to the first instruction before the epilog.
2163 const TargetInstrInfo *TII =
2164 MBB.getParent()->getSubtarget().getInstrInfo();
2165 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2166 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
2167 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2168 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2169 FirstEpilogSEH != MBB.begin())
2170 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2171 if (FirstEpilogSEH != MBB.begin())
2172 FirstEpilogSEH = std::next(FirstEpilogSEH);
2173 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2174 .addReg(AArch64::X0, RegState::Define)
2175 .addMBB(TargetMBB);
2176 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2177 .addReg(AArch64::X0, RegState::Define)
2178 .addReg(AArch64::X0)
2179 .addMBB(TargetMBB)
2180 .addImm(0);
2181 TargetMBB->setMachineBlockAddressTaken();
2182 return true;
2183 }
2184
2185 Register Reg = MI.getOperand(0).getReg();
2186 const Module &M = *MBB.getParent()->getFunction().getParent();
2187 if (M.getStackProtectorGuard() == "sysreg") {
2188 const AArch64SysReg::SysReg *SrcReg =
2189 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2190 if (!SrcReg)
2191 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2192
2193 // mrs xN, sysreg
2194 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2195 .addDef(Reg)
2196 .addImm(SrcReg->Encoding);
2197 int Offset = M.getStackProtectorGuardOffset();
2198 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2199 // ldr xN, [xN, #offset]
2200 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2201 .addDef(Reg)
2202 .addUse(Reg, RegState::Kill)
2203 .addImm(Offset / 8);
2204 } else if (Offset >= -256 && Offset <= 255) {
2205 // ldur xN, [xN, #offset]
2206 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2207 .addDef(Reg)
2208 .addUse(Reg, RegState::Kill)
2209 .addImm(Offset);
2210 } else if (Offset >= -4095 && Offset <= 4095) {
2211 if (Offset > 0) {
2212 // add xN, xN, #offset
2213 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2214 .addDef(Reg)
2215 .addUse(Reg, RegState::Kill)
2216 .addImm(Offset)
2217 .addImm(0);
2218 } else {
2219 // sub xN, xN, #offset
2220 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2221 .addDef(Reg)
2222 .addUse(Reg, RegState::Kill)
2223 .addImm(-Offset)
2224 .addImm(0);
2225 }
2226 // ldr xN, [xN]
2227 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2228 .addDef(Reg)
2229 .addUse(Reg, RegState::Kill)
2230 .addImm(0);
2231 } else {
2232 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2233 // than 32760.
2234 // It might be nice to use AArch64::MOVi32imm here, which would get
2235 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2236 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2237 // AArch64FrameLowering might help us find such a scratch register
2238 // though. If we failed to find a scratch register, we could emit a
2239 // stream of add instructions to build up the immediate. Or, we could try
2240 // to insert a AArch64::MOVi32imm before register allocation so that we
2241 // didn't need to scavenge for a scratch register.
2242 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2243 }
2244 MBB.erase(MI);
2245 return true;
2246 }
2247
2248 const GlobalValue *GV =
2249 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2250 const TargetMachine &TM = MBB.getParent()->getTarget();
2251 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2252 const unsigned char MO_NC = AArch64II::MO_NC;
2253
2254 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2255 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2256 .addGlobalAddress(GV, 0, OpFlags);
2257 if (Subtarget.isTargetILP32()) {
2258 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2259 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2260 .addDef(Reg32, RegState::Dead)
2261 .addUse(Reg, RegState::Kill)
2262 .addImm(0)
2263 .addMemOperand(*MI.memoperands_begin())
2264 .addDef(Reg, RegState::Implicit);
2265 } else {
2266 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2267 .addUse(Reg, RegState::Kill)
2268 .addImm(0)
2269 .addMemOperand(*MI.memoperands_begin());
2270 }
2271 } else if (TM.getCodeModel() == CodeModel::Large) {
2272 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2273 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2274 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2275 .addImm(0);
2276 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2277 .addReg(Reg, RegState::Kill)
2278 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2279 .addImm(16);
2280 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2281 .addReg(Reg, RegState::Kill)
2282 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2283 .addImm(32);
2284 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2285 .addReg(Reg, RegState::Kill)
2286 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2287 .addImm(48);
2288 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2289 .addReg(Reg, RegState::Kill)
2290 .addImm(0)
2291 .addMemOperand(*MI.memoperands_begin());
2292 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2293 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2294 .addGlobalAddress(GV, 0, OpFlags);
2295 } else {
2296 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2297 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2298 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2299 if (Subtarget.isTargetILP32()) {
2300 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2301 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2302 .addDef(Reg32, RegState::Dead)
2303 .addUse(Reg, RegState::Kill)
2304 .addGlobalAddress(GV, 0, LoFlags)
2305 .addMemOperand(*MI.memoperands_begin())
2306 .addDef(Reg, RegState::Implicit);
2307 } else {
2308 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2309 .addUse(Reg, RegState::Kill)
2310 .addGlobalAddress(GV, 0, LoFlags)
2311 .addMemOperand(*MI.memoperands_begin());
2312 }
2313 }
2314
2315 MBB.erase(MI);
2316
2317 return true;
2318}
2319
2320// Return true if this instruction simply sets its single destination register
2321// to zero. This is equivalent to a register rename of the zero-register.
2322bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2323 switch (MI.getOpcode()) {
2324 default:
2325 break;
2326 case AArch64::MOVZWi:
2327 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2328 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2329 assert(MI.getDesc().getNumOperands() == 3 &&
2330 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2331 return true;
2332 }
2333 break;
2334 case AArch64::ANDWri: // and Rd, Rzr, #imm
2335 return MI.getOperand(1).getReg() == AArch64::WZR;
2336 case AArch64::ANDXri:
2337 return MI.getOperand(1).getReg() == AArch64::XZR;
2338 case TargetOpcode::COPY:
2339 return MI.getOperand(1).getReg() == AArch64::WZR;
2340 }
2341 return false;
2342}
2343
2344// Return true if this instruction simply renames a general register without
2345// modifying bits.
2346bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2347 switch (MI.getOpcode()) {
2348 default:
2349 break;
2350 case TargetOpcode::COPY: {
2351 // GPR32 copies will be lowered to ORRXrs
2352 Register DstReg = MI.getOperand(0).getReg();
2353 return (AArch64::GPR32RegClass.contains(DstReg) ||
2354 AArch64::GPR64RegClass.contains(DstReg));
2355 }
2356 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2357 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2358 assert(MI.getDesc().getNumOperands() == 4 &&
2359 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2360 return true;
2361 }
2362 break;
2363 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2364 if (MI.getOperand(2).getImm() == 0) {
2365 assert(MI.getDesc().getNumOperands() == 4 &&
2366 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2367 return true;
2368 }
2369 break;
2370 }
2371 return false;
2372}
2373
2374// Return true if this instruction simply renames a floating-point/vector
2375// register without modifying bits.
2376bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2377 switch (MI.getOpcode()) {
2378 default:
2379 break;
2380 case TargetOpcode::COPY: {
2381 Register DstReg = MI.getOperand(0).getReg();
2382 return AArch64::FPR128RegClass.contains(DstReg);
2383 }
2384 case AArch64::ORRv16i8:
2385 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2386 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2387 "invalid ORRv16i8 operands");
2388 return true;
2389 }
2390 break;
2391 }
2392 return false;
2393}
2394
2395Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2396 int &FrameIndex) const {
2397 switch (MI.getOpcode()) {
2398 default:
2399 break;
2400 case AArch64::LDRWui:
2401 case AArch64::LDRXui:
2402 case AArch64::LDRBui:
2403 case AArch64::LDRHui:
2404 case AArch64::LDRSui:
2405 case AArch64::LDRDui:
2406 case AArch64::LDRQui:
2407 case AArch64::LDR_PXI:
2408 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2409 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2410 FrameIndex = MI.getOperand(1).getIndex();
2411 return MI.getOperand(0).getReg();
2412 }
2413 break;
2414 }
2415
2416 return 0;
2417}
2418
2419Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2420 int &FrameIndex) const {
2421 switch (MI.getOpcode()) {
2422 default:
2423 break;
2424 case AArch64::STRWui:
2425 case AArch64::STRXui:
2426 case AArch64::STRBui:
2427 case AArch64::STRHui:
2428 case AArch64::STRSui:
2429 case AArch64::STRDui:
2430 case AArch64::STRQui:
2431 case AArch64::STR_PXI:
2432 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2433 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2434 FrameIndex = MI.getOperand(1).getIndex();
2435 return MI.getOperand(0).getReg();
2436 }
2437 break;
2438 }
2439 return 0;
2440}
2441
2442/// Check all MachineMemOperands for a hint to suppress pairing.
2443bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2444 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2445 return MMO->getFlags() & MOSuppressPair;
2446 });
2447}
2448
2449/// Set a flag on the first MachineMemOperand to suppress pairing.
2450void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2451 if (MI.memoperands_empty())
2452 return;
2453 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2454}
2455
2456/// Check all MachineMemOperands for a hint that the load/store is strided.
2457bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2458 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2459 return MMO->getFlags() & MOStridedAccess;
2460 });
2461}
2462
2463bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2464 switch (Opc) {
2465 default:
2466 return false;
2467 case AArch64::STURSi:
2468 case AArch64::STRSpre:
2469 case AArch64::STURDi:
2470 case AArch64::STRDpre:
2471 case AArch64::STURQi:
2472 case AArch64::STRQpre:
2473 case AArch64::STURBBi:
2474 case AArch64::STURHHi:
2475 case AArch64::STURWi:
2476 case AArch64::STRWpre:
2477 case AArch64::STURXi:
2478 case AArch64::STRXpre:
2479 case AArch64::LDURSi:
2480 case AArch64::LDRSpre:
2481 case AArch64::LDURDi:
2482 case AArch64::LDRDpre:
2483 case AArch64::LDURQi:
2484 case AArch64::LDRQpre:
2485 case AArch64::LDURWi:
2486 case AArch64::LDRWpre:
2487 case AArch64::LDURXi:
2488 case AArch64::LDRXpre:
2489 case AArch64::LDRSWpre:
2490 case AArch64::LDURSWi:
2491 case AArch64::LDURHHi:
2492 case AArch64::LDURBBi:
2493 case AArch64::LDURSBWi:
2494 case AArch64::LDURSHWi:
2495 return true;
2496 }
2497}
2498
2499std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2500 switch (Opc) {
2501 default: return {};
2502 case AArch64::PRFMui: return AArch64::PRFUMi;
2503 case AArch64::LDRXui: return AArch64::LDURXi;
2504 case AArch64::LDRWui: return AArch64::LDURWi;
2505 case AArch64::LDRBui: return AArch64::LDURBi;
2506 case AArch64::LDRHui: return AArch64::LDURHi;
2507 case AArch64::LDRSui: return AArch64::LDURSi;
2508 case AArch64::LDRDui: return AArch64::LDURDi;
2509 case AArch64::LDRQui: return AArch64::LDURQi;
2510 case AArch64::LDRBBui: return AArch64::LDURBBi;
2511 case AArch64::LDRHHui: return AArch64::LDURHHi;
2512 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2513 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2514 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2515 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2516 case AArch64::LDRSWui: return AArch64::LDURSWi;
2517 case AArch64::STRXui: return AArch64::STURXi;
2518 case AArch64::STRWui: return AArch64::STURWi;
2519 case AArch64::STRBui: return AArch64::STURBi;
2520 case AArch64::STRHui: return AArch64::STURHi;
2521 case AArch64::STRSui: return AArch64::STURSi;
2522 case AArch64::STRDui: return AArch64::STURDi;
2523 case AArch64::STRQui: return AArch64::STURQi;
2524 case AArch64::STRBBui: return AArch64::STURBBi;
2525 case AArch64::STRHHui: return AArch64::STURHHi;
2526 }
2527}
2528
2529unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2530 switch (Opc) {
2531 default:
2532 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2533 case AArch64::ADDG:
2534 case AArch64::LDAPURBi:
2535 case AArch64::LDAPURHi:
2536 case AArch64::LDAPURi:
2537 case AArch64::LDAPURSBWi:
2538 case AArch64::LDAPURSBXi:
2539 case AArch64::LDAPURSHWi:
2540 case AArch64::LDAPURSHXi:
2541 case AArch64::LDAPURSWi:
2542 case AArch64::LDAPURXi:
2543 case AArch64::LDR_PPXI:
2544 case AArch64::LDR_PXI:
2545 case AArch64::LDR_ZXI:
2546 case AArch64::LDR_ZZXI:
2547 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2548 case AArch64::LDR_ZZZXI:
2549 case AArch64::LDR_ZZZZXI:
2550 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2551 case AArch64::LDRBBui:
2552 case AArch64::LDRBui:
2553 case AArch64::LDRDui:
2554 case AArch64::LDRHHui:
2555 case AArch64::LDRHui:
2556 case AArch64::LDRQui:
2557 case AArch64::LDRSBWui:
2558 case AArch64::LDRSBXui:
2559 case AArch64::LDRSHWui:
2560 case AArch64::LDRSHXui:
2561 case AArch64::LDRSui:
2562 case AArch64::LDRSWui:
2563 case AArch64::LDRWui:
2564 case AArch64::LDRXui:
2565 case AArch64::LDURBBi:
2566 case AArch64::LDURBi:
2567 case AArch64::LDURDi:
2568 case AArch64::LDURHHi:
2569 case AArch64::LDURHi:
2570 case AArch64::LDURQi:
2571 case AArch64::LDURSBWi:
2572 case AArch64::LDURSBXi:
2573 case AArch64::LDURSHWi:
2574 case AArch64::LDURSHXi:
2575 case AArch64::LDURSi:
2576 case AArch64::LDURSWi:
2577 case AArch64::LDURWi:
2578 case AArch64::LDURXi:
2579 case AArch64::PRFMui:
2580 case AArch64::PRFUMi:
2581 case AArch64::ST2Gi:
2582 case AArch64::STGi:
2583 case AArch64::STLURBi:
2584 case AArch64::STLURHi:
2585 case AArch64::STLURWi:
2586 case AArch64::STLURXi:
2587 case AArch64::StoreSwiftAsyncContext:
2588 case AArch64::STR_PPXI:
2589 case AArch64::STR_PXI:
2590 case AArch64::STR_ZXI:
2591 case AArch64::STR_ZZXI:
2592 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2593 case AArch64::STR_ZZZXI:
2594 case AArch64::STR_ZZZZXI:
2595 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2596 case AArch64::STRBBui:
2597 case AArch64::STRBui:
2598 case AArch64::STRDui:
2599 case AArch64::STRHHui:
2600 case AArch64::STRHui:
2601 case AArch64::STRQui:
2602 case AArch64::STRSui:
2603 case AArch64::STRWui:
2604 case AArch64::STRXui:
2605 case AArch64::STURBBi:
2606 case AArch64::STURBi:
2607 case AArch64::STURDi:
2608 case AArch64::STURHHi:
2609 case AArch64::STURHi:
2610 case AArch64::STURQi:
2611 case AArch64::STURSi:
2612 case AArch64::STURWi:
2613 case AArch64::STURXi:
2614 case AArch64::STZ2Gi:
2615 case AArch64::STZGi:
2616 case AArch64::TAGPstack:
2617 return 2;
2618 case AArch64::LD1B_D_IMM:
2619 case AArch64::LD1B_H_IMM:
2620 case AArch64::LD1B_IMM:
2621 case AArch64::LD1B_S_IMM:
2622 case AArch64::LD1D_IMM:
2623 case AArch64::LD1H_D_IMM:
2624 case AArch64::LD1H_IMM:
2625 case AArch64::LD1H_S_IMM:
2626 case AArch64::LD1RB_D_IMM:
2627 case AArch64::LD1RB_H_IMM:
2628 case AArch64::LD1RB_IMM:
2629 case AArch64::LD1RB_S_IMM:
2630 case AArch64::LD1RD_IMM:
2631 case AArch64::LD1RH_D_IMM:
2632 case AArch64::LD1RH_IMM:
2633 case AArch64::LD1RH_S_IMM:
2634 case AArch64::LD1RSB_D_IMM:
2635 case AArch64::LD1RSB_H_IMM:
2636 case AArch64::LD1RSB_S_IMM:
2637 case AArch64::LD1RSH_D_IMM:
2638 case AArch64::LD1RSH_S_IMM:
2639 case AArch64::LD1RSW_IMM:
2640 case AArch64::LD1RW_D_IMM:
2641 case AArch64::LD1RW_IMM:
2642 case AArch64::LD1SB_D_IMM:
2643 case AArch64::LD1SB_H_IMM:
2644 case AArch64::LD1SB_S_IMM:
2645 case AArch64::LD1SH_D_IMM:
2646 case AArch64::LD1SH_S_IMM:
2647 case AArch64::LD1SW_D_IMM:
2648 case AArch64::LD1W_D_IMM:
2649 case AArch64::LD1W_IMM:
2650 case AArch64::LD2B_IMM:
2651 case AArch64::LD2D_IMM:
2652 case AArch64::LD2H_IMM:
2653 case AArch64::LD2W_IMM:
2654 case AArch64::LD3B_IMM:
2655 case AArch64::LD3D_IMM:
2656 case AArch64::LD3H_IMM:
2657 case AArch64::LD3W_IMM:
2658 case AArch64::LD4B_IMM:
2659 case AArch64::LD4D_IMM:
2660 case AArch64::LD4H_IMM:
2661 case AArch64::LD4W_IMM:
2662 case AArch64::LDG:
2663 case AArch64::LDNF1B_D_IMM:
2664 case AArch64::LDNF1B_H_IMM:
2665 case AArch64::LDNF1B_IMM:
2666 case AArch64::LDNF1B_S_IMM:
2667 case AArch64::LDNF1D_IMM:
2668 case AArch64::LDNF1H_D_IMM:
2669 case AArch64::LDNF1H_IMM:
2670 case AArch64::LDNF1H_S_IMM:
2671 case AArch64::LDNF1SB_D_IMM:
2672 case AArch64::LDNF1SB_H_IMM:
2673 case AArch64::LDNF1SB_S_IMM:
2674 case AArch64::LDNF1SH_D_IMM:
2675 case AArch64::LDNF1SH_S_IMM:
2676 case AArch64::LDNF1SW_D_IMM:
2677 case AArch64::LDNF1W_D_IMM:
2678 case AArch64::LDNF1W_IMM:
2679 case AArch64::LDNPDi:
2680 case AArch64::LDNPQi:
2681 case AArch64::LDNPSi:
2682 case AArch64::LDNPWi:
2683 case AArch64::LDNPXi:
2684 case AArch64::LDNT1B_ZRI:
2685 case AArch64::LDNT1D_ZRI:
2686 case AArch64::LDNT1H_ZRI:
2687 case AArch64::LDNT1W_ZRI:
2688 case AArch64::LDPDi:
2689 case AArch64::LDPQi:
2690 case AArch64::LDPSi:
2691 case AArch64::LDPWi:
2692 case AArch64::LDPXi:
2693 case AArch64::LDRBBpost:
2694 case AArch64::LDRBBpre:
2695 case AArch64::LDRBpost:
2696 case AArch64::LDRBpre:
2697 case AArch64::LDRDpost:
2698 case AArch64::LDRDpre:
2699 case AArch64::LDRHHpost:
2700 case AArch64::LDRHHpre:
2701 case AArch64::LDRHpost:
2702 case AArch64::LDRHpre:
2703 case AArch64::LDRQpost:
2704 case AArch64::LDRQpre:
2705 case AArch64::LDRSpost:
2706 case AArch64::LDRSpre:
2707 case AArch64::LDRWpost:
2708 case AArch64::LDRWpre:
2709 case AArch64::LDRXpost:
2710 case AArch64::LDRXpre:
2711 case AArch64::ST1B_D_IMM:
2712 case AArch64::ST1B_H_IMM:
2713 case AArch64::ST1B_IMM:
2714 case AArch64::ST1B_S_IMM:
2715 case AArch64::ST1D_IMM:
2716 case AArch64::ST1H_D_IMM:
2717 case AArch64::ST1H_IMM:
2718 case AArch64::ST1H_S_IMM:
2719 case AArch64::ST1W_D_IMM:
2720 case AArch64::ST1W_IMM:
2721 case AArch64::ST2B_IMM:
2722 case AArch64::ST2D_IMM:
2723 case AArch64::ST2H_IMM:
2724 case AArch64::ST2W_IMM:
2725 case AArch64::ST3B_IMM:
2726 case AArch64::ST3D_IMM:
2727 case AArch64::ST3H_IMM:
2728 case AArch64::ST3W_IMM:
2729 case AArch64::ST4B_IMM:
2730 case AArch64::ST4D_IMM:
2731 case AArch64::ST4H_IMM:
2732 case AArch64::ST4W_IMM:
2733 case AArch64::STGPi:
2734 case AArch64::STGPreIndex:
2735 case AArch64::STZGPreIndex:
2736 case AArch64::ST2GPreIndex:
2737 case AArch64::STZ2GPreIndex:
2738 case AArch64::STGPostIndex:
2739 case AArch64::STZGPostIndex:
2740 case AArch64::ST2GPostIndex:
2741 case AArch64::STZ2GPostIndex:
2742 case AArch64::STNPDi:
2743 case AArch64::STNPQi:
2744 case AArch64::STNPSi:
2745 case AArch64::STNPWi:
2746 case AArch64::STNPXi:
2747 case AArch64::STNT1B_ZRI:
2748 case AArch64::STNT1D_ZRI:
2749 case AArch64::STNT1H_ZRI:
2750 case AArch64::STNT1W_ZRI:
2751 case AArch64::STPDi:
2752 case AArch64::STPQi:
2753 case AArch64::STPSi:
2754 case AArch64::STPWi:
2755 case AArch64::STPXi:
2756 case AArch64::STRBBpost:
2757 case AArch64::STRBBpre:
2758 case AArch64::STRBpost:
2759 case AArch64::STRBpre:
2760 case AArch64::STRDpost:
2761 case AArch64::STRDpre:
2762 case AArch64::STRHHpost:
2763 case AArch64::STRHHpre:
2764 case AArch64::STRHpost:
2765 case AArch64::STRHpre:
2766 case AArch64::STRQpost:
2767 case AArch64::STRQpre:
2768 case AArch64::STRSpost:
2769 case AArch64::STRSpre:
2770 case AArch64::STRWpost:
2771 case AArch64::STRWpre:
2772 case AArch64::STRXpost:
2773 case AArch64::STRXpre:
2774 return 3;
2775 case AArch64::LDPDpost:
2776 case AArch64::LDPDpre:
2777 case AArch64::LDPQpost:
2778 case AArch64::LDPQpre:
2779 case AArch64::LDPSpost:
2780 case AArch64::LDPSpre:
2781 case AArch64::LDPWpost:
2782 case AArch64::LDPWpre:
2783 case AArch64::LDPXpost:
2784 case AArch64::LDPXpre:
2785 case AArch64::STGPpre:
2786 case AArch64::STGPpost:
2787 case AArch64::STPDpost:
2788 case AArch64::STPDpre:
2789 case AArch64::STPQpost:
2790 case AArch64::STPQpre:
2791 case AArch64::STPSpost:
2792 case AArch64::STPSpre:
2793 case AArch64::STPWpost:
2794 case AArch64::STPWpre:
2795 case AArch64::STPXpost:
2796 case AArch64::STPXpre:
2797 return 4;
2798 }
2799}
2800
2801bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2802 switch (MI.getOpcode()) {
2803 default:
2804 return false;
2805 // Scaled instructions.
2806 case AArch64::STRSui:
2807 case AArch64::STRDui:
2808 case AArch64::STRQui:
2809 case AArch64::STRXui:
2810 case AArch64::STRWui:
2811 case AArch64::LDRSui:
2812 case AArch64::LDRDui:
2813 case AArch64::LDRQui:
2814 case AArch64::LDRXui:
2815 case AArch64::LDRWui:
2816 case AArch64::LDRSWui:
2817 // Unscaled instructions.
2818 case AArch64::STURSi:
2819 case AArch64::STRSpre:
2820 case AArch64::STURDi:
2821 case AArch64::STRDpre:
2822 case AArch64::STURQi:
2823 case AArch64::STRQpre:
2824 case AArch64::STURWi:
2825 case AArch64::STRWpre:
2826 case AArch64::STURXi:
2827 case AArch64::STRXpre:
2828 case AArch64::LDURSi:
2829 case AArch64::LDRSpre:
2830 case AArch64::LDURDi:
2831 case AArch64::LDRDpre:
2832 case AArch64::LDURQi:
2833 case AArch64::LDRQpre:
2834 case AArch64::LDURWi:
2835 case AArch64::LDRWpre:
2836 case AArch64::LDURXi:
2837 case AArch64::LDRXpre:
2838 case AArch64::LDURSWi:
2839 case AArch64::LDRSWpre:
2840 // SVE instructions.
2841 case AArch64::LDR_ZXI:
2842 case AArch64::STR_ZXI:
2843 return true;
2844 }
2845}
2846
2847bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2848 switch (MI.getOpcode()) {
2849 default:
2850 assert((!MI.isCall() || !MI.isReturn()) &&
2851 "Unexpected instruction - was a new tail call opcode introduced?");
2852 return false;
2853 case AArch64::TCRETURNdi:
2854 case AArch64::TCRETURNri:
2855 case AArch64::TCRETURNrix16x17:
2856 case AArch64::TCRETURNrix17:
2857 case AArch64::TCRETURNrinotx16:
2858 case AArch64::TCRETURNriALL:
2859 case AArch64::AUTH_TCRETURN:
2860 case AArch64::AUTH_TCRETURN_BTI:
2861 return true;
2862 }
2863}
2864
2865unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2866 switch (Opc) {
2867 default:
2868 llvm_unreachable("Opcode has no flag setting equivalent!");
2869 // 32-bit cases:
2870 case AArch64::ADDWri:
2871 return AArch64::ADDSWri;
2872 case AArch64::ADDWrr:
2873 return AArch64::ADDSWrr;
2874 case AArch64::ADDWrs:
2875 return AArch64::ADDSWrs;
2876 case AArch64::ADDWrx:
2877 return AArch64::ADDSWrx;
2878 case AArch64::ANDWri:
2879 return AArch64::ANDSWri;
2880 case AArch64::ANDWrr:
2881 return AArch64::ANDSWrr;
2882 case AArch64::ANDWrs:
2883 return AArch64::ANDSWrs;
2884 case AArch64::BICWrr:
2885 return AArch64::BICSWrr;
2886 case AArch64::BICWrs:
2887 return AArch64::BICSWrs;
2888 case AArch64::SUBWri:
2889 return AArch64::SUBSWri;
2890 case AArch64::SUBWrr:
2891 return AArch64::SUBSWrr;
2892 case AArch64::SUBWrs:
2893 return AArch64::SUBSWrs;
2894 case AArch64::SUBWrx:
2895 return AArch64::SUBSWrx;
2896 // 64-bit cases:
2897 case AArch64::ADDXri:
2898 return AArch64::ADDSXri;
2899 case AArch64::ADDXrr:
2900 return AArch64::ADDSXrr;
2901 case AArch64::ADDXrs:
2902 return AArch64::ADDSXrs;
2903 case AArch64::ADDXrx:
2904 return AArch64::ADDSXrx;
2905 case AArch64::ANDXri:
2906 return AArch64::ANDSXri;
2907 case AArch64::ANDXrr:
2908 return AArch64::ANDSXrr;
2909 case AArch64::ANDXrs:
2910 return AArch64::ANDSXrs;
2911 case AArch64::BICXrr:
2912 return AArch64::BICSXrr;
2913 case AArch64::BICXrs:
2914 return AArch64::BICSXrs;
2915 case AArch64::SUBXri:
2916 return AArch64::SUBSXri;
2917 case AArch64::SUBXrr:
2918 return AArch64::SUBSXrr;
2919 case AArch64::SUBXrs:
2920 return AArch64::SUBSXrs;
2921 case AArch64::SUBXrx:
2922 return AArch64::SUBSXrx;
2923 // SVE instructions:
2924 case AArch64::AND_PPzPP:
2925 return AArch64::ANDS_PPzPP;
2926 case AArch64::BIC_PPzPP:
2927 return AArch64::BICS_PPzPP;
2928 case AArch64::EOR_PPzPP:
2929 return AArch64::EORS_PPzPP;
2930 case AArch64::NAND_PPzPP:
2931 return AArch64::NANDS_PPzPP;
2932 case AArch64::NOR_PPzPP:
2933 return AArch64::NORS_PPzPP;
2934 case AArch64::ORN_PPzPP:
2935 return AArch64::ORNS_PPzPP;
2936 case AArch64::ORR_PPzPP:
2937 return AArch64::ORRS_PPzPP;
2938 case AArch64::BRKA_PPzP:
2939 return AArch64::BRKAS_PPzP;
2940 case AArch64::BRKPA_PPzPP:
2941 return AArch64::BRKPAS_PPzPP;
2942 case AArch64::BRKB_PPzP:
2943 return AArch64::BRKBS_PPzP;
2944 case AArch64::BRKPB_PPzPP:
2945 return AArch64::BRKPBS_PPzPP;
2946 case AArch64::BRKN_PPzP:
2947 return AArch64::BRKNS_PPzP;
2948 case AArch64::RDFFR_PPz:
2949 return AArch64::RDFFRS_PPz;
2950 case AArch64::PTRUE_B:
2951 return AArch64::PTRUES_B;
2952 }
2953}
2954
2955// Is this a candidate for ld/st merging or pairing? For example, we don't
2956// touch volatiles or load/stores that have a hint to avoid pair formation.
2957bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2958
2959 bool IsPreLdSt = isPreLdSt(MI);
2960
2961 // If this is a volatile load/store, don't mess with it.
2962 if (MI.hasOrderedMemoryRef())
2963 return false;
2964
2965 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2966 // For Pre-inc LD/ST, the operand is shifted by one.
2967 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2968 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2969 "Expected a reg or frame index operand.");
2970
2971 // For Pre-indexed addressing quadword instructions, the third operand is the
2972 // immediate value.
2973 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2974
2975 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2976 return false;
2977
2978 // Can't merge/pair if the instruction modifies the base register.
2979 // e.g., ldr x0, [x0]
2980 // This case will never occur with an FI base.
2981 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2982 // STR<S,D,Q,W,X>pre, it can be merged.
2983 // For example:
2984 // ldr q0, [x11, #32]!
2985 // ldr q1, [x11, #16]
2986 // to
2987 // ldp q0, q1, [x11, #32]!
2988 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2989 Register BaseReg = MI.getOperand(1).getReg();
2990 const TargetRegisterInfo *TRI = &getRegisterInfo();
2991 if (MI.modifiesRegister(BaseReg, TRI))
2992 return false;
2993 }
2994
2995 // Pairing SVE fills/spills is only valid for little-endian targets that
2996 // implement VLS 128.
2997 switch (MI.getOpcode()) {
2998 default:
2999 break;
3000 case AArch64::LDR_ZXI:
3001 case AArch64::STR_ZXI:
3002 if (!Subtarget.isLittleEndian() ||
3003 Subtarget.getSVEVectorSizeInBits() != 128)
3004 return false;
3005 }
3006
3007 // Check if this load/store has a hint to avoid pair formation.
3008 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3009 if (isLdStPairSuppressed(MI))
3010 return false;
3011
3012 // Do not pair any callee-save store/reload instructions in the
3013 // prologue/epilogue if the CFI information encoded the operations as separate
3014 // instructions, as that will cause the size of the actual prologue to mismatch
3015 // with the prologue size recorded in the Windows CFI.
3016 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3017 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3018 MI.getMF()->getFunction().needsUnwindTableEntry();
3019 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3020 MI.getFlag(MachineInstr::FrameDestroy)))
3021 return false;
3022
3023 // On some CPUs quad load/store pairs are slower than two single load/stores.
3024 if (Subtarget.isPaired128Slow()) {
3025 switch (MI.getOpcode()) {
3026 default:
3027 break;
3028 case AArch64::LDURQi:
3029 case AArch64::STURQi:
3030 case AArch64::LDRQui:
3031 case AArch64::STRQui:
3032 return false;
3033 }
3034 }
3035
3036 return true;
3037}
3038
3039bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3040 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3041 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3042 const TargetRegisterInfo *TRI) const {
3043 if (!LdSt.mayLoadOrStore())
3044 return false;
3045
3046 const MachineOperand *BaseOp;
3047 TypeSize WidthN(0, false);
3048 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3049 WidthN, TRI))
3050 return false;
3051 // The maximum vscale is 16 under AArch64; return the maximal extent for
3052 // the vector.
3053 Width = LocationSize::precise(WidthN);
3054 BaseOps.push_back(BaseOp);
3055 return true;
3056}
3057
3058std::optional<ExtAddrMode>
3059AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3060 const TargetRegisterInfo *TRI) const {
3061 const MachineOperand *Base; // Filled with the base operand of MI.
3062 int64_t Offset; // Filled with the offset of MI.
3063 bool OffsetIsScalable;
3064 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3065 return std::nullopt;
3066
3067 if (!Base->isReg())
3068 return std::nullopt;
3069 ExtAddrMode AM;
3070 AM.BaseReg = Base->getReg();
3071 AM.Displacement = Offset;
3072 AM.ScaledReg = 0;
3073 AM.Scale = 0;
3074 return AM;
3075}
3076
3077bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3078 Register Reg,
3079 const MachineInstr &AddrI,
3080 ExtAddrMode &AM) const {
3081 // Filter out instructions into which we cannot fold.
3082 unsigned NumBytes;
3083 int64_t OffsetScale = 1;
3084 switch (MemI.getOpcode()) {
3085 default:
3086 return false;
3087
3088 case AArch64::LDURQi:
3089 case AArch64::STURQi:
3090 NumBytes = 16;
3091 break;
3092
3093 case AArch64::LDURDi:
3094 case AArch64::STURDi:
3095 case AArch64::LDURXi:
3096 case AArch64::STURXi:
3097 NumBytes = 8;
3098 break;
3099
3100 case AArch64::LDURWi:
3101 case AArch64::LDURSWi:
3102 case AArch64::STURWi:
3103 NumBytes = 4;
3104 break;
3105
3106 case AArch64::LDURHi:
3107 case AArch64::STURHi:
3108 case AArch64::LDURHHi:
3109 case AArch64::STURHHi:
3110 case AArch64::LDURSHXi:
3111 case AArch64::LDURSHWi:
3112 NumBytes = 2;
3113 break;
3114
3115 case AArch64::LDRBroX:
3116 case AArch64::LDRBBroX:
3117 case AArch64::LDRSBXroX:
3118 case AArch64::LDRSBWroX:
3119 case AArch64::STRBroX:
3120 case AArch64::STRBBroX:
3121 case AArch64::LDURBi:
3122 case AArch64::LDURBBi:
3123 case AArch64::LDURSBXi:
3124 case AArch64::LDURSBWi:
3125 case AArch64::STURBi:
3126 case AArch64::STURBBi:
3127 case AArch64::LDRBui:
3128 case AArch64::LDRBBui:
3129 case AArch64::LDRSBXui:
3130 case AArch64::LDRSBWui:
3131 case AArch64::STRBui:
3132 case AArch64::STRBBui:
3133 NumBytes = 1;
3134 break;
3135
3136 case AArch64::LDRQroX:
3137 case AArch64::STRQroX:
3138 case AArch64::LDRQui:
3139 case AArch64::STRQui:
3140 NumBytes = 16;
3141 OffsetScale = 16;
3142 break;
3143
3144 case AArch64::LDRDroX:
3145 case AArch64::STRDroX:
3146 case AArch64::LDRXroX:
3147 case AArch64::STRXroX:
3148 case AArch64::LDRDui:
3149 case AArch64::STRDui:
3150 case AArch64::LDRXui:
3151 case AArch64::STRXui:
3152 NumBytes = 8;
3153 OffsetScale = 8;
3154 break;
3155
3156 case AArch64::LDRWroX:
3157 case AArch64::LDRSWroX:
3158 case AArch64::STRWroX:
3159 case AArch64::LDRWui:
3160 case AArch64::LDRSWui:
3161 case AArch64::STRWui:
3162 NumBytes = 4;
3163 OffsetScale = 4;
3164 break;
3165
3166 case AArch64::LDRHroX:
3167 case AArch64::STRHroX:
3168 case AArch64::LDRHHroX:
3169 case AArch64::STRHHroX:
3170 case AArch64::LDRSHXroX:
3171 case AArch64::LDRSHWroX:
3172 case AArch64::LDRHui:
3173 case AArch64::STRHui:
3174 case AArch64::LDRHHui:
3175 case AArch64::STRHHui:
3176 case AArch64::LDRSHXui:
3177 case AArch64::LDRSHWui:
3178 NumBytes = 2;
3179 OffsetScale = 2;
3180 break;
3181 }
3182
3183 // Check the fold operand is not the loaded/stored value.
3184 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3185 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3186 return false;
3187
3188 // Handle memory instructions with a [Reg, Reg] addressing mode.
3189 if (MemI.getOperand(2).isReg()) {
3190 // Bail if the addressing mode already includes extension of the offset
3191 // register.
3192 if (MemI.getOperand(3).getImm())
3193 return false;
3194
3195 // Check if we actually have a scaled offset.
3196 if (MemI.getOperand(4).getImm() == 0)
3197 OffsetScale = 1;
3198
3199 // If the address instruction is folded into the base register, then the
3200 // addressing mode must not have a scale. Then we can swap the base and the
3201 // scaled registers.
3202 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3203 return false;
3204
3205 switch (AddrI.getOpcode()) {
3206 default:
3207 return false;
3208
3209 case AArch64::SBFMXri:
3210 // sxtw Xa, Wm
3211 // ldr Xd, [Xn, Xa, lsl #N]
3212 // ->
3213 // ldr Xd, [Xn, Wm, sxtw #N]
3214 if (AddrI.getOperand(2).getImm() != 0 ||
3215 AddrI.getOperand(3).getImm() != 31)
3216 return false;
3217
3218 AM.BaseReg = MemI.getOperand(1).getReg();
3219 if (AM.BaseReg == Reg)
3220 AM.BaseReg = MemI.getOperand(2).getReg();
3221 AM.ScaledReg = AddrI.getOperand(1).getReg();
3222 AM.Scale = OffsetScale;
3223 AM.Displacement = 0;
3224 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3225 return true;
3226
3227 case TargetOpcode::SUBREG_TO_REG: {
3228 // mov Wa, Wm
3229 // ldr Xd, [Xn, Xa, lsl #N]
3230 // ->
3231 // ldr Xd, [Xn, Wm, uxtw #N]
3232
3233 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3234 if (AddrI.getOperand(1).getImm() != 0 ||
3235 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3236 return false;
3237
3238 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3239 Register OffsetReg = AddrI.getOperand(2).getReg();
3240 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3241 return false;
3242
3243 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3244 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3245 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3246 DefMI.getOperand(3).getImm() != 0)
3247 return false;
3248
3249 AM.BaseReg = MemI.getOperand(1).getReg();
3250 if (AM.BaseReg == Reg)
3251 AM.BaseReg = MemI.getOperand(2).getReg();
3252 AM.ScaledReg = DefMI.getOperand(2).getReg();
3253 AM.Scale = OffsetScale;
3254 AM.Displacement = 0;
3255 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3256 return true;
3257 }
3258 }
3259 }
3260
3261 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3262
3263 // Check we are not breaking a potential conversion to an LDP.
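 // (LDP/STP encode a signed 7-bit immediate scaled by the access size, so for
 // example 8-byte accesses can only be paired for offsets in [-512, 504].)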
3264 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3265 int64_t NewOffset) -> bool {
3266 int64_t MinOffset, MaxOffset;
3267 switch (NumBytes) {
3268 default:
3269 return true;
3270 case 4:
3271 MinOffset = -256;
3272 MaxOffset = 252;
3273 break;
3274 case 8:
3275 MinOffset = -512;
3276 MaxOffset = 504;
3277 break;
3278 case 16:
3279 MinOffset = -1024;
3280 MaxOffset = 1008;
3281 break;
3282 }
3283 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3284 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3285 };
3286 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3287 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3288 int64_t NewOffset = OldOffset + Disp;
3289 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3290 return false;
3291 // If the old offset would fit into an LDP, but the new offset wouldn't,
3292 // bail out.
3293 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3294 return false;
3295 AM.BaseReg = AddrI.getOperand(1).getReg();
3296 AM.ScaledReg = 0;
3297 AM.Scale = 0;
3298 AM.Displacement = NewOffset;
3299 AM.Form = ExtAddrMode::Formula::Basic;
3300 return true;
3301 };
3302
3303 auto canFoldAddRegIntoAddrMode =
3304 [&](int64_t Scale,
3305 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3306 if (MemI.getOperand(2).getImm() != 0)
3307 return false;
3308 if ((unsigned)Scale != Scale)
3309 return false;
3310 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3311 return false;
3312 AM.BaseReg = AddrI.getOperand(1).getReg();
3313 AM.ScaledReg = AddrI.getOperand(2).getReg();
3314 AM.Scale = Scale;
3315 AM.Displacement = 0;
3316 AM.Form = Form;
3317 return true;
3318 };
3319
3320 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3321 unsigned Opcode = MemI.getOpcode();
3322 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3323 Subtarget.isSTRQroSlow();
3324 };
3325
3326 int64_t Disp = 0;
3327 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3328 switch (AddrI.getOpcode()) {
3329 default:
3330 return false;
3331
3332 case AArch64::ADDXri:
3333 // add Xa, Xn, #N
3334 // ldr Xd, [Xa, #M]
3335 // ->
3336 // ldr Xd, [Xn, #N'+M]
3337 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3338 return canFoldAddSubImmIntoAddrMode(Disp);
3339
3340 case AArch64::SUBXri:
3341 // sub Xa, Xn, #N
3342 // ldr Xd, [Xa, #M]
3343 // ->
3344 // ldr Xd, [Xn, #N'+M]
3345 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3346 return canFoldAddSubImmIntoAddrMode(-Disp);
3347
3348 case AArch64::ADDXrs: {
3349 // add Xa, Xn, Xm, lsl #N
3350 // ldr Xd, [Xa]
3351 // ->
3352 // ldr Xd, [Xn, Xm, lsl #N]
3353
3354 // Don't fold the add if the result would be slower, unless optimising for
3355 // size.
3356 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3357 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3358 return false;
3359 Shift = AArch64_AM::getShiftValue(Shift);
3360 if (!OptSize) {
3361 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3362 return false;
3363 if (avoidSlowSTRQ(MemI))
3364 return false;
3365 }
3366 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3367 }
3368
3369 case AArch64::ADDXrr:
3370 // add Xa, Xn, Xm
3371 // ldr Xd, [Xa]
3372 // ->
3373 // ldr Xd, [Xn, Xm, lsl #0]
3374
3375 // Don't fold the add if the result would be slower, unless optimising for
3376 // size.
3377 if (!OptSize && avoidSlowSTRQ(MemI))
3378 return false;
3379 return canFoldAddRegIntoAddrMode(1);
3380
3381 case AArch64::ADDXrx:
3382 // add Xa, Xn, Wm, {s,u}xtw #N
3383 // ldr Xd, [Xa]
3384 // ->
3385 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3386
3387 // Don't fold the add if the result would be slower, unless optimising for
3388 // size.
3389 if (!OptSize && avoidSlowSTRQ(MemI))
3390 return false;
3391
3392 // Can fold only sign-/zero-extend of a word.
3393 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3394 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3395 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3396 return false;
3397
3398 return canFoldAddRegIntoAddrMode(
3399 1ULL << AArch64_AM::getArithShiftValue(Imm),
3400 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3401 : ExtAddrMode::Formula::ZExtScaledReg);
3402 }
3403}
3404
3405// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3406// return the opcode of an instruction performing the same operation, but using
3407// the [Reg, Reg] addressing mode.
3408static unsigned regOffsetOpcode(unsigned Opcode) {
3409 switch (Opcode) {
3410 default:
3411 llvm_unreachable("Address folding not implemented for instruction");
3412
3413 case AArch64::LDURQi:
3414 case AArch64::LDRQui:
3415 return AArch64::LDRQroX;
3416 case AArch64::STURQi:
3417 case AArch64::STRQui:
3418 return AArch64::STRQroX;
3419 case AArch64::LDURDi:
3420 case AArch64::LDRDui:
3421 return AArch64::LDRDroX;
3422 case AArch64::STURDi:
3423 case AArch64::STRDui:
3424 return AArch64::STRDroX;
3425 case AArch64::LDURXi:
3426 case AArch64::LDRXui:
3427 return AArch64::LDRXroX;
3428 case AArch64::STURXi:
3429 case AArch64::STRXui:
3430 return AArch64::STRXroX;
3431 case AArch64::LDURWi:
3432 case AArch64::LDRWui:
3433 return AArch64::LDRWroX;
3434 case AArch64::LDURSWi:
3435 case AArch64::LDRSWui:
3436 return AArch64::LDRSWroX;
3437 case AArch64::STURWi:
3438 case AArch64::STRWui:
3439 return AArch64::STRWroX;
3440 case AArch64::LDURHi:
3441 case AArch64::LDRHui:
3442 return AArch64::LDRHroX;
3443 case AArch64::STURHi:
3444 case AArch64::STRHui:
3445 return AArch64::STRHroX;
3446 case AArch64::LDURHHi:
3447 case AArch64::LDRHHui:
3448 return AArch64::LDRHHroX;
3449 case AArch64::STURHHi:
3450 case AArch64::STRHHui:
3451 return AArch64::STRHHroX;
3452 case AArch64::LDURSHXi:
3453 case AArch64::LDRSHXui:
3454 return AArch64::LDRSHXroX;
3455 case AArch64::LDURSHWi:
3456 case AArch64::LDRSHWui:
3457 return AArch64::LDRSHWroX;
3458 case AArch64::LDURBi:
3459 case AArch64::LDRBui:
3460 return AArch64::LDRBroX;
3461 case AArch64::LDURBBi:
3462 case AArch64::LDRBBui:
3463 return AArch64::LDRBBroX;
3464 case AArch64::LDURSBXi:
3465 case AArch64::LDRSBXui:
3466 return AArch64::LDRSBXroX;
3467 case AArch64::LDURSBWi:
3468 case AArch64::LDRSBWui:
3469 return AArch64::LDRSBWroX;
3470 case AArch64::STURBi:
3471 case AArch64::STRBui:
3472 return AArch64::STRBroX;
3473 case AArch64::STURBBi:
3474 case AArch64::STRBBui:
3475 return AArch64::STRBBroX;
3476 }
3477}
3478
3479// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3480// the opcode of an instruction performing the same operation, but using the
3481// [Reg, #Imm] addressing mode with scaled offset.
3482unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3483 switch (Opcode) {
3484 default:
3485 llvm_unreachable("Address folding not implemented for instruction");
3486
3487 case AArch64::LDURQi:
3488 Scale = 16;
3489 return AArch64::LDRQui;
3490 case AArch64::STURQi:
3491 Scale = 16;
3492 return AArch64::STRQui;
3493 case AArch64::LDURDi:
3494 Scale = 8;
3495 return AArch64::LDRDui;
3496 case AArch64::STURDi:
3497 Scale = 8;
3498 return AArch64::STRDui;
3499 case AArch64::LDURXi:
3500 Scale = 8;
3501 return AArch64::LDRXui;
3502 case AArch64::STURXi:
3503 Scale = 8;
3504 return AArch64::STRXui;
3505 case AArch64::LDURWi:
3506 Scale = 4;
3507 return AArch64::LDRWui;
3508 case AArch64::LDURSWi:
3509 Scale = 4;
3510 return AArch64::LDRSWui;
3511 case AArch64::STURWi:
3512 Scale = 4;
3513 return AArch64::STRWui;
3514 case AArch64::LDURHi:
3515 Scale = 2;
3516 return AArch64::LDRHui;
3517 case AArch64::STURHi:
3518 Scale = 2;
3519 return AArch64::STRHui;
3520 case AArch64::LDURHHi:
3521 Scale = 2;
3522 return AArch64::LDRHHui;
3523 case AArch64::STURHHi:
3524 Scale = 2;
3525 return AArch64::STRHHui;
3526 case AArch64::LDURSHXi:
3527 Scale = 2;
3528 return AArch64::LDRSHXui;
3529 case AArch64::LDURSHWi:
3530 Scale = 2;
3531 return AArch64::LDRSHWui;
3532 case AArch64::LDURBi:
3533 Scale = 1;
3534 return AArch64::LDRBui;
3535 case AArch64::LDURBBi:
3536 Scale = 1;
3537 return AArch64::LDRBBui;
3538 case AArch64::LDURSBXi:
3539 Scale = 1;
3540 return AArch64::LDRSBXui;
3541 case AArch64::LDURSBWi:
3542 Scale = 1;
3543 return AArch64::LDRSBWui;
3544 case AArch64::STURBi:
3545 Scale = 1;
3546 return AArch64::STRBui;
3547 case AArch64::STURBBi:
3548 Scale = 1;
3549 return AArch64::STRBBui;
3550 case AArch64::LDRQui:
3551 case AArch64::STRQui:
3552 Scale = 16;
3553 return Opcode;
3554 case AArch64::LDRDui:
3555 case AArch64::STRDui:
3556 case AArch64::LDRXui:
3557 case AArch64::STRXui:
3558 Scale = 8;
3559 return Opcode;
3560 case AArch64::LDRWui:
3561 case AArch64::LDRSWui:
3562 case AArch64::STRWui:
3563 Scale = 4;
3564 return Opcode;
3565 case AArch64::LDRHui:
3566 case AArch64::STRHui:
3567 case AArch64::LDRHHui:
3568 case AArch64::STRHHui:
3569 case AArch64::LDRSHXui:
3570 case AArch64::LDRSHWui:
3571 Scale = 2;
3572 return Opcode;
3573 case AArch64::LDRBui:
3574 case AArch64::LDRBBui:
3575 case AArch64::LDRSBXui:
3576 case AArch64::LDRSBWui:
3577 case AArch64::STRBui:
3578 case AArch64::STRBBui:
3579 Scale = 1;
3580 return Opcode;
3581 }
3582}
3583
3584// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3585// the opcode of an instruction performing the same operation, but using the
3586// [Reg, #Imm] addressing mode with unscaled offset.
3587unsigned unscaledOffsetOpcode(unsigned Opcode) {
3588 switch (Opcode) {
3589 default:
3590 llvm_unreachable("Address folding not implemented for instruction");
3591
3592 case AArch64::LDURQi:
3593 case AArch64::STURQi:
3594 case AArch64::LDURDi:
3595 case AArch64::STURDi:
3596 case AArch64::LDURXi:
3597 case AArch64::STURXi:
3598 case AArch64::LDURWi:
3599 case AArch64::LDURSWi:
3600 case AArch64::STURWi:
3601 case AArch64::LDURHi:
3602 case AArch64::STURHi:
3603 case AArch64::LDURHHi:
3604 case AArch64::STURHHi:
3605 case AArch64::LDURSHXi:
3606 case AArch64::LDURSHWi:
3607 case AArch64::LDURBi:
3608 case AArch64::STURBi:
3609 case AArch64::LDURBBi:
3610 case AArch64::STURBBi:
3611 case AArch64::LDURSBWi:
3612 case AArch64::LDURSBXi:
3613 return Opcode;
3614 case AArch64::LDRQui:
3615 return AArch64::LDURQi;
3616 case AArch64::STRQui:
3617 return AArch64::STURQi;
3618 case AArch64::LDRDui:
3619 return AArch64::LDURDi;
3620 case AArch64::STRDui:
3621 return AArch64::STURDi;
3622 case AArch64::LDRXui:
3623 return AArch64::LDURXi;
3624 case AArch64::STRXui:
3625 return AArch64::STURXi;
3626 case AArch64::LDRWui:
3627 return AArch64::LDURWi;
3628 case AArch64::LDRSWui:
3629 return AArch64::LDURSWi;
3630 case AArch64::STRWui:
3631 return AArch64::STURWi;
3632 case AArch64::LDRHui:
3633 return AArch64::LDURHi;
3634 case AArch64::STRHui:
3635 return AArch64::STURHi;
3636 case AArch64::LDRHHui:
3637 return AArch64::LDURHHi;
3638 case AArch64::STRHHui:
3639 return AArch64::STURHHi;
3640 case AArch64::LDRSHXui:
3641 return AArch64::LDURSHXi;
3642 case AArch64::LDRSHWui:
3643 return AArch64::LDURSHWi;
3644 case AArch64::LDRBBui:
3645 return AArch64::LDURBBi;
3646 case AArch64::LDRBui:
3647 return AArch64::LDURBi;
3648 case AArch64::STRBBui:
3649 return AArch64::STURBBi;
3650 case AArch64::STRBui:
3651 return AArch64::STURBi;
3652 case AArch64::LDRSBWui:
3653 return AArch64::LDURSBWi;
3654 case AArch64::LDRSBXui:
3655 return AArch64::LDURSBXi;
3656 }
3657}
3658
3659// Given the opcode of a memory load/store instruction, return the opcode of an
3660// instruction performing the same operation, but using
3661// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3662// offset register.
3663static unsigned offsetExtendOpcode(unsigned Opcode) {
3664 switch (Opcode) {
3665 default:
3666 llvm_unreachable("Address folding not implemented for instruction");
3667
3668 case AArch64::LDRQroX:
3669 case AArch64::LDURQi:
3670 case AArch64::LDRQui:
3671 return AArch64::LDRQroW;
3672 case AArch64::STRQroX:
3673 case AArch64::STURQi:
3674 case AArch64::STRQui:
3675 return AArch64::STRQroW;
3676 case AArch64::LDRDroX:
3677 case AArch64::LDURDi:
3678 case AArch64::LDRDui:
3679 return AArch64::LDRDroW;
3680 case AArch64::STRDroX:
3681 case AArch64::STURDi:
3682 case AArch64::STRDui:
3683 return AArch64::STRDroW;
3684 case AArch64::LDRXroX:
3685 case AArch64::LDURXi:
3686 case AArch64::LDRXui:
3687 return AArch64::LDRXroW;
3688 case AArch64::STRXroX:
3689 case AArch64::STURXi:
3690 case AArch64::STRXui:
3691 return AArch64::STRXroW;
3692 case AArch64::LDRWroX:
3693 case AArch64::LDURWi:
3694 case AArch64::LDRWui:
3695 return AArch64::LDRWroW;
3696 case AArch64::LDRSWroX:
3697 case AArch64::LDURSWi:
3698 case AArch64::LDRSWui:
3699 return AArch64::LDRSWroW;
3700 case AArch64::STRWroX:
3701 case AArch64::STURWi:
3702 case AArch64::STRWui:
3703 return AArch64::STRWroW;
3704 case AArch64::LDRHroX:
3705 case AArch64::LDURHi:
3706 case AArch64::LDRHui:
3707 return AArch64::LDRHroW;
3708 case AArch64::STRHroX:
3709 case AArch64::STURHi:
3710 case AArch64::STRHui:
3711 return AArch64::STRHroW;
3712 case AArch64::LDRHHroX:
3713 case AArch64::LDURHHi:
3714 case AArch64::LDRHHui:
3715 return AArch64::LDRHHroW;
3716 case AArch64::STRHHroX:
3717 case AArch64::STURHHi:
3718 case AArch64::STRHHui:
3719 return AArch64::STRHHroW;
3720 case AArch64::LDRSHXroX:
3721 case AArch64::LDURSHXi:
3722 case AArch64::LDRSHXui:
3723 return AArch64::LDRSHXroW;
3724 case AArch64::LDRSHWroX:
3725 case AArch64::LDURSHWi:
3726 case AArch64::LDRSHWui:
3727 return AArch64::LDRSHWroW;
3728 case AArch64::LDRBroX:
3729 case AArch64::LDURBi:
3730 case AArch64::LDRBui:
3731 return AArch64::LDRBroW;
3732 case AArch64::LDRBBroX:
3733 case AArch64::LDURBBi:
3734 case AArch64::LDRBBui:
3735 return AArch64::LDRBBroW;
3736 case AArch64::LDRSBXroX:
3737 case AArch64::LDURSBXi:
3738 case AArch64::LDRSBXui:
3739 return AArch64::LDRSBXroW;
3740 case AArch64::LDRSBWroX:
3741 case AArch64::LDURSBWi:
3742 case AArch64::LDRSBWui:
3743 return AArch64::LDRSBWroW;
3744 case AArch64::STRBroX:
3745 case AArch64::STURBi:
3746 case AArch64::STRBui:
3747 return AArch64::STRBroW;
3748 case AArch64::STRBBroX:
3749 case AArch64::STURBBi:
3750 case AArch64::STRBBui:
3751 return AArch64::STRBBroW;
3752 }
3753}
3754
3755MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3756                                                 const ExtAddrMode &AM) const {
3757
3758 const DebugLoc &DL = MemI.getDebugLoc();
3759 MachineBasicBlock &MBB = *MemI.getParent();
3760  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3761
3762  if (AM.Form == ExtAddrMode::Formula::Basic) {
3763 if (AM.ScaledReg) {
3764 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3765 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3766 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3767 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3768 .addReg(MemI.getOperand(0).getReg(),
3769 MemI.mayLoad() ? RegState::Define : 0)
3770 .addReg(AM.BaseReg)
3771 .addReg(AM.ScaledReg)
3772 .addImm(0)
3773 .addImm(AM.Scale > 1)
3774 .setMemRefs(MemI.memoperands())
3775 .setMIFlags(MemI.getFlags());
3776 return B.getInstr();
3777 }
3778
3779 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3780 "Addressing mode not supported for folding");
3781
3782 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3783 unsigned Scale = 1;
3784 unsigned Opcode = MemI.getOpcode();
3785 if (isInt<9>(AM.Displacement))
3786 Opcode = unscaledOffsetOpcode(Opcode);
3787 else
3788 Opcode = scaledOffsetOpcode(Opcode, Scale);
3789
3790 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3791 .addReg(MemI.getOperand(0).getReg(),
3792 MemI.mayLoad() ? RegState::Define : 0)
3793 .addReg(AM.BaseReg)
3794 .addImm(AM.Displacement / Scale)
3795 .setMemRefs(MemI.memoperands())
3796 .setMIFlags(MemI.getFlags());
3797 return B.getInstr();
3798 }
3799
3800  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3801      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3802 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3803 assert(AM.ScaledReg && !AM.Displacement &&
3804 "Address offset can be a register or an immediate, but not both");
3805 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3806 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3807 // Make sure the offset register is in the correct register class.
3808 Register OffsetReg = AM.ScaledReg;
3809 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3810 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3811 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3812 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3813 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3814 }
3815 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3816 .addReg(MemI.getOperand(0).getReg(),
3817 MemI.mayLoad() ? RegState::Define : 0)
3818 .addReg(AM.BaseReg)
3819 .addReg(OffsetReg)
3820                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3821                 .addImm(AM.Scale != 1)
3822 .setMemRefs(MemI.memoperands())
3823 .setMIFlags(MemI.getFlags());
3824
3825 return B.getInstr();
3826 }
3827
3828  llvm_unreachable(
3829 "Function must not be called with an addressing mode it can't handle");
3830}
3831
3832/// Return true if the opcode is a post-index ld/st instruction, which really
3833/// loads from or stores to base+0.
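/// For example, `ldr x0, [x1], #16` accesses memory at [x1] and only afterwards
/// increments x1 by 16, so the effective access offset is 0.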
3834static bool isPostIndexLdStOpcode(unsigned Opcode) {
3835 switch (Opcode) {
3836 default:
3837 return false;
3838 case AArch64::LD1Fourv16b_POST:
3839 case AArch64::LD1Fourv1d_POST:
3840 case AArch64::LD1Fourv2d_POST:
3841 case AArch64::LD1Fourv2s_POST:
3842 case AArch64::LD1Fourv4h_POST:
3843 case AArch64::LD1Fourv4s_POST:
3844 case AArch64::LD1Fourv8b_POST:
3845 case AArch64::LD1Fourv8h_POST:
3846 case AArch64::LD1Onev16b_POST:
3847 case AArch64::LD1Onev1d_POST:
3848 case AArch64::LD1Onev2d_POST:
3849 case AArch64::LD1Onev2s_POST:
3850 case AArch64::LD1Onev4h_POST:
3851 case AArch64::LD1Onev4s_POST:
3852 case AArch64::LD1Onev8b_POST:
3853 case AArch64::LD1Onev8h_POST:
3854 case AArch64::LD1Rv16b_POST:
3855 case AArch64::LD1Rv1d_POST:
3856 case AArch64::LD1Rv2d_POST:
3857 case AArch64::LD1Rv2s_POST:
3858 case AArch64::LD1Rv4h_POST:
3859 case AArch64::LD1Rv4s_POST:
3860 case AArch64::LD1Rv8b_POST:
3861 case AArch64::LD1Rv8h_POST:
3862 case AArch64::LD1Threev16b_POST:
3863 case AArch64::LD1Threev1d_POST:
3864 case AArch64::LD1Threev2d_POST:
3865 case AArch64::LD1Threev2s_POST:
3866 case AArch64::LD1Threev4h_POST:
3867 case AArch64::LD1Threev4s_POST:
3868 case AArch64::LD1Threev8b_POST:
3869 case AArch64::LD1Threev8h_POST:
3870 case AArch64::LD1Twov16b_POST:
3871 case AArch64::LD1Twov1d_POST:
3872 case AArch64::LD1Twov2d_POST:
3873 case AArch64::LD1Twov2s_POST:
3874 case AArch64::LD1Twov4h_POST:
3875 case AArch64::LD1Twov4s_POST:
3876 case AArch64::LD1Twov8b_POST:
3877 case AArch64::LD1Twov8h_POST:
3878 case AArch64::LD1i16_POST:
3879 case AArch64::LD1i32_POST:
3880 case AArch64::LD1i64_POST:
3881 case AArch64::LD1i8_POST:
3882 case AArch64::LD2Rv16b_POST:
3883 case AArch64::LD2Rv1d_POST:
3884 case AArch64::LD2Rv2d_POST:
3885 case AArch64::LD2Rv2s_POST:
3886 case AArch64::LD2Rv4h_POST:
3887 case AArch64::LD2Rv4s_POST:
3888 case AArch64::LD2Rv8b_POST:
3889 case AArch64::LD2Rv8h_POST:
3890 case AArch64::LD2Twov16b_POST:
3891 case AArch64::LD2Twov2d_POST:
3892 case AArch64::LD2Twov2s_POST:
3893 case AArch64::LD2Twov4h_POST:
3894 case AArch64::LD2Twov4s_POST:
3895 case AArch64::LD2Twov8b_POST:
3896 case AArch64::LD2Twov8h_POST:
3897 case AArch64::LD2i16_POST:
3898 case AArch64::LD2i32_POST:
3899 case AArch64::LD2i64_POST:
3900 case AArch64::LD2i8_POST:
3901 case AArch64::LD3Rv16b_POST:
3902 case AArch64::LD3Rv1d_POST:
3903 case AArch64::LD3Rv2d_POST:
3904 case AArch64::LD3Rv2s_POST:
3905 case AArch64::LD3Rv4h_POST:
3906 case AArch64::LD3Rv4s_POST:
3907 case AArch64::LD3Rv8b_POST:
3908 case AArch64::LD3Rv8h_POST:
3909 case AArch64::LD3Threev16b_POST:
3910 case AArch64::LD3Threev2d_POST:
3911 case AArch64::LD3Threev2s_POST:
3912 case AArch64::LD3Threev4h_POST:
3913 case AArch64::LD3Threev4s_POST:
3914 case AArch64::LD3Threev8b_POST:
3915 case AArch64::LD3Threev8h_POST:
3916 case AArch64::LD3i16_POST:
3917 case AArch64::LD3i32_POST:
3918 case AArch64::LD3i64_POST:
3919 case AArch64::LD3i8_POST:
3920 case AArch64::LD4Fourv16b_POST:
3921 case AArch64::LD4Fourv2d_POST:
3922 case AArch64::LD4Fourv2s_POST:
3923 case AArch64::LD4Fourv4h_POST:
3924 case AArch64::LD4Fourv4s_POST:
3925 case AArch64::LD4Fourv8b_POST:
3926 case AArch64::LD4Fourv8h_POST:
3927 case AArch64::LD4Rv16b_POST:
3928 case AArch64::LD4Rv1d_POST:
3929 case AArch64::LD4Rv2d_POST:
3930 case AArch64::LD4Rv2s_POST:
3931 case AArch64::LD4Rv4h_POST:
3932 case AArch64::LD4Rv4s_POST:
3933 case AArch64::LD4Rv8b_POST:
3934 case AArch64::LD4Rv8h_POST:
3935 case AArch64::LD4i16_POST:
3936 case AArch64::LD4i32_POST:
3937 case AArch64::LD4i64_POST:
3938 case AArch64::LD4i8_POST:
3939 case AArch64::LDAPRWpost:
3940 case AArch64::LDAPRXpost:
3941 case AArch64::LDIAPPWpost:
3942 case AArch64::LDIAPPXpost:
3943 case AArch64::LDPDpost:
3944 case AArch64::LDPQpost:
3945 case AArch64::LDPSWpost:
3946 case AArch64::LDPSpost:
3947 case AArch64::LDPWpost:
3948 case AArch64::LDPXpost:
3949 case AArch64::LDRBBpost:
3950 case AArch64::LDRBpost:
3951 case AArch64::LDRDpost:
3952 case AArch64::LDRHHpost:
3953 case AArch64::LDRHpost:
3954 case AArch64::LDRQpost:
3955 case AArch64::LDRSBWpost:
3956 case AArch64::LDRSBXpost:
3957 case AArch64::LDRSHWpost:
3958 case AArch64::LDRSHXpost:
3959 case AArch64::LDRSWpost:
3960 case AArch64::LDRSpost:
3961 case AArch64::LDRWpost:
3962 case AArch64::LDRXpost:
3963 case AArch64::ST1Fourv16b_POST:
3964 case AArch64::ST1Fourv1d_POST:
3965 case AArch64::ST1Fourv2d_POST:
3966 case AArch64::ST1Fourv2s_POST:
3967 case AArch64::ST1Fourv4h_POST:
3968 case AArch64::ST1Fourv4s_POST:
3969 case AArch64::ST1Fourv8b_POST:
3970 case AArch64::ST1Fourv8h_POST:
3971 case AArch64::ST1Onev16b_POST:
3972 case AArch64::ST1Onev1d_POST:
3973 case AArch64::ST1Onev2d_POST:
3974 case AArch64::ST1Onev2s_POST:
3975 case AArch64::ST1Onev4h_POST:
3976 case AArch64::ST1Onev4s_POST:
3977 case AArch64::ST1Onev8b_POST:
3978 case AArch64::ST1Onev8h_POST:
3979 case AArch64::ST1Threev16b_POST:
3980 case AArch64::ST1Threev1d_POST:
3981 case AArch64::ST1Threev2d_POST:
3982 case AArch64::ST1Threev2s_POST:
3983 case AArch64::ST1Threev4h_POST:
3984 case AArch64::ST1Threev4s_POST:
3985 case AArch64::ST1Threev8b_POST:
3986 case AArch64::ST1Threev8h_POST:
3987 case AArch64::ST1Twov16b_POST:
3988 case AArch64::ST1Twov1d_POST:
3989 case AArch64::ST1Twov2d_POST:
3990 case AArch64::ST1Twov2s_POST:
3991 case AArch64::ST1Twov4h_POST:
3992 case AArch64::ST1Twov4s_POST:
3993 case AArch64::ST1Twov8b_POST:
3994 case AArch64::ST1Twov8h_POST:
3995 case AArch64::ST1i16_POST:
3996 case AArch64::ST1i32_POST:
3997 case AArch64::ST1i64_POST:
3998 case AArch64::ST1i8_POST:
3999 case AArch64::ST2GPostIndex:
4000 case AArch64::ST2Twov16b_POST:
4001 case AArch64::ST2Twov2d_POST:
4002 case AArch64::ST2Twov2s_POST:
4003 case AArch64::ST2Twov4h_POST:
4004 case AArch64::ST2Twov4s_POST:
4005 case AArch64::ST2Twov8b_POST:
4006 case AArch64::ST2Twov8h_POST:
4007 case AArch64::ST2i16_POST:
4008 case AArch64::ST2i32_POST:
4009 case AArch64::ST2i64_POST:
4010 case AArch64::ST2i8_POST:
4011 case AArch64::ST3Threev16b_POST:
4012 case AArch64::ST3Threev2d_POST:
4013 case AArch64::ST3Threev2s_POST:
4014 case AArch64::ST3Threev4h_POST:
4015 case AArch64::ST3Threev4s_POST:
4016 case AArch64::ST3Threev8b_POST:
4017 case AArch64::ST3Threev8h_POST:
4018 case AArch64::ST3i16_POST:
4019 case AArch64::ST3i32_POST:
4020 case AArch64::ST3i64_POST:
4021 case AArch64::ST3i8_POST:
4022 case AArch64::ST4Fourv16b_POST:
4023 case AArch64::ST4Fourv2d_POST:
4024 case AArch64::ST4Fourv2s_POST:
4025 case AArch64::ST4Fourv4h_POST:
4026 case AArch64::ST4Fourv4s_POST:
4027 case AArch64::ST4Fourv8b_POST:
4028 case AArch64::ST4Fourv8h_POST:
4029 case AArch64::ST4i16_POST:
4030 case AArch64::ST4i32_POST:
4031 case AArch64::ST4i64_POST:
4032 case AArch64::ST4i8_POST:
4033 case AArch64::STGPostIndex:
4034 case AArch64::STGPpost:
4035 case AArch64::STPDpost:
4036 case AArch64::STPQpost:
4037 case AArch64::STPSpost:
4038 case AArch64::STPWpost:
4039 case AArch64::STPXpost:
4040 case AArch64::STRBBpost:
4041 case AArch64::STRBpost:
4042 case AArch64::STRDpost:
4043 case AArch64::STRHHpost:
4044 case AArch64::STRHpost:
4045 case AArch64::STRQpost:
4046 case AArch64::STRSpost:
4047 case AArch64::STRWpost:
4048 case AArch64::STRXpost:
4049 case AArch64::STZ2GPostIndex:
4050 case AArch64::STZGPostIndex:
4051 return true;
4052 }
4053}
4054
4055bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4056    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4057 bool &OffsetIsScalable, TypeSize &Width,
4058 const TargetRegisterInfo *TRI) const {
4059 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4060 // Handle only loads/stores with base register followed by immediate offset.
4061 if (LdSt.getNumExplicitOperands() == 3) {
4062 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4063 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4064 !LdSt.getOperand(2).isImm())
4065 return false;
4066 } else if (LdSt.getNumExplicitOperands() == 4) {
4067 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4068 if (!LdSt.getOperand(1).isReg() ||
4069 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4070 !LdSt.getOperand(3).isImm())
4071 return false;
4072 } else
4073 return false;
4074
4075 // Get the scaling factor for the instruction and set the width for the
4076 // instruction.
4077 TypeSize Scale(0U, false);
4078 int64_t Dummy1, Dummy2;
4079
4080 // If this returns false, then it's an instruction we don't want to handle.
4081 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4082 return false;
4083
4084  // Compute the offset. The offset is the immediate operand multiplied by the
4085  // scaling factor. Unscaled instructions have a scaling factor of 1. Post-index
4086  // instructions are a special case and have an offset of 0.
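  // For example, LDRXui with an immediate of 2 and Scale == 8 yields a byte offset
  // of 16, while the unscaled LDURXi encodes the byte offset directly (Scale == 1).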
4087 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4088 BaseOp = &LdSt.getOperand(2);
4089 Offset = 0;
4090 } else if (LdSt.getNumExplicitOperands() == 3) {
4091 BaseOp = &LdSt.getOperand(1);
4092 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4093 } else {
4094 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4095 BaseOp = &LdSt.getOperand(2);
4096 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4097 }
4098 OffsetIsScalable = Scale.isScalable();
4099
4100 return BaseOp->isReg() || BaseOp->isFI();
4101}
4102
4105 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4106 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4107 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4108 return OfsOp;
4109}
4110
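// For example, getMemOpInfo(AArch64::LDRQui, ...) reports a Scale and Width of 16
// bytes with an immediate range of [0, 4095] (byte offsets 0 to 65520), while the
// unscaled LDURQi reports Scale 1, Width 16 and a range of [-256, 255].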
4111bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4112 TypeSize &Width, int64_t &MinOffset,
4113 int64_t &MaxOffset) {
4114 switch (Opcode) {
4115  // Not a memory operation, or not something we want to handle.
4116 default:
4117 Scale = TypeSize::getFixed(0);
4118 Width = TypeSize::getFixed(0);
4119 MinOffset = MaxOffset = 0;
4120 return false;
4121 // LDR / STR
4122 case AArch64::LDRQui:
4123 case AArch64::STRQui:
4124 Scale = TypeSize::getFixed(16);
4125 Width = TypeSize::getFixed(16);
4126 MinOffset = 0;
4127 MaxOffset = 4095;
4128 break;
4129 case AArch64::LDRXui:
4130 case AArch64::LDRDui:
4131 case AArch64::STRXui:
4132 case AArch64::STRDui:
4133 case AArch64::PRFMui:
4134 Scale = TypeSize::getFixed(8);
4135 Width = TypeSize::getFixed(8);
4136 MinOffset = 0;
4137 MaxOffset = 4095;
4138 break;
4139 case AArch64::LDRWui:
4140 case AArch64::LDRSui:
4141 case AArch64::LDRSWui:
4142 case AArch64::STRWui:
4143 case AArch64::STRSui:
4144 Scale = TypeSize::getFixed(4);
4145 Width = TypeSize::getFixed(4);
4146 MinOffset = 0;
4147 MaxOffset = 4095;
4148 break;
4149 case AArch64::LDRHui:
4150 case AArch64::LDRHHui:
4151 case AArch64::LDRSHWui:
4152 case AArch64::LDRSHXui:
4153 case AArch64::STRHui:
4154 case AArch64::STRHHui:
4155 Scale = TypeSize::getFixed(2);
4156 Width = TypeSize::getFixed(2);
4157 MinOffset = 0;
4158 MaxOffset = 4095;
4159 break;
4160 case AArch64::LDRBui:
4161 case AArch64::LDRBBui:
4162 case AArch64::LDRSBWui:
4163 case AArch64::LDRSBXui:
4164 case AArch64::STRBui:
4165 case AArch64::STRBBui:
4166 Scale = TypeSize::getFixed(1);
4167 Width = TypeSize::getFixed(1);
4168 MinOffset = 0;
4169 MaxOffset = 4095;
4170 break;
4171 // post/pre inc
4172 case AArch64::STRQpre:
4173 case AArch64::LDRQpost:
4174 Scale = TypeSize::getFixed(1);
4175 Width = TypeSize::getFixed(16);
4176 MinOffset = -256;
4177 MaxOffset = 255;
4178 break;
4179 case AArch64::LDRDpost:
4180 case AArch64::LDRDpre:
4181 case AArch64::LDRXpost:
4182 case AArch64::LDRXpre:
4183 case AArch64::STRDpost:
4184 case AArch64::STRDpre:
4185 case AArch64::STRXpost:
4186 case AArch64::STRXpre:
4187 Scale = TypeSize::getFixed(1);
4188 Width = TypeSize::getFixed(8);
4189 MinOffset = -256;
4190 MaxOffset = 255;
4191 break;
4192 case AArch64::STRWpost:
4193 case AArch64::STRWpre:
4194 case AArch64::LDRWpost:
4195 case AArch64::LDRWpre:
4196 case AArch64::STRSpost:
4197 case AArch64::STRSpre:
4198 case AArch64::LDRSpost:
4199 case AArch64::LDRSpre:
4200 Scale = TypeSize::getFixed(1);
4201 Width = TypeSize::getFixed(4);
4202 MinOffset = -256;
4203 MaxOffset = 255;
4204 break;
4205 case AArch64::LDRHpost:
4206 case AArch64::LDRHpre:
4207 case AArch64::STRHpost:
4208 case AArch64::STRHpre:
4209 case AArch64::LDRHHpost:
4210 case AArch64::LDRHHpre:
4211 case AArch64::STRHHpost:
4212 case AArch64::STRHHpre:
4213 Scale = TypeSize::getFixed(1);
4214 Width = TypeSize::getFixed(2);
4215 MinOffset = -256;
4216 MaxOffset = 255;
4217 break;
4218 case AArch64::LDRBpost:
4219 case AArch64::LDRBpre:
4220 case AArch64::STRBpost:
4221 case AArch64::STRBpre:
4222 case AArch64::LDRBBpost:
4223 case AArch64::LDRBBpre:
4224 case AArch64::STRBBpost:
4225 case AArch64::STRBBpre:
4226 Scale = TypeSize::getFixed(1);
4227 Width = TypeSize::getFixed(1);
4228 MinOffset = -256;
4229 MaxOffset = 255;
4230 break;
4231 // Unscaled
4232 case AArch64::LDURQi:
4233 case AArch64::STURQi:
4234 Scale = TypeSize::getFixed(1);
4235 Width = TypeSize::getFixed(16);
4236 MinOffset = -256;
4237 MaxOffset = 255;
4238 break;
4239 case AArch64::LDURXi:
4240 case AArch64::LDURDi:
4241 case AArch64::LDAPURXi:
4242 case AArch64::STURXi:
4243 case AArch64::STURDi:
4244 case AArch64::STLURXi:
4245 case AArch64::PRFUMi:
4246 Scale = TypeSize::getFixed(1);
4247 Width = TypeSize::getFixed(8);
4248 MinOffset = -256;
4249 MaxOffset = 255;
4250 break;
4251 case AArch64::LDURWi:
4252 case AArch64::LDURSi:
4253 case AArch64::LDURSWi:
4254 case AArch64::LDAPURi:
4255 case AArch64::LDAPURSWi:
4256 case AArch64::STURWi:
4257 case AArch64::STURSi:
4258 case AArch64::STLURWi:
4259 Scale = TypeSize::getFixed(1);
4260 Width = TypeSize::getFixed(4);
4261 MinOffset = -256;
4262 MaxOffset = 255;
4263 break;
4264 case AArch64::LDURHi:
4265 case AArch64::LDURHHi:
4266 case AArch64::LDURSHXi:
4267 case AArch64::LDURSHWi:
4268 case AArch64::LDAPURHi:
4269 case AArch64::LDAPURSHWi:
4270 case AArch64::LDAPURSHXi:
4271 case AArch64::STURHi:
4272 case AArch64::STURHHi:
4273 case AArch64::STLURHi:
4274 Scale = TypeSize::getFixed(1);
4275 Width = TypeSize::getFixed(2);
4276 MinOffset = -256;
4277 MaxOffset = 255;
4278 break;
4279 case AArch64::LDURBi:
4280 case AArch64::LDURBBi:
4281 case AArch64::LDURSBXi:
4282 case AArch64::LDURSBWi:
4283 case AArch64::LDAPURBi:
4284 case AArch64::LDAPURSBWi:
4285 case AArch64::LDAPURSBXi:
4286 case AArch64::STURBi:
4287 case AArch64::STURBBi:
4288 case AArch64::STLURBi:
4289 Scale = TypeSize::getFixed(1);
4290 Width = TypeSize::getFixed(1);
4291 MinOffset = -256;
4292 MaxOffset = 255;
4293 break;
4294 // LDP / STP (including pre/post inc)
4295 case AArch64::LDPQi:
4296 case AArch64::LDNPQi:
4297 case AArch64::STPQi:
4298 case AArch64::STNPQi:
4299 case AArch64::LDPQpost:
4300 case AArch64::LDPQpre:
4301 case AArch64::STPQpost:
4302 case AArch64::STPQpre:
4303 Scale = TypeSize::getFixed(16);
4304 Width = TypeSize::getFixed(16 * 2);
4305 MinOffset = -64;
4306 MaxOffset = 63;
4307 break;
4308 case AArch64::LDPXi:
4309 case AArch64::LDPDi:
4310 case AArch64::LDNPXi:
4311 case AArch64::LDNPDi:
4312 case AArch64::STPXi:
4313 case AArch64::STPDi:
4314 case AArch64::STNPXi:
4315 case AArch64::STNPDi:
4316 case AArch64::LDPDpost:
4317 case AArch64::LDPDpre:
4318 case AArch64::LDPXpost:
4319 case AArch64::LDPXpre:
4320 case AArch64::STPDpost:
4321 case AArch64::STPDpre:
4322 case AArch64::STPXpost:
4323 case AArch64::STPXpre:
4324 Scale = TypeSize::getFixed(8);
4325 Width = TypeSize::getFixed(8 * 2);
4326 MinOffset = -64;
4327 MaxOffset = 63;
4328 break;
4329 case AArch64::LDPWi:
4330 case AArch64::LDPSi:
4331 case AArch64::LDNPWi:
4332 case AArch64::LDNPSi:
4333 case AArch64::STPWi:
4334 case AArch64::STPSi:
4335 case AArch64::STNPWi:
4336 case AArch64::STNPSi:
4337 case AArch64::LDPSpost:
4338 case AArch64::LDPSpre:
4339 case AArch64::LDPWpost:
4340 case AArch64::LDPWpre:
4341 case AArch64::STPSpost:
4342 case AArch64::STPSpre:
4343 case AArch64::STPWpost:
4344 case AArch64::STPWpre:
4345 Scale = TypeSize::getFixed(4);
4346 Width = TypeSize::getFixed(4 * 2);
4347 MinOffset = -64;
4348 MaxOffset = 63;
4349 break;
4350 case AArch64::StoreSwiftAsyncContext:
4351 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4352 Scale = TypeSize::getFixed(1);
4353 Width = TypeSize::getFixed(8);
4354 MinOffset = 0;
4355 MaxOffset = 4095;
4356 break;
4357 case AArch64::ADDG:
4358 Scale = TypeSize::getFixed(16);
4359 Width = TypeSize::getFixed(0);
4360 MinOffset = 0;
4361 MaxOffset = 63;
4362 break;
4363 case AArch64::TAGPstack:
4364 Scale = TypeSize::getFixed(16);
4365 Width = TypeSize::getFixed(0);
4366 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4367 // of 63 (not 64!).
4368 MinOffset = -63;
4369 MaxOffset = 63;
4370 break;
4371 case AArch64::LDG:
4372 case AArch64::STGi:
4373 case AArch64::STGPreIndex:
4374 case AArch64::STGPostIndex:
4375 case AArch64::STZGi:
4376 case AArch64::STZGPreIndex:
4377 case AArch64::STZGPostIndex:
4378 Scale = TypeSize::getFixed(16);
4379 Width = TypeSize::getFixed(16);
4380 MinOffset = -256;
4381 MaxOffset = 255;
4382 break;
4383 // SVE
4384 case AArch64::STR_ZZZZXI:
4385 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4386 case AArch64::LDR_ZZZZXI:
4387 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4388 Scale = TypeSize::getScalable(16);
4389 Width = TypeSize::getScalable(16 * 4);
4390 MinOffset = -256;
4391 MaxOffset = 252;
4392 break;
4393 case AArch64::STR_ZZZXI:
4394 case AArch64::LDR_ZZZXI:
4395 Scale = TypeSize::getScalable(16);
4396 Width = TypeSize::getScalable(16 * 3);
4397 MinOffset = -256;
4398 MaxOffset = 253;
4399 break;
4400 case AArch64::STR_ZZXI:
4401 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4402 case AArch64::LDR_ZZXI:
4403 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4404 Scale = TypeSize::getScalable(16);
4405 Width = TypeSize::getScalable(16 * 2);
4406 MinOffset = -256;
4407 MaxOffset = 254;
4408 break;
4409 case AArch64::LDR_PXI:
4410 case AArch64::STR_PXI:
4411 Scale = TypeSize::getScalable(2);
4412 Width = TypeSize::getScalable(2);
4413 MinOffset = -256;
4414 MaxOffset = 255;
4415 break;
4416 case AArch64::LDR_PPXI:
4417 case AArch64::STR_PPXI:
4418 Scale = TypeSize::getScalable(2);
4419 Width = TypeSize::getScalable(2 * 2);
4420 MinOffset = -256;
4421 MaxOffset = 254;
4422 break;
4423 case AArch64::LDR_ZXI:
4424 case AArch64::STR_ZXI:
4425 Scale = TypeSize::getScalable(16);
4426 Width = TypeSize::getScalable(16);
4427 MinOffset = -256;
4428 MaxOffset = 255;
4429 break;
4430 case AArch64::LD1B_IMM:
4431 case AArch64::LD1H_IMM:
4432 case AArch64::LD1W_IMM:
4433 case AArch64::LD1D_IMM:
4434 case AArch64::LDNT1B_ZRI:
4435 case AArch64::LDNT1H_ZRI:
4436 case AArch64::LDNT1W_ZRI:
4437 case AArch64::LDNT1D_ZRI:
4438 case AArch64::ST1B_IMM:
4439 case AArch64::ST1H_IMM:
4440 case AArch64::ST1W_IMM:
4441 case AArch64::ST1D_IMM:
4442 case AArch64::STNT1B_ZRI:
4443 case AArch64::STNT1H_ZRI:
4444 case AArch64::STNT1W_ZRI:
4445 case AArch64::STNT1D_ZRI:
4446 case AArch64::LDNF1B_IMM:
4447 case AArch64::LDNF1H_IMM:
4448 case AArch64::LDNF1W_IMM:
4449 case AArch64::LDNF1D_IMM:
4450    // A full vector's worth of data
4451 // Width = mbytes * elements
4452 Scale = TypeSize::getScalable(16);
4453 Width = TypeSize::getScalable(16);
4454 MinOffset = -8;
4455 MaxOffset = 7;
4456 break;
4457 case AArch64::LD2B_IMM:
4458 case AArch64::LD2H_IMM:
4459 case AArch64::LD2W_IMM:
4460 case AArch64::LD2D_IMM:
4461 case AArch64::ST2B_IMM:
4462 case AArch64::ST2H_IMM:
4463 case AArch64::ST2W_IMM:
4464 case AArch64::ST2D_IMM:
4465 Scale = TypeSize::getScalable(32);
4466 Width = TypeSize::getScalable(16 * 2);
4467 MinOffset = -8;
4468 MaxOffset = 7;
4469 break;
4470 case AArch64::LD3B_IMM:
4471 case AArch64::LD3H_IMM:
4472 case AArch64::LD3W_IMM:
4473 case AArch64::LD3D_IMM:
4474 case AArch64::ST3B_IMM:
4475 case AArch64::ST3H_IMM:
4476 case AArch64::ST3W_IMM:
4477 case AArch64::ST3D_IMM:
4478 Scale = TypeSize::getScalable(48);
4479 Width = TypeSize::getScalable(16 * 3);
4480 MinOffset = -8;
4481 MaxOffset = 7;
4482 break;
4483 case AArch64::LD4B_IMM:
4484 case AArch64::LD4H_IMM:
4485 case AArch64::LD4W_IMM:
4486 case AArch64::LD4D_IMM:
4487 case AArch64::ST4B_IMM:
4488 case AArch64::ST4H_IMM:
4489 case AArch64::ST4W_IMM:
4490 case AArch64::ST4D_IMM:
4491 Scale = TypeSize::getScalable(64);
4492 Width = TypeSize::getScalable(16 * 4);
4493 MinOffset = -8;
4494 MaxOffset = 7;
4495 break;
4496 case AArch64::LD1B_H_IMM:
4497 case AArch64::LD1SB_H_IMM:
4498 case AArch64::LD1H_S_IMM:
4499 case AArch64::LD1SH_S_IMM:
4500 case AArch64::LD1W_D_IMM:
4501 case AArch64::LD1SW_D_IMM:
4502 case AArch64::ST1B_H_IMM:
4503 case AArch64::ST1H_S_IMM:
4504 case AArch64::ST1W_D_IMM:
4505 case AArch64::LDNF1B_H_IMM:
4506 case AArch64::LDNF1SB_H_IMM:
4507 case AArch64::LDNF1H_S_IMM:
4508 case AArch64::LDNF1SH_S_IMM:
4509 case AArch64::LDNF1W_D_IMM:
4510 case AArch64::LDNF1SW_D_IMM:
4511    // A half vector's worth of data
4512 // Width = mbytes * elements
4513 Scale = TypeSize::getScalable(8);
4514 Width = TypeSize::getScalable(8);
4515 MinOffset = -8;
4516 MaxOffset = 7;
4517 break;
4518 case AArch64::LD1B_S_IMM:
4519 case AArch64::LD1SB_S_IMM:
4520 case AArch64::LD1H_D_IMM:
4521 case AArch64::LD1SH_D_IMM:
4522 case AArch64::ST1B_S_IMM:
4523 case AArch64::ST1H_D_IMM:
4524 case AArch64::LDNF1B_S_IMM:
4525 case AArch64::LDNF1SB_S_IMM:
4526 case AArch64::LDNF1H_D_IMM:
4527 case AArch64::LDNF1SH_D_IMM:
4528    // A quarter vector's worth of data
4529 // Width = mbytes * elements
4530 Scale = TypeSize::getScalable(4);
4531 Width = TypeSize::getScalable(4);
4532 MinOffset = -8;
4533 MaxOffset = 7;
4534 break;
4535 case AArch64::LD1B_D_IMM:
4536 case AArch64::LD1SB_D_IMM:
4537 case AArch64::ST1B_D_IMM:
4538 case AArch64::LDNF1B_D_IMM:
4539 case AArch64::LDNF1SB_D_IMM:
4540    // An eighth vector's worth of data
4541 // Width = mbytes * elements
4542 Scale = TypeSize::getScalable(2);
4543 Width = TypeSize::getScalable(2);
4544 MinOffset = -8;
4545 MaxOffset = 7;
4546 break;
4547 case AArch64::ST2Gi:
4548 case AArch64::ST2GPreIndex:
4549 case AArch64::ST2GPostIndex:
4550 case AArch64::STZ2Gi:
4551 case AArch64::STZ2GPreIndex:
4552 case AArch64::STZ2GPostIndex:
4553 Scale = TypeSize::getFixed(16);
4554 Width = TypeSize::getFixed(32);
4555 MinOffset = -256;
4556 MaxOffset = 255;
4557 break;
4558 case AArch64::STGPi:
4559 case AArch64::STGPpost:
4560 case AArch64::STGPpre:
4561 Scale = TypeSize::getFixed(16);
4562 Width = TypeSize::getFixed(16);
4563 MinOffset = -64;
4564 MaxOffset = 63;
4565 break;
4566 case AArch64::LD1RB_IMM:
4567 case AArch64::LD1RB_H_IMM:
4568 case AArch64::LD1RB_S_IMM:
4569 case AArch64::LD1RB_D_IMM:
4570 case AArch64::LD1RSB_H_IMM:
4571 case AArch64::LD1RSB_S_IMM:
4572 case AArch64::LD1RSB_D_IMM:
4573 Scale = TypeSize::getFixed(1);
4574 Width = TypeSize::getFixed(1);
4575 MinOffset = 0;
4576 MaxOffset = 63;
4577 break;
4578 case AArch64::LD1RH_IMM:
4579 case AArch64::LD1RH_S_IMM:
4580 case AArch64::LD1RH_D_IMM:
4581 case AArch64::LD1RSH_S_IMM:
4582 case AArch64::LD1RSH_D_IMM:
4583 Scale = TypeSize::getFixed(2);
4584 Width = TypeSize::getFixed(2);
4585 MinOffset = 0;
4586 MaxOffset = 63;
4587 break;
4588 case AArch64::LD1RW_IMM:
4589 case AArch64::LD1RW_D_IMM:
4590 case AArch64::LD1RSW_IMM:
4591 Scale = TypeSize::getFixed(4);
4592 Width = TypeSize::getFixed(4);
4593 MinOffset = 0;
4594 MaxOffset = 63;
4595 break;
4596 case AArch64::LD1RD_IMM:
4597 Scale = TypeSize::getFixed(8);
4598 Width = TypeSize::getFixed(8);
4599 MinOffset = 0;
4600 MaxOffset = 63;
4601 break;
4602 }
4603
4604 return true;
4605}
4606
4607// Scaling factor for unscaled load or store.
4608int AArch64InstrInfo::getMemScale(unsigned Opc) {
4609  switch (Opc) {
4610 default:
4611 llvm_unreachable("Opcode has unknown scale!");
4612 case AArch64::LDRBBui:
4613 case AArch64::LDURBBi:
4614 case AArch64::LDRSBWui:
4615 case AArch64::LDURSBWi:
4616 case AArch64::STRBBui:
4617 case AArch64::STURBBi:
4618 return 1;
4619 case AArch64::LDRHHui:
4620 case AArch64::LDURHHi:
4621 case AArch64::LDRSHWui:
4622 case AArch64::LDURSHWi:
4623 case AArch64::STRHHui:
4624 case AArch64::STURHHi:
4625 return 2;
4626 case AArch64::LDRSui:
4627 case AArch64::LDURSi:
4628 case AArch64::LDRSpre:
4629 case AArch64::LDRSWui:
4630 case AArch64::LDURSWi:
4631 case AArch64::LDRSWpre:
4632 case AArch64::LDRWpre:
4633 case AArch64::LDRWui:
4634 case AArch64::LDURWi:
4635 case AArch64::STRSui:
4636 case AArch64::STURSi:
4637 case AArch64::STRSpre:
4638 case AArch64::STRWui:
4639 case AArch64::STURWi:
4640 case AArch64::STRWpre:
4641 case AArch64::LDPSi:
4642 case AArch64::LDPSWi:
4643 case AArch64::LDPWi:
4644 case AArch64::STPSi:
4645 case AArch64::STPWi:
4646 return 4;
4647 case AArch64::LDRDui:
4648 case AArch64::LDURDi:
4649 case AArch64::LDRDpre:
4650 case AArch64::LDRXui:
4651 case AArch64::LDURXi:
4652 case AArch64::LDRXpre:
4653 case AArch64::STRDui:
4654 case AArch64::STURDi:
4655 case AArch64::STRDpre:
4656 case AArch64::STRXui:
4657 case AArch64::STURXi:
4658 case AArch64::STRXpre:
4659 case AArch64::LDPDi:
4660 case AArch64::LDPXi:
4661 case AArch64::STPDi:
4662 case AArch64::STPXi:
4663 return 8;
4664 case AArch64::LDRQui:
4665 case AArch64::LDURQi:
4666 case AArch64::STRQui:
4667 case AArch64::STURQi:
4668 case AArch64::STRQpre:
4669 case AArch64::LDPQi:
4670 case AArch64::LDRQpre:
4671 case AArch64::STPQi:
4672 case AArch64::STGi:
4673 case AArch64::STZGi:
4674 case AArch64::ST2Gi:
4675 case AArch64::STZ2Gi:
4676 case AArch64::STGPi:
4677 return 16;
4678 }
4679}
4680
4681bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4682  switch (MI.getOpcode()) {
4683 default:
4684 return false;
4685 case AArch64::LDRWpre:
4686 case AArch64::LDRXpre:
4687 case AArch64::LDRSWpre:
4688 case AArch64::LDRSpre:
4689 case AArch64::LDRDpre:
4690 case AArch64::LDRQpre:
4691 return true;
4692 }
4693}
4694
4695bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4696  switch (MI.getOpcode()) {
4697 default:
4698 return false;
4699 case AArch64::STRWpre:
4700 case AArch64::STRXpre:
4701 case AArch64::STRSpre:
4702 case AArch64::STRDpre:
4703 case AArch64::STRQpre:
4704 return true;
4705 }
4706}
4707
4708bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4709  return isPreLd(MI) || isPreSt(MI);
4710}
4711
4712bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4713  switch (MI.getOpcode()) {
4714 default:
4715 return false;
4716 case AArch64::LDPSi:
4717 case AArch64::LDPSWi:
4718 case AArch64::LDPDi:
4719 case AArch64::LDPQi:
4720 case AArch64::LDPWi:
4721 case AArch64::LDPXi:
4722 case AArch64::STPSi:
4723 case AArch64::STPDi:
4724 case AArch64::STPQi:
4725 case AArch64::STPWi:
4726 case AArch64::STPXi:
4727 case AArch64::STGPi:
4728 return true;
4729 }
4730}
4731
4733 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4734 unsigned Idx =
4736 : 1;
4737 return MI.getOperand(Idx);
4738}
4739
4740const MachineOperand &
4742 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4743 unsigned Idx =
4745 : 2;
4746 return MI.getOperand(Idx);
4747}
4748
4749const MachineOperand &
4751 switch (MI.getOpcode()) {
4752 default:
4753 llvm_unreachable("Unexpected opcode");
4754 case AArch64::LDRBroX:
4755 case AArch64::LDRBBroX:
4756 case AArch64::LDRSBXroX:
4757 case AArch64::LDRSBWroX:
4758 case AArch64::LDRHroX:
4759 case AArch64::LDRHHroX:
4760 case AArch64::LDRSHXroX:
4761 case AArch64::LDRSHWroX:
4762 case AArch64::LDRWroX:
4763 case AArch64::LDRSroX:
4764 case AArch64::LDRSWroX:
4765 case AArch64::LDRDroX:
4766 case AArch64::LDRXroX:
4767 case AArch64::LDRQroX:
4768 return MI.getOperand(4);
4769 }
4770}
4771
4772static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4773                                              Register Reg) {
4774 if (MI.getParent() == nullptr)
4775 return nullptr;
4776 const MachineFunction *MF = MI.getParent()->getParent();
4777 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4778}
4779
4781 auto IsHFPR = [&](const MachineOperand &Op) {
4782 if (!Op.isReg())
4783 return false;
4784 auto Reg = Op.getReg();
4785 if (Reg.isPhysical())
4786 return AArch64::FPR16RegClass.contains(Reg);
4787 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4788 return TRC == &AArch64::FPR16RegClass ||
4789 TRC == &AArch64::FPR16_loRegClass;
4790 };
4791 return llvm::any_of(MI.operands(), IsHFPR);
4792}
4793
4795 auto IsQFPR = [&](const MachineOperand &Op) {
4796 if (!Op.isReg())
4797 return false;
4798 auto Reg = Op.getReg();
4799 if (Reg.isPhysical())
4800 return AArch64::FPR128RegClass.contains(Reg);
4801 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4802 return TRC == &AArch64::FPR128RegClass ||
4803 TRC == &AArch64::FPR128_loRegClass;
4804 };
4805 return llvm::any_of(MI.operands(), IsQFPR);
4806}
4807
4809 switch (MI.getOpcode()) {
4810 case AArch64::BRK:
4811 case AArch64::HLT:
4812 case AArch64::PACIASP:
4813 case AArch64::PACIBSP:
4814 // Implicit BTI behavior.
4815 return true;
4816 case AArch64::PAUTH_PROLOGUE:
4817 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4818 return true;
4819 case AArch64::HINT: {
4820 unsigned Imm = MI.getOperand(0).getImm();
4821 // Explicit BTI instruction.
4822 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4823 return true;
4824 // PACI(A|B)SP instructions.
4825 if (Imm == 25 || Imm == 27)
4826 return true;
4827 return false;
4828 }
4829 default:
4830 return false;
4831 }
4832}
4833
4834bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4835  if (Reg == 0)
4836 return false;
4837 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4838 return AArch64::FPR128RegClass.contains(Reg) ||
4839 AArch64::FPR64RegClass.contains(Reg) ||
4840 AArch64::FPR32RegClass.contains(Reg) ||
4841 AArch64::FPR16RegClass.contains(Reg) ||
4842 AArch64::FPR8RegClass.contains(Reg);
4843}
4844
4846 auto IsFPR = [&](const MachineOperand &Op) {
4847 if (!Op.isReg())
4848 return false;
4849 auto Reg = Op.getReg();
4850 if (Reg.isPhysical())
4851 return isFpOrNEON(Reg);
4852
4853 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4854 return TRC == &AArch64::FPR128RegClass ||
4855 TRC == &AArch64::FPR128_loRegClass ||
4856 TRC == &AArch64::FPR64RegClass ||
4857 TRC == &AArch64::FPR64_loRegClass ||
4858 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4859 TRC == &AArch64::FPR8RegClass;
4860 };
4861 return llvm::any_of(MI.operands(), IsFPR);
4862}
4863
4864// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4865// scaled.
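// For example, an unscaled STURXi at byte offset 24 has an access size of 8, so it
// scales to the element offset 3 used by the equivalent scaled or paired encodings.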
4866static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4867  int Scale = AArch64InstrInfo::getMemScale(Opc);
4868
4869 // If the byte-offset isn't a multiple of the stride, we can't scale this
4870 // offset.
4871 if (Offset % Scale != 0)
4872 return false;
4873
4874 // Convert the byte-offset used by unscaled into an "element" offset used
4875 // by the scaled pair load/store instructions.
4876 Offset /= Scale;
4877 return true;
4878}
4879
4880static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4881 if (FirstOpc == SecondOpc)
4882 return true;
4883 // We can also pair sign-ext and zero-ext instructions.
4884 switch (FirstOpc) {
4885 default:
4886 return false;
4887 case AArch64::STRSui:
4888 case AArch64::STURSi:
4889 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4890 case AArch64::STRDui:
4891 case AArch64::STURDi:
4892 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4893 case AArch64::STRQui:
4894 case AArch64::STURQi:
4895 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4896 case AArch64::STRWui:
4897 case AArch64::STURWi:
4898 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4899 case AArch64::STRXui:
4900 case AArch64::STURXi:
4901 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4902 case AArch64::LDRSui:
4903 case AArch64::LDURSi:
4904 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4905 case AArch64::LDRDui:
4906 case AArch64::LDURDi:
4907 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4908 case AArch64::LDRQui:
4909 case AArch64::LDURQi:
4910 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4911 case AArch64::LDRWui:
4912 case AArch64::LDURWi:
4913 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4914 case AArch64::LDRSWui:
4915 case AArch64::LDURSWi:
4916 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4917 case AArch64::LDRXui:
4918 case AArch64::LDURXi:
4919 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4920 }
4921 // These instructions can't be paired based on their opcodes.
4922 return false;
4923}
4924
4925static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4926 int64_t Offset1, unsigned Opcode1, int FI2,
4927 int64_t Offset2, unsigned Opcode2) {
4928  // Accesses through fixed stack object frame indices may access a different
4929  // fixed stack slot. Check that the scaled object offsets plus offsets refer to consecutive slots.
4930 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4931 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4932 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4933 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4934 // Convert to scaled object offsets.
4935 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4936 if (ObjectOffset1 % Scale1 != 0)
4937 return false;
4938 ObjectOffset1 /= Scale1;
4939 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4940 if (ObjectOffset2 % Scale2 != 0)
4941 return false;
4942 ObjectOffset2 /= Scale2;
4943 ObjectOffset1 += Offset1;
4944 ObjectOffset2 += Offset2;
4945 return ObjectOffset1 + 1 == ObjectOffset2;
4946 }
4947
4948 return FI1 == FI2;
4949}
4950
4951/// Detect opportunities for ldp/stp formation.
4952///
4953/// Only called for LdSt for which getMemOperandWithOffset returns true.
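/// For example, `ldr x0, [x2, #8]` followed by `ldr x1, [x2, #16]` accesses two
/// consecutive 8-byte slots and is a candidate to become `ldp x0, x1, [x2, #8]`.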
4954bool AArch64InstrInfo::shouldClusterMemOps(
4955    ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4956 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4957 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4958 unsigned NumBytes) const {
4959 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4960 const MachineOperand &BaseOp1 = *BaseOps1.front();
4961 const MachineOperand &BaseOp2 = *BaseOps2.front();
4962 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4963 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4964 if (BaseOp1.getType() != BaseOp2.getType())
4965 return false;
4966
4967 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4968 "Only base registers and frame indices are supported.");
4969
4970 // Check for both base regs and base FI.
4971 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4972 return false;
4973
4974 // Only cluster up to a single pair.
4975 if (ClusterSize > 2)
4976 return false;
4977
4978 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4979 return false;
4980
4981 // Can we pair these instructions based on their opcodes?
4982 unsigned FirstOpc = FirstLdSt.getOpcode();
4983 unsigned SecondOpc = SecondLdSt.getOpcode();
4984 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4985 return false;
4986
4987 // Can't merge volatiles or load/stores that have a hint to avoid pair
4988 // formation, for example.
4989 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4990 !isCandidateToMergeOrPair(SecondLdSt))
4991 return false;
4992
4993 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4994 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4995 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4996 return false;
4997
4998 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4999 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5000 return false;
5001
5002 // Pairwise instructions have a 7-bit signed offset field.
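  // That is, the scaled element offsets must lie in the range [-64, 63].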
5003 if (Offset1 > 63 || Offset1 < -64)
5004 return false;
5005
5006 // The caller should already have ordered First/SecondLdSt by offset.
5007  // Note: this does not hold for accesses with non-equal frame index bases.
5008 if (BaseOp1.isFI()) {
5009 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5010 "Caller should have ordered offsets.");
5011
5012 const MachineFrameInfo &MFI =
5013 FirstLdSt.getParent()->getParent()->getFrameInfo();
5014 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5015 BaseOp2.getIndex(), Offset2, SecondOpc);
5016 }
5017
5018 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5019
5020 return Offset1 + 1 == Offset2;
5021}
5022
5024 MCRegister Reg, unsigned SubIdx,
5025 unsigned State,
5026 const TargetRegisterInfo *TRI) {
5027 if (!SubIdx)
5028 return MIB.addReg(Reg, State);
5029
5030 if (Reg.isPhysical())
5031 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5032 return MIB.addReg(Reg, State, SubIdx);
5033}
5034
5035static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5036 unsigned NumRegs) {
5037  // We really want the positive remainder mod 32 here; that happens to be
5038  // easily obtainable with a mask.
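  // For example, with NumRegs == 4, DestReg encoding 3 and SrcReg encoding 1 give
  // ((3 - 1) & 0x1f) == 2 < 4, so a forward sub-register copy would overwrite part
  // of the source tuple; the caller then copies the sub-registers in reverse order.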
5039 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5040}
5041
5042void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5043                                        MachineBasicBlock::iterator I,
5044                                        const DebugLoc &DL, MCRegister DestReg,
5045 MCRegister SrcReg, bool KillSrc,
5046 unsigned Opcode,
5047 ArrayRef<unsigned> Indices) const {
5048 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5049  const TargetRegisterInfo *TRI = &getRegisterInfo();
5050  uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5051 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5052 unsigned NumRegs = Indices.size();
5053
5054 int SubReg = 0, End = NumRegs, Incr = 1;
5055 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5056 SubReg = NumRegs - 1;
5057 End = -1;
5058 Incr = -1;
5059 }
5060
5061 for (; SubReg != End; SubReg += Incr) {
5062 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5063 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5064 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5065 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5066 }
5067}
5068
5069void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5070                                       MachineBasicBlock::iterator I,
5071                                       const DebugLoc &DL, MCRegister DestReg,
5072 MCRegister SrcReg, bool KillSrc,
5073 unsigned Opcode, unsigned ZeroReg,
5074 llvm::ArrayRef<unsigned> Indices) const {
5075  const TargetRegisterInfo *TRI = &getRegisterInfo();
5076  unsigned NumRegs = Indices.size();
5077
5078#ifndef NDEBUG
5079 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5080 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5081 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5082 "GPR reg sequences should not be able to overlap");
5083#endif
5084
5085 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5086 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5087 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5088 MIB.addReg(ZeroReg);
5089 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5090 MIB.addImm(0);
5091 }
5092}
5093
5094void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5095                                   MachineBasicBlock::iterator I,
5096                                   const DebugLoc &DL, Register DestReg,
5097 Register SrcReg, bool KillSrc,
5098 bool RenamableDest,
5099 bool RenamableSrc) const {
5100 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5101 AArch64::GPR32spRegClass.contains(SrcReg)) {
5102 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5103 // If either operand is WSP, expand to ADD #0.
5104 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5105 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5106 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5107 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5108 &AArch64::GPR64spRegClass);
5109 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5110 &AArch64::GPR64spRegClass);
5111 // This instruction is reading and writing X registers. This may upset
5112 // the register scavenger and machine verifier, so we need to indicate
5113 // that we are reading an undefined value from SrcRegX, but a proper
5114 // value from SrcReg.
5115 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5116 .addReg(SrcRegX, RegState::Undef)
5117 .addImm(0)
5118          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5119          .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5120 } else {
5121 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5122 .addReg(SrcReg, getKillRegState(KillSrc))
5123 .addImm(0)
5124          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5125      }
5126 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5127 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5128 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5129 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5130 &AArch64::GPR64spRegClass);
5131 assert(DestRegX.isValid() && "Destination super-reg not valid");
5132 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5133 &AArch64::GPR64spRegClass);
5134 assert(SrcRegX.isValid() && "Source super-reg not valid");
5135 // This instruction is reading and writing X registers. This may upset
5136 // the register scavenger and machine verifier, so we need to indicate
5137 // that we are reading an undefined value from SrcRegX, but a proper
5138 // value from SrcReg.
5139 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5140 .addReg(AArch64::XZR)
5141 .addReg(SrcRegX, RegState::Undef)
5142 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5143 } else {
5144 // Otherwise, expand to ORR WZR.
5145 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5146 .addReg(AArch64::WZR)
5147 .addReg(SrcReg, getKillRegState(KillSrc));
5148 }
5149 return;
5150 }
5151
5152 // GPR32 zeroing
5153 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5154 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5155 !Subtarget.hasZeroCycleZeroingGPR32()) {
5156 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5157 &AArch64::GPR64spRegClass);
5158 assert(DestRegX.isValid() && "Destination super-reg not valid");
5159 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5160 .addImm(0)
5162 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5163 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5164 .addImm(0)
5166 } else {
5167 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5168 .addReg(AArch64::WZR)
5169 .addReg(AArch64::WZR);
5170 }
5171 return;
5172 }
5173
5174 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5175 AArch64::GPR64spRegClass.contains(SrcReg)) {
5176 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5177 // If either operand is SP, expand to ADD #0.
5178 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5179 .addReg(SrcReg, getKillRegState(KillSrc))
5180 .addImm(0)
5181          .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5182    } else {
5183 // Otherwise, expand to ORR XZR.
5184 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5185 .addReg(AArch64::XZR)
5186 .addReg(SrcReg, getKillRegState(KillSrc));
5187 }
5188 return;
5189 }
5190
5191 // GPR64 zeroing
5192 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5193 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5194 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5195 .addImm(0)
5197 } else {
5198 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5199 .addReg(AArch64::XZR)
5200 .addReg(AArch64::XZR);
5201 }
5202 return;
5203 }
5204
5205 // Copy a Predicate register by ORRing with itself.
5206 if (AArch64::PPRRegClass.contains(DestReg) &&
5207 AArch64::PPRRegClass.contains(SrcReg)) {
5208 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5209 "Unexpected SVE register.");
5210 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5211 .addReg(SrcReg) // Pg
5212 .addReg(SrcReg)
5213 .addReg(SrcReg, getKillRegState(KillSrc));
5214 return;
5215 }
5216
5217 // Copy a predicate-as-counter register by ORRing with itself as if it
5218 // were a regular predicate (mask) register.
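  // For example, a copy of pn9 into pn8 is emitted as an ORR on the underlying
  // p9/p8 mask registers, with the counter destination added as an implicit def.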
5219 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5220 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5221 if (DestIsPNR || SrcIsPNR) {
5222 auto ToPPR = [](MCRegister R) -> MCRegister {
5223 return (R - AArch64::PN0) + AArch64::P0;
5224 };
5225 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5226 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5227
5228 if (PPRSrcReg != PPRDestReg) {
5229 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5230 .addReg(PPRSrcReg) // Pg
5231 .addReg(PPRSrcReg)
5232 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5233 if (DestIsPNR)
5234 NewMI.addDef(DestReg, RegState::Implicit);
5235 }
5236 return;
5237 }
5238
5239 // Copy a Z register by ORRing with itself.
5240 if (AArch64::ZPRRegClass.contains(DestReg) &&
5241 AArch64::ZPRRegClass.contains(SrcReg)) {
5242 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5243 "Unexpected SVE register.");
5244 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5245 .addReg(SrcReg)
5246 .addReg(SrcReg, getKillRegState(KillSrc));
5247 return;
5248 }
5249
5250 // Copy a Z register pair by copying the individual sub-registers.
5251 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5252 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5253 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5254 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5255 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5256 "Unexpected SVE register.");
5257 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5258 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5259 Indices);
5260 return;
5261 }
5262
5263 // Copy a Z register triple by copying the individual sub-registers.
5264 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5265 AArch64::ZPR3RegClass.contains(SrcReg)) {
5266 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5267 "Unexpected SVE register.");
5268 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5269 AArch64::zsub2};
5270 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5271 Indices);
5272 return;
5273 }
5274
5275 // Copy a Z register quad by copying the individual sub-registers.
5276 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5277 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5278 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5279 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5280 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5281 "Unexpected SVE register.");
5282 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5283 AArch64::zsub2, AArch64::zsub3};
5284 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5285 Indices);
5286 return;
5287 }
5288
5289 // Copy a DDDD register quad by copying the individual sub-registers.
5290 if (AArch64::DDDDRegClass.contains(DestReg) &&
5291 AArch64::DDDDRegClass.contains(SrcReg)) {
5292 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5293 AArch64::dsub2, AArch64::dsub3};
5294 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5295 Indices);
5296 return;
5297 }
5298
5299 // Copy a DDD register triple by copying the individual sub-registers.
5300 if (AArch64::DDDRegClass.contains(DestReg) &&
5301 AArch64::DDDRegClass.contains(SrcReg)) {
5302 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5303 AArch64::dsub2};
5304 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5305 Indices);
5306 return;
5307 }
5308
5309 // Copy a DD register pair by copying the individual sub-registers.
5310 if (AArch64::DDRegClass.contains(DestReg) &&
5311 AArch64::DDRegClass.contains(SrcReg)) {
5312 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5313 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5314 Indices);
5315 return;
5316 }
5317
5318 // Copy a QQQQ register quad by copying the individual sub-registers.
5319 if (AArch64::QQQQRegClass.contains(DestReg) &&
5320 AArch64::QQQQRegClass.contains(SrcReg)) {
5321 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5322 AArch64::qsub2, AArch64::qsub3};
5323 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5324 Indices);
5325 return;
5326 }
5327
5328 // Copy a QQQ register triple by copying the individual sub-registers.
5329 if (AArch64::QQQRegClass.contains(DestReg) &&
5330 AArch64::QQQRegClass.contains(SrcReg)) {
5331 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5332 AArch64::qsub2};
5333 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5334 Indices);
5335 return;
5336 }
5337
5338 // Copy a QQ register pair by copying the individual sub-registers.
5339 if (AArch64::QQRegClass.contains(DestReg) &&
5340 AArch64::QQRegClass.contains(SrcReg)) {
5341 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5342 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5343 Indices);
5344 return;
5345 }
5346
5347 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5348 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5349 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5350 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5351 AArch64::XZR, Indices);
5352 return;
5353 }
5354
5355 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5356 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5357 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5358 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5359 AArch64::WZR, Indices);
5360 return;
5361 }
5362
5363 if (AArch64::FPR128RegClass.contains(DestReg) &&
5364 AArch64::FPR128RegClass.contains(SrcReg)) {
5365 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5366 !Subtarget.isNeonAvailable())
5367 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5368 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5369 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5370 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5371 else if (Subtarget.isNeonAvailable())
5372 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5373 .addReg(SrcReg)
5374 .addReg(SrcReg, getKillRegState(KillSrc));
5375 else {
5376 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5377 .addReg(AArch64::SP, RegState::Define)
5378 .addReg(SrcReg, getKillRegState(KillSrc))
5379 .addReg(AArch64::SP)
5380 .addImm(-16);
5381 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5382 .addReg(AArch64::SP, RegState::Define)
5383 .addReg(DestReg, RegState::Define)
5384 .addReg(AArch64::SP)
5385 .addImm(16);
5386 }
5387 return;
5388 }
5389
5390 if (AArch64::FPR64RegClass.contains(DestReg) &&
5391 AArch64::FPR64RegClass.contains(SrcReg)) {
5392 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5393 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5394 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5395 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5396 &AArch64::FPR128RegClass);
5397 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5398 &AArch64::FPR128RegClass);
5399 // This instruction is reading and writing Q registers. This may upset
5400 // the register scavenger and machine verifier, so we need to indicate
5401 // that we are reading an undefined value from SrcRegQ, but a proper
5402 // value from SrcReg.
5403 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5404 .addReg(SrcRegQ, RegState::Undef)
5405 .addReg(SrcRegQ, RegState::Undef)
5406 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5407 } else {
5408 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5409 .addReg(SrcReg, getKillRegState(KillSrc));
5410 }
5411 return;
5412 }
5413
5414 if (AArch64::FPR32RegClass.contains(DestReg) &&
5415 AArch64::FPR32RegClass.contains(SrcReg)) {
5416 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5417 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5418 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5419 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5420 &AArch64::FPR128RegClass);
5421 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5422 &AArch64::FPR128RegClass);
5423 // This instruction is reading and writing Q registers. This may upset
5424 // the register scavenger and machine verifier, so we need to indicate
5425 // that we are reading an undefined value from SrcRegQ, but a proper
5426 // value from SrcReg.
5427 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5428 .addReg(SrcRegQ, RegState::Undef)
5429 .addReg(SrcRegQ, RegState::Undef)
5430 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5431 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5432 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5433 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5434 &AArch64::FPR64RegClass);
5435 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5436 &AArch64::FPR64RegClass);
5437 // This instruction is reading and writing D registers. This may upset
5438 // the register scavenger and machine verifier, so we need to indicate
5439 // that we are reading an undefined value from SrcRegD, but a proper
5440 // value from SrcReg.
5441 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5442 .addReg(SrcRegD, RegState::Undef)
5443 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5444 } else {
5445 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5446 .addReg(SrcReg, getKillRegState(KillSrc));
5447 }
5448 return;
5449 }
5450
5451 if (AArch64::FPR16RegClass.contains(DestReg) &&
5452 AArch64::FPR16RegClass.contains(SrcReg)) {
5453 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5454 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5455 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5456 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5457 &AArch64::FPR128RegClass);
5458 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5459 &AArch64::FPR128RegClass);
5460 // This instruction is reading and writing Q registers. This may upset
5461 // the register scavenger and machine verifier, so we need to indicate
5462 // that we are reading an undefined value from SrcRegQ, but a proper
5463 // value from SrcReg.
5464 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5465 .addReg(SrcRegQ, RegState::Undef)
5466 .addReg(SrcRegQ, RegState::Undef)
5467 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5468 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5469 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5470 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5471 &AArch64::FPR64RegClass);
5472 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5473 &AArch64::FPR64RegClass);
5474 // This instruction is reading and writing D registers. This may upset
5475 // the register scavenger and machine verifier, so we need to indicate
5476 // that we are reading an undefined value from SrcRegD, but a proper
5477 // value from SrcReg.
5478 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5479 .addReg(SrcRegD, RegState::Undef)
5480 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5481 } else {
5482 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5483 &AArch64::FPR32RegClass);
5484 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5485 &AArch64::FPR32RegClass);
5486 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5487 .addReg(SrcReg, getKillRegState(KillSrc));
5488 }
5489 return;
5490 }
5491
5492 if (AArch64::FPR8RegClass.contains(DestReg) &&
5493 AArch64::FPR8RegClass.contains(SrcReg)) {
5494 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5495 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5496 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5497 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5498 &AArch64::FPR128RegClass);
5499 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5500 &AArch64::FPR128RegClass);
5501 // This instruction is reading and writing Q registers. This may upset
5502 // the register scavenger and machine verifier, so we need to indicate
5503 // that we are reading an undefined value from SrcRegQ, but a proper
5504 // value from SrcReg.
5505 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5506 .addReg(SrcRegQ, RegState::Undef)
5507 .addReg(SrcRegQ, RegState::Undef)
5508 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5509 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5510 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5511 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5512 &AArch64::FPR64RegClass);
5513 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5514 &AArch64::FPR64RegClass);
5515 // This instruction is reading and writing D registers. This may upset
5516 // the register scavenger and machine verifier, so we need to indicate
5517 // that we are reading an undefined value from SrcRegD, but a proper
5518 // value from SrcReg.
5519 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5520 .addReg(SrcRegD, RegState::Undef)
5521 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5522 } else {
5523 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5524 &AArch64::FPR32RegClass);
5525 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5526 &AArch64::FPR32RegClass);
5527 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5528 .addReg(SrcReg, getKillRegState(KillSrc));
5529 }
5530 return;
5531 }
5532
5533 // Copies between GPR64 and FPR64.
5534 if (AArch64::FPR64RegClass.contains(DestReg) &&
5535 AArch64::GPR64RegClass.contains(SrcReg)) {
5536 if (AArch64::XZR == SrcReg) {
5537 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5538 } else {
5539 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5540 .addReg(SrcReg, getKillRegState(KillSrc));
5541 }
5542 return;
5543 }
5544 if (AArch64::GPR64RegClass.contains(DestReg) &&
5545 AArch64::FPR64RegClass.contains(SrcReg)) {
5546 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5547 .addReg(SrcReg, getKillRegState(KillSrc));
5548 return;
5549 }
5550 // Copies between GPR32 and FPR32.
5551 if (AArch64::FPR32RegClass.contains(DestReg) &&
5552 AArch64::GPR32RegClass.contains(SrcReg)) {
5553 if (AArch64::WZR == SrcReg) {
5554 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5555 } else {
5556 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5557 .addReg(SrcReg, getKillRegState(KillSrc));
5558 }
5559 return;
5560 }
5561 if (AArch64::GPR32RegClass.contains(DestReg) &&
5562 AArch64::FPR32RegClass.contains(SrcReg)) {
5563 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5564 .addReg(SrcReg, getKillRegState(KillSrc));
5565 return;
5566 }
5567
5568 if (DestReg == AArch64::NZCV) {
5569 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5570 BuildMI(MBB, I, DL, get(AArch64::MSR))
5571 .addImm(AArch64SysReg::NZCV)
5572 .addReg(SrcReg, getKillRegState(KillSrc))
5573 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5574 return;
5575 }
5576
5577 if (SrcReg == AArch64::NZCV) {
5578 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5579 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5580 .addImm(AArch64SysReg::NZCV)
5581 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5582 return;
5583 }
5584
5585#ifndef NDEBUG
5586 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5587 << "\n";
5588#endif
5589 llvm_unreachable("unimplemented reg-to-reg copy");
5590}
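
// Illustration (register numbers chosen arbitrarily, not taken from the code
// above): on a subtarget with zero-cycle FPR128 moves but no zero-cycle
// FPR64/FPR32 moves, and with NEON available, a D-register copy d0 <- d1 is
// emitted on the covering Q registers as
//   ORRv16i8 q0, q1, q1
// with the q1 reads marked undef and d1 added as an implicit use (killed if
// the original copy kills it); otherwise a plain FMOVDr d0, d1 is used. An
// FPR128 copy with neither NEON nor SVE usable falls back to a 16-byte round
// trip through the stack via STRQpre/LDRQpost on SP.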
5591
5594 MachineBasicBlock::iterator InsertBefore,
5595 const MCInstrDesc &MCID,
5596 Register SrcReg, bool IsKill,
5597 unsigned SubIdx0, unsigned SubIdx1, int FI,
5598 MachineMemOperand *MMO) {
5599 Register SrcReg0 = SrcReg;
5600 Register SrcReg1 = SrcReg;
5601 if (SrcReg.isPhysical()) {
5602 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5603 SubIdx0 = 0;
5604 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5605 SubIdx1 = 0;
5606 }
5607 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5608 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5609 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5610 .addFrameIndex(FI)
5611 .addImm(0)
5612 .addMemOperand(MMO);
5613}
5614
5617 Register SrcReg, bool isKill, int FI,
5618 const TargetRegisterClass *RC,
5619 const TargetRegisterInfo *TRI,
5620 Register VReg,
5621 MachineInstr::MIFlag Flags) const {
5622 MachineFunction &MF = *MBB.getParent();
5623 MachineFrameInfo &MFI = MF.getFrameInfo();
5624
5626 MachineMemOperand *MMO =
5628 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5629 unsigned Opc = 0;
5630 bool Offset = true;
5632 unsigned StackID = TargetStackID::Default;
5633 switch (TRI->getSpillSize(*RC)) {
5634 case 1:
5635 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5636 Opc = AArch64::STRBui;
5637 break;
5638 case 2: {
5639 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5640 Opc = AArch64::STRHui;
5641 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5642 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5643 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5644 "Unexpected register store without SVE store instructions");
5645 Opc = AArch64::STR_PXI;
5647 }
5648 break;
5649 }
5650 case 4:
5651 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5652 Opc = AArch64::STRWui;
5653 if (SrcReg.isVirtual())
5654 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5655 else
5656 assert(SrcReg != AArch64::WSP);
5657 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5658 Opc = AArch64::STRSui;
5659 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5660 Opc = AArch64::STR_PPXI;
5662 }
5663 break;
5664 case 8:
5665 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5666 Opc = AArch64::STRXui;
5667 if (SrcReg.isVirtual())
5668 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5669 else
5670 assert(SrcReg != AArch64::SP);
5671 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5672 Opc = AArch64::STRDui;
5673 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5675 get(AArch64::STPWi), SrcReg, isKill,
5676 AArch64::sube32, AArch64::subo32, FI, MMO);
5677 return;
5678 }
5679 break;
5680 case 16:
5681 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5682 Opc = AArch64::STRQui;
5683 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5684 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5685 Opc = AArch64::ST1Twov1d;
5686 Offset = false;
5687 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5689 get(AArch64::STPXi), SrcReg, isKill,
5690 AArch64::sube64, AArch64::subo64, FI, MMO);
5691 return;
5692 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5693 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5694 "Unexpected register store without SVE store instructions");
5695 Opc = AArch64::STR_ZXI;
5697 }
5698 break;
5699 case 24:
5700 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5701 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5702 Opc = AArch64::ST1Threev1d;
5703 Offset = false;
5704 }
5705 break;
5706 case 32:
5707 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5708 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5709 Opc = AArch64::ST1Fourv1d;
5710 Offset = false;
5711 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5712 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5713 Opc = AArch64::ST1Twov2d;
5714 Offset = false;
5715 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5716 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5717 "Unexpected register store without SVE store instructions");
5718 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5720 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5721 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5722 "Unexpected register store without SVE store instructions");
5723 Opc = AArch64::STR_ZZXI;
5725 }
5726 break;
5727 case 48:
5728 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5729 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5730 Opc = AArch64::ST1Threev2d;
5731 Offset = false;
5732 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5733 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5734 "Unexpected register store without SVE store instructions");
5735 Opc = AArch64::STR_ZZZXI;
5737 }
5738 break;
5739 case 64:
5740 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5741 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5742 Opc = AArch64::ST1Fourv2d;
5743 Offset = false;
5744 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5745 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5746 "Unexpected register store without SVE store instructions");
5747 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5749 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5750 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5751 "Unexpected register store without SVE store instructions");
5752 Opc = AArch64::STR_ZZZZXI;
5754 }
5755 break;
5756 }
5757 assert(Opc && "Unknown register class");
5758 MFI.setStackID(FI, StackID);
5759
5761 .addReg(SrcReg, getKillRegState(isKill))
5762 .addFrameIndex(FI);
5763
5764 if (Offset)
5765 MI.addImm(0);
5766 if (PNRReg.isValid())
5767 MI.addDef(PNRReg, RegState::Implicit);
5768 MI.addMemOperand(MMO);
5769}
5770
5773 MachineBasicBlock::iterator InsertBefore,
5774 const MCInstrDesc &MCID,
5775 Register DestReg, unsigned SubIdx0,
5776 unsigned SubIdx1, int FI,
5777 MachineMemOperand *MMO) {
5778 Register DestReg0 = DestReg;
5779 Register DestReg1 = DestReg;
5780 bool IsUndef = true;
5781 if (DestReg.isPhysical()) {
5782 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5783 SubIdx0 = 0;
5784 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5785 SubIdx1 = 0;
5786 IsUndef = false;
5787 }
5788 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5789 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5790 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5791 .addFrameIndex(FI)
5792 .addImm(0)
5793 .addMemOperand(MMO);
5794}
5795
5798 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5799 Register VReg, MachineInstr::MIFlag Flags) const {
5800 MachineFunction &MF = *MBB.getParent();
5801 MachineFrameInfo &MFI = MF.getFrameInfo();
5803 MachineMemOperand *MMO =
5805 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5806
5807 unsigned Opc = 0;
5808 bool Offset = true;
5809 unsigned StackID = TargetStackID::Default;
5811 switch (TRI->getSpillSize(*RC)) {
5812 case 1:
5813 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5814 Opc = AArch64::LDRBui;
5815 break;
5816 case 2: {
5817 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5818 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5819 Opc = AArch64::LDRHui;
5820 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5821 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5822 "Unexpected register load without SVE load instructions");
5823 if (IsPNR)
5824 PNRReg = DestReg;
5825 Opc = AArch64::LDR_PXI;
5827 }
5828 break;
5829 }
5830 case 4:
5831 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5832 Opc = AArch64::LDRWui;
5833 if (DestReg.isVirtual())
5834 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5835 else
5836 assert(DestReg != AArch64::WSP);
5837 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5838 Opc = AArch64::LDRSui;
5839 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5840 Opc = AArch64::LDR_PPXI;
5842 }
5843 break;
5844 case 8:
5845 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5846 Opc = AArch64::LDRXui;
5847 if (DestReg.isVirtual())
5848 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5849 else
5850 assert(DestReg != AArch64::SP);
5851 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5852 Opc = AArch64::LDRDui;
5853 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5855 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5856 AArch64::subo32, FI, MMO);
5857 return;
5858 }
5859 break;
5860 case 16:
5861 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5862 Opc = AArch64::LDRQui;
5863 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5864 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5865 Opc = AArch64::LD1Twov1d;
5866 Offset = false;
5867 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5869 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5870 AArch64::subo64, FI, MMO);
5871 return;
5872 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5873 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5874 "Unexpected register load without SVE load instructions");
5875 Opc = AArch64::LDR_ZXI;
5877 }
5878 break;
5879 case 24:
5880 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5881 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5882 Opc = AArch64::LD1Threev1d;
5883 Offset = false;
5884 }
5885 break;
5886 case 32:
5887 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5888 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5889 Opc = AArch64::LD1Fourv1d;
5890 Offset = false;
5891 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5892 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5893 Opc = AArch64::LD1Twov2d;
5894 Offset = false;
5895 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5896 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5897 "Unexpected register load without SVE load instructions");
5898 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5900 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5901 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5902 "Unexpected register load without SVE load instructions");
5903 Opc = AArch64::LDR_ZZXI;
5905 }
5906 break;
5907 case 48:
5908 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5909 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5910 Opc = AArch64::LD1Threev2d;
5911 Offset = false;
5912 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5913 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5914 "Unexpected register load without SVE load instructions");
5915 Opc = AArch64::LDR_ZZZXI;
5917 }
5918 break;
5919 case 64:
5920 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5921 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5922 Opc = AArch64::LD1Fourv2d;
5923 Offset = false;
5924 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5925 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5926 "Unexpected register load without SVE load instructions");
5927 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5929 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5930 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5931 "Unexpected register load without SVE load instructions");
5932 Opc = AArch64::LDR_ZZZZXI;
5934 }
5935 break;
5936 }
5937
5938 assert(Opc && "Unknown register class");
5939 MFI.setStackID(FI, StackID);
5940
5942 .addReg(DestReg, getDefRegState(true))
5943 .addFrameIndex(FI);
5944 if (Offset)
5945 MI.addImm(0);
5946 if (PNRReg.isValid() && !PNRReg.isVirtual())
5947 MI.addDef(PNRReg, RegState::Implicit);
5948 MI.addMemOperand(MMO);
5949}
5950
5952 const MachineInstr &UseMI,
5953 const TargetRegisterInfo *TRI) {
5954 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5955 UseMI.getIterator()),
5956 [TRI](const MachineInstr &I) {
5957 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5958 I.readsRegister(AArch64::NZCV, TRI);
5959 });
5960}
5961
5962void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5963 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5964 // The smallest scalable elements supported by scaled SVE addressing
5965 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5966 // byte offset must always be a multiple of 2.
5967 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5968
5969 // VGSized offsets are divided by '2', because the VG register is the
5970 // number of 64-bit granules as opposed to 128-bit vector chunks,
5971 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5972 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5973 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5974 ByteSized = Offset.getFixed();
5975 VGSized = Offset.getScalable() / 2;
5976}
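
// For example (illustrative values): a StackOffset of 16 fixed bytes plus
// 32 scalable bytes (two SVE vector slots) decomposes into ByteSized = 16 and
// VGSized = 16, so a DWARF consumer computes the offset as 16 + 16 * VG.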
5977
5978/// Returns, via the out-parameters, the parts into which this frame offset
5979/// can be decomposed for the purpose of describing or materializing it.
5980/// For non-scalable offsets this is simply its byte size.
5981void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5982 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5983 int64_t &NumDataVectors) {
5984 // The smallest scalable elements supported by scaled SVE addressing
5985 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5986 // byte offset must always be a multiple of 2.
5987 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5988
5989 NumBytes = Offset.getFixed();
5990 NumDataVectors = 0;
5991 NumPredicateVectors = Offset.getScalable() / 2;
5992 // This method is used to get the offsets with which to adjust the frame offset.
5993 // If the predicate-sized part is an exact multiple of a data vector, or would
5994 // need more than two ADDPL instructions, part (or all) of it is folded into
5995 // NumDataVectors so that ADDVL is used instead of ADDPL (see the examples below).
5996 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5997 NumPredicateVectors > 62) {
5998 NumDataVectors = NumPredicateVectors / 8;
5999 NumPredicateVectors -= NumDataVectors * 8;
6000 }
6001}
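
// Worked examples (illustrative values): 18 scalable bytes give
// NumPredicateVectors = 9 (a single ADDPL #9), while 148 scalable bytes give
// 74 predicate-sized units, which exceeds the +62 limit, so they are split
// into NumDataVectors = 9 and NumPredicateVectors = 2 (ADDVL #9 followed by
// ADDPL #2).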
6002
6003// Convenience function to create a DWARF expression for: Constant `Operation`.
6004// This helper emits compact sequences for common cases. For example, for `-15
6005// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6008 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6009 // -Constant (1 to 31)
6010 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6011 Operation = dwarf::DW_OP_minus;
6012 } else if (Constant >= 0 && Constant <= 31) {
6013 // Literal value 0 to 31
6014 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6015 } else {
6016 // Signed constant
6017 Expr.push_back(dwarf::DW_OP_consts);
6019 }
6020 return Expr.push_back(Operation);
6021}
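
// For example (illustrative): appendConstantExpr(Expr, -15, DW_OP_plus)
// appends DW_OP_lit15, DW_OP_minus; appendConstantExpr(Expr, 8, DW_OP_mul)
// appends DW_OP_lit8, DW_OP_mul; a constant that does not fit these literal
// forms, such as 100, falls back to DW_OP_consts with the SLEB128-encoded
// value followed by the operation.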
6022
6023// Convenience function to create a DWARF expression for a register.
6024static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6025 Expr.push_back((char)dwarf::DW_OP_bregx);
6027 Expr.push_back(0);
6028}
6029
6030// Convenience function to create a DWARF expression for loading a register from
6031// a CFA offset.
6033 int64_t OffsetFromDefCFA) {
6034 // This assumes the top of the DWARF stack contains the CFA.
6035 Expr.push_back(dwarf::DW_OP_dup);
6036 // Add the offset to the register.
6037 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6038 // Dereference the address (loads a 64-bit value).
6039 Expr.push_back(dwarf::DW_OP_deref);
6040}
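
// For example (illustrative), with OffsetFromDefCFA = -8 this appends
// DW_OP_dup, DW_OP_lit8, DW_OP_minus, DW_OP_deref: duplicate the CFA already
// on the DWARF stack, subtract 8, and load the 64-bit value saved there.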
6041
6042// Convenience function to create a comment for
6043// (+/-) NumBytes (* RegScale)?
6044static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6045 StringRef RegScale = {}) {
6046 if (NumBytes) {
6047 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6048 if (!RegScale.empty())
6049 Comment << ' ' << RegScale;
6050 }
6051}
6052
6053// Creates an MCCFIInstruction:
6054// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6056 unsigned Reg,
6057 const StackOffset &Offset) {
6058 int64_t NumBytes, NumVGScaledBytes;
6059 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6060 NumVGScaledBytes);
6061 std::string CommentBuffer;
6062 llvm::raw_string_ostream Comment(CommentBuffer);
6063
6064 if (Reg == AArch64::SP)
6065 Comment << "sp";
6066 else if (Reg == AArch64::FP)
6067 Comment << "fp";
6068 else
6069 Comment << printReg(Reg, &TRI);
6070
6071 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6072 SmallString<64> Expr;
6073 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6074 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6075 // Reg + NumBytes
6076 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6077 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6078 appendOffsetComment(NumBytes, Comment);
6079 if (NumVGScaledBytes) {
6080 // + VG * NumVGScaledBytes
6081 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6082 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6083 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6084 Expr.push_back(dwarf::DW_OP_plus);
6085 }
6086
6087 // Wrap this into DW_CFA_def_cfa_expression.
6088 SmallString<64> DefCfaExpr;
6089 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6090 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6091 DefCfaExpr.append(Expr.str());
6092 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6093 Comment.str());
6094}
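
// For example (illustrative): for Reg = SP and a CFA of 16 fixed plus 16
// scalable bytes, the decomposition gives NumBytes = 16 and
// NumVGScaledBytes = 8, so the escape encodes DW_CFA_def_cfa_expression with
//   DW_OP_breg31 +16, DW_OP_bregx <VG> 0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// and the asm comment "sp + 16 + 8 * VG".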
6095
6097 unsigned FrameReg, unsigned Reg,
6098 const StackOffset &Offset,
6099 bool LastAdjustmentWasScalable) {
6100 if (Offset.getScalable())
6101 return createDefCFAExpression(TRI, Reg, Offset);
6102
6103 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6104 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6105
6106 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6107 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6108}
6109
6112 const StackOffset &OffsetFromDefCFA,
6113 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6114 int64_t NumBytes, NumVGScaledBytes;
6115 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6116 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6117
6118 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6119
6120 // Non-scalable offsets can use DW_CFA_offset directly.
6121 if (!NumVGScaledBytes)
6122 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6123
6124 std::string CommentBuffer;
6125 llvm::raw_string_ostream Comment(CommentBuffer);
6126 Comment << printReg(Reg, &TRI) << " @ cfa";
6127
6128 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6129 assert(NumVGScaledBytes && "Expected scalable offset");
6130 SmallString<64> OffsetExpr;
6131 // + VG * NumVGScaledBytes
6132 StringRef VGRegScale;
6133 if (IncomingVGOffsetFromDefCFA) {
6134 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6135 VGRegScale = "* IncomingVG";
6136 } else {
6137 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6138 VGRegScale = "* VG";
6139 }
6140 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6141 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6142 OffsetExpr.push_back(dwarf::DW_OP_plus);
6143 if (NumBytes) {
6144 // + NumBytes
6145 appendOffsetComment(NumBytes, Comment);
6146 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6147 }
6148
6149 // Wrap this into DW_CFA_expression
6150 SmallString<64> CfaExpr;
6151 CfaExpr.push_back(dwarf::DW_CFA_expression);
6152 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6153 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6154 CfaExpr.append(OffsetExpr.str());
6155
6156 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6157 Comment.str());
6158}
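
// For example (illustrative, with no incoming-VG slot): a callee save whose
// slot is at CFA - 16 - 16 * VG decomposes into NumBytes = -16 and
// NumVGScaledBytes = -16, producing a DW_CFA_expression of
//   DW_OP_bregx <VG> 0, DW_OP_consts -16, DW_OP_mul, DW_OP_plus,
//   DW_OP_lit16, DW_OP_minus
// (evaluated with the CFA pushed first), with the comment
// "<reg> @ cfa - 16 * VG - 16".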
6159
6160// Helper function to emit a frame offset adjustment from a given
6161// pointer (SrcReg), stored into DestReg. This function is explicit
6162// in that the caller must supply the exact opcode to use.
6165 const DebugLoc &DL, unsigned DestReg,
6166 unsigned SrcReg, int64_t Offset, unsigned Opc,
6167 const TargetInstrInfo *TII,
6168 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6169 bool *HasWinCFI, bool EmitCFAOffset,
6170 StackOffset CFAOffset, unsigned FrameReg) {
6171 int Sign = 1;
6172 unsigned MaxEncoding, ShiftSize;
6173 switch (Opc) {
6174 case AArch64::ADDXri:
6175 case AArch64::ADDSXri:
6176 case AArch64::SUBXri:
6177 case AArch64::SUBSXri:
6178 MaxEncoding = 0xfff;
6179 ShiftSize = 12;
6180 break;
6181 case AArch64::ADDVL_XXI:
6182 case AArch64::ADDPL_XXI:
6183 case AArch64::ADDSVL_XXI:
6184 case AArch64::ADDSPL_XXI:
6185 MaxEncoding = 31;
6186 ShiftSize = 0;
6187 if (Offset < 0) {
6188 MaxEncoding = 32;
6189 Sign = -1;
6190 Offset = -Offset;
6191 }
6192 break;
6193 default:
6194 llvm_unreachable("Unsupported opcode");
6195 }
6196
6197 // `Offset` can be in bytes or in "scalable bytes".
6198 int VScale = 1;
6199 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6200 VScale = 16;
6201 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6202 VScale = 2;
6203
6204 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6205 // scratch register. If DestReg is a virtual register, use it as the
6206 // scratch register; otherwise, create a new virtual register (to be
6207 // replaced by the scavenger at the end of PEI). That case can be optimized
6208 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6209 // register can be loaded with offset%8 and the add/sub can use an extending
6210 // instruction with LSL#3.
6211 // Currently the function handles any offsets but generates a poor sequence
6212 // of code.
6213 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6214
6215 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6216 Register TmpReg = DestReg;
6217 if (TmpReg == AArch64::XZR)
6218 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6219 &AArch64::GPR64RegClass);
6220 do {
6221 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6222 unsigned LocalShiftSize = 0;
6223 if (ThisVal > MaxEncoding) {
6224 ThisVal = ThisVal >> ShiftSize;
6225 LocalShiftSize = ShiftSize;
6226 }
6227 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6228 "Encoding cannot handle value that big");
6229
6230 Offset -= ThisVal << LocalShiftSize;
6231 if (Offset == 0)
6232 TmpReg = DestReg;
6233 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6234 .addReg(SrcReg)
6235 .addImm(Sign * (int)ThisVal);
6236 if (ShiftSize)
6237 MBI = MBI.addImm(
6239 MBI = MBI.setMIFlag(Flag);
6240
6241 auto Change =
6242 VScale == 1
6243 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6244 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6245 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6246 CFAOffset += Change;
6247 else
6248 CFAOffset -= Change;
6249 if (EmitCFAOffset && DestReg == TmpReg) {
6250 MachineFunction &MF = *MBB.getParent();
6251 const TargetSubtargetInfo &STI = MF.getSubtarget();
6252 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6253
6254 unsigned CFIIndex = MF.addFrameInst(
6255 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6256 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6257 .addCFIIndex(CFIIndex)
6258 .setMIFlags(Flag);
6259 }
6260
6261 if (NeedsWinCFI) {
6262 int Imm = (int)(ThisVal << LocalShiftSize);
6263 if (VScale != 1 && DestReg == AArch64::SP) {
6264 if (HasWinCFI)
6265 *HasWinCFI = true;
6266 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6267 .addImm(ThisVal)
6268 .setMIFlag(Flag);
6269 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6270 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6271 assert(VScale == 1 && "Expected non-scalable operation");
6272 if (HasWinCFI)
6273 *HasWinCFI = true;
6274 if (Imm == 0)
6275 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6276 else
6277 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6278 .addImm(Imm)
6279 .setMIFlag(Flag);
6280 assert(Offset == 0 && "Expected remaining offset to be zero to "
6281 "emit a single SEH directive");
6282 } else if (DestReg == AArch64::SP) {
6283 assert(VScale == 1 && "Expected non-scalable operation");
6284 if (HasWinCFI)
6285 *HasWinCFI = true;
6286 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6287 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6288 .addImm(Imm)
6289 .setMIFlag(Flag);
6290 }
6291 }
6292
6293 SrcReg = TmpReg;
6294 } while (Offset);
6295}
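
// Worked example (illustrative registers): emitFrameOffsetAdj with
// Opc = ADDXri and Offset = 4196 splits the value into 4096 + 100 and emits
//   add xD, xS, #1, lsl #12
//   add xD, xD, #100
// i.e. one shifted chunk per iteration of the loop above.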
6296
6299 unsigned DestReg, unsigned SrcReg,
6301 MachineInstr::MIFlag Flag, bool SetNZCV,
6302 bool NeedsWinCFI, bool *HasWinCFI,
6303 bool EmitCFAOffset, StackOffset CFAOffset,
6304 unsigned FrameReg) {
6305 // If a function is marked as arm_locally_streaming, then the runtime value of
6306 // vscale in the prologue/epilogue is different from the runtime value of vscale
6307 // in the function's body. To avoid having to consider multiple vscales,
6308 // we can use `addsvl` to allocate any scalable stack-slots, which under
6309 // most circumstances will be only locals, not callee-save slots.
6310 const Function &F = MBB.getParent()->getFunction();
6311 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6312
6313 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6314 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6315 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6316
6317 // Insert ADDSXri for scalable offset at the end.
6318 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6319 if (NeedsFinalDefNZCV)
6320 SetNZCV = false;
6321
6322 // First emit non-scalable frame offsets, or a simple 'mov'.
6323 if (Bytes || (!Offset && SrcReg != DestReg)) {
6324 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6325 "SP increment/decrement not 8-byte aligned");
6326 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6327 if (Bytes < 0) {
6328 Bytes = -Bytes;
6329 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6330 }
6331 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6332 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6333 FrameReg);
6334 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6335 ? StackOffset::getFixed(-Bytes)
6336 : StackOffset::getFixed(Bytes);
6337 SrcReg = DestReg;
6338 FrameReg = DestReg;
6339 }
6340
6341 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6342 "WinCFI can't allocate fractions of an SVE data vector");
6343
6344 if (NumDataVectors) {
6345 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6346 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6347 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6348 FrameReg);
6349 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6350 SrcReg = DestReg;
6351 }
6352
6353 if (NumPredicateVectors) {
6354 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6355 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6356 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6357 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6358 FrameReg);
6359 }
6360
6361 if (NeedsFinalDefNZCV)
6362 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6363 .addReg(DestReg)
6364 .addImm(0)
6365 .addImm(0);
6366}
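
// For example (illustrative): allocating 32 fixed bytes plus one SVE vector
// on SP, i.e. Offset = StackOffset::get(-32, -16), decomposes into
// Bytes = -32 and NumDataVectors = -1, and emits
//   sub sp, sp, #32
//   addvl sp, sp, #-1
// (addsvl instead of addvl in an "aarch64_pstate_sm_body" function).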
6367
6370 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6371 LiveIntervals *LIS, VirtRegMap *VRM) const {
6372 // This is a bit of a hack. Consider this instruction:
6373 //
6374 // %0 = COPY %sp; GPR64all:%0
6375 //
6376 // We explicitly chose GPR64all for the virtual register so such a copy might
6377 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6378 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6379 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6380 //
6381 // To prevent that, we are going to constrain the %0 register class here.
6382 if (MI.isFullCopy()) {
6383 Register DstReg = MI.getOperand(0).getReg();
6384 Register SrcReg = MI.getOperand(1).getReg();
6385 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6386 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6387 return nullptr;
6388 }
6389 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6390 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6391 return nullptr;
6392 }
6393 // Nothing can be folded with a copy from/to NZCV.
6394 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6395 return nullptr;
6396 }
6397
6398 // Handle the case where a copy is being spilled or filled but the source
6399 // and destination register classes don't match. For example:
6400 //
6401 // %0 = COPY %xzr; GPR64common:%0
6402 //
6403 // In this case we can still safely fold away the COPY and generate the
6404 // following spill code:
6405 //
6406 // STRXui %xzr, %stack.0
6407 //
6408 // This also eliminates spilled cross register class COPYs (e.g. between x and
6409 // d regs) of the same size. For example:
6410 //
6411 // %0 = COPY %1; GPR64:%0, FPR64:%1
6412 //
6413 // will be filled as
6414 //
6415 // LDRDui %0, fi<#0>
6416 //
6417 // instead of
6418 //
6419 // LDRXui %Temp, fi<#0>
6420 // %0 = FMOV %Temp
6421 //
6422 if (MI.isCopy() && Ops.size() == 1 &&
6423 // Make sure we're only folding the explicit COPY defs/uses.
6424 (Ops[0] == 0 || Ops[0] == 1)) {
6425 bool IsSpill = Ops[0] == 0;
6426 bool IsFill = !IsSpill;
6428 const MachineRegisterInfo &MRI = MF.getRegInfo();
6429 MachineBasicBlock &MBB = *MI.getParent();
6430 const MachineOperand &DstMO = MI.getOperand(0);
6431 const MachineOperand &SrcMO = MI.getOperand(1);
6432 Register DstReg = DstMO.getReg();
6433 Register SrcReg = SrcMO.getReg();
6434 // This is slightly expensive to compute for physical regs since
6435 // getMinimalPhysRegClass is slow.
6436 auto getRegClass = [&](unsigned Reg) {
6437 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6438 : TRI.getMinimalPhysRegClass(Reg);
6439 };
6440
6441 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6442 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6443 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6444 "Mismatched register size in non subreg COPY");
6445 if (IsSpill)
6446 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6447 getRegClass(SrcReg), &TRI, Register());
6448 else
6449 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6450 getRegClass(DstReg), &TRI, Register());
6451 return &*--InsertPt;
6452 }
6453
6454 // Handle cases like spilling def of:
6455 //
6456 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6457 //
6458 // where the physical register source can be widened and stored to the full
6459 // virtual reg destination stack slot, in this case producing:
6460 //
6461 // STRXui %xzr, %stack.0
6462 //
6463 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6464 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6465 assert(SrcMO.getSubReg() == 0 &&
6466 "Unexpected subreg on physical register");
6467 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6468 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6469 Register());
6470 return &*--InsertPt;
6471 }
6472
6473 // Handle cases like filling use of:
6474 //
6475 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6476 //
6477 // where we can load the full virtual reg source stack slot into the subreg
6478 // destination, in this case producing:
6479 //
6480 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6481 //
6482 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6483 const TargetRegisterClass *FillRC = nullptr;
6484 switch (DstMO.getSubReg()) {
6485 default:
6486 break;
6487 case AArch64::sub_32:
6488 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6489 FillRC = &AArch64::GPR32RegClass;
6490 break;
6491 case AArch64::ssub:
6492 FillRC = &AArch64::FPR32RegClass;
6493 break;
6494 case AArch64::dsub:
6495 FillRC = &AArch64::FPR64RegClass;
6496 break;
6497 }
6498
6499 if (FillRC) {
6500 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6501 TRI.getRegSizeInBits(*FillRC) &&
6502 "Mismatched regclass size on folded subreg COPY");
6503 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6504 Register());
6505 MachineInstr &LoadMI = *--InsertPt;
6506 MachineOperand &LoadDst = LoadMI.getOperand(0);
6507 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6508 LoadDst.setSubReg(DstMO.getSubReg());
6509 LoadDst.setIsUndef();
6510 return &LoadMI;
6511 }
6512 }
6513 }
6514
6515 // Cannot fold.
6516 return nullptr;
6517}
6518
6520 StackOffset &SOffset,
6521 bool *OutUseUnscaledOp,
6522 unsigned *OutUnscaledOp,
6523 int64_t *EmittableOffset) {
6524 // Set output values in case of early exit.
6525 if (EmittableOffset)
6526 *EmittableOffset = 0;
6527 if (OutUseUnscaledOp)
6528 *OutUseUnscaledOp = false;
6529 if (OutUnscaledOp)
6530 *OutUnscaledOp = 0;
6531
6532 // Exit early for structured vector spills/fills as they can't take an
6533 // immediate offset.
6534 switch (MI.getOpcode()) {
6535 default:
6536 break;
6537 case AArch64::LD1Rv1d:
6538 case AArch64::LD1Rv2s:
6539 case AArch64::LD1Rv2d:
6540 case AArch64::LD1Rv4h:
6541 case AArch64::LD1Rv4s:
6542 case AArch64::LD1Rv8b:
6543 case AArch64::LD1Rv8h:
6544 case AArch64::LD1Rv16b:
6545 case AArch64::LD1Twov2d:
6546 case AArch64::LD1Threev2d:
6547 case AArch64::LD1Fourv2d:
6548 case AArch64::LD1Twov1d:
6549 case AArch64::LD1Threev1d:
6550 case AArch64::LD1Fourv1d:
6551 case AArch64::ST1Twov2d:
6552 case AArch64::ST1Threev2d:
6553 case AArch64::ST1Fourv2d:
6554 case AArch64::ST1Twov1d:
6555 case AArch64::ST1Threev1d:
6556 case AArch64::ST1Fourv1d:
6557 case AArch64::ST1i8:
6558 case AArch64::ST1i16:
6559 case AArch64::ST1i32:
6560 case AArch64::ST1i64:
6561 case AArch64::IRG:
6562 case AArch64::IRGstack:
6563 case AArch64::STGloop:
6564 case AArch64::STZGloop:
6566 }
6567
6568 // Get the min/max offset and the scale.
6569 TypeSize ScaleValue(0U, false), Width(0U, false);
6570 int64_t MinOff, MaxOff;
6571 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6572 MaxOff))
6573 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6574
6575 // Construct the complete offset.
6576 bool IsMulVL = ScaleValue.isScalable();
6577 unsigned Scale = ScaleValue.getKnownMinValue();
6578 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6579
6580 const MachineOperand &ImmOpnd =
6581 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6582 Offset += ImmOpnd.getImm() * Scale;
6583
6584 // If the offset doesn't match the scale, we rewrite the instruction to
6585 // use the unscaled instruction instead. Likewise, if we have a negative
6586 // offset and there is an unscaled op to use.
6587 std::optional<unsigned> UnscaledOp =
6589 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6590 if (useUnscaledOp &&
6591 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6592 MaxOff))
6593 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6594
6595 Scale = ScaleValue.getKnownMinValue();
6596 assert(IsMulVL == ScaleValue.isScalable() &&
6597 "Unscaled opcode has different value for scalable");
6598
6599 int64_t Remainder = Offset % Scale;
6600 assert(!(Remainder && useUnscaledOp) &&
6601 "Cannot have remainder when using unscaled op");
6602
6603 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6604 int64_t NewOffset = Offset / Scale;
6605 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6606 Offset = Remainder;
6607 else {
6608 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6609 Offset = Offset - (NewOffset * Scale);
6610 }
6611
6612 if (EmittableOffset)
6613 *EmittableOffset = NewOffset;
6614 if (OutUseUnscaledOp)
6615 *OutUseUnscaledOp = useUnscaledOp;
6616 if (OutUnscaledOp && UnscaledOp)
6617 *OutUnscaledOp = *UnscaledOp;
6618
6619 if (IsMulVL)
6620 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6621 else
6622 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6624 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6625}
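
// For example (illustrative, assuming no scalable part): an LDRXui whose
// combined byte offset is 44 does not match its 8-byte scale, so the unscaled
// LDURXi form is chosen; EmittableOffset becomes 44, nothing is left in
// SOffset, and the result reports that the instruction can be updated and the
// offset is legal.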
6626
6628 unsigned FrameReg, StackOffset &Offset,
6629 const AArch64InstrInfo *TII) {
6630 unsigned Opcode = MI.getOpcode();
6631 unsigned ImmIdx = FrameRegIdx + 1;
6632
6633 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6634 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6635 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6636 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6637 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6638 MI.eraseFromParent();
6639 Offset = StackOffset();
6640 return true;
6641 }
6642
6643 int64_t NewOffset;
6644 unsigned UnscaledOp;
6645 bool UseUnscaledOp;
6646 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6647 &UnscaledOp, &NewOffset);
6650 // Replace the FrameIndex with FrameReg.
6651 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6652 if (UseUnscaledOp)
6653 MI.setDesc(TII->get(UnscaledOp));
6654
6655 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6656 return !Offset;
6657 }
6658
6659 return false;
6660}
6661
6667
6669 return MCInstBuilder(AArch64::HINT).addImm(0);
6670}
6671
6672// AArch64 supports MachineCombiner.
6673bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6674
6675// True when Opc sets flag
6676static bool isCombineInstrSettingFlag(unsigned Opc) {
6677 switch (Opc) {
6678 case AArch64::ADDSWrr:
6679 case AArch64::ADDSWri:
6680 case AArch64::ADDSXrr:
6681 case AArch64::ADDSXri:
6682 case AArch64::SUBSWrr:
6683 case AArch64::SUBSXrr:
6684 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6685 case AArch64::SUBSWri:
6686 case AArch64::SUBSXri:
6687 return true;
6688 default:
6689 break;
6690 }
6691 return false;
6692}
6693
6694// 32b Opcodes that can be combined with a MUL
6695static bool isCombineInstrCandidate32(unsigned Opc) {
6696 switch (Opc) {
6697 case AArch64::ADDWrr:
6698 case AArch64::ADDWri:
6699 case AArch64::SUBWrr:
6700 case AArch64::ADDSWrr:
6701 case AArch64::ADDSWri:
6702 case AArch64::SUBSWrr:
6703 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6704 case AArch64::SUBWri:
6705 case AArch64::SUBSWri:
6706 return true;
6707 default:
6708 break;
6709 }
6710 return false;
6711}
6712
6713// 64b Opcodes that can be combined with a MUL
6714static bool isCombineInstrCandidate64(unsigned Opc) {
6715 switch (Opc) {
6716 case AArch64::ADDXrr:
6717 case AArch64::ADDXri:
6718 case AArch64::SUBXrr:
6719 case AArch64::ADDSXrr:
6720 case AArch64::ADDSXri:
6721 case AArch64::SUBSXrr:
6722 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6723 case AArch64::SUBXri:
6724 case AArch64::SUBSXri:
6725 case AArch64::ADDv8i8:
6726 case AArch64::ADDv16i8:
6727 case AArch64::ADDv4i16:
6728 case AArch64::ADDv8i16:
6729 case AArch64::ADDv2i32:
6730 case AArch64::ADDv4i32:
6731 case AArch64::SUBv8i8:
6732 case AArch64::SUBv16i8:
6733 case AArch64::SUBv4i16:
6734 case AArch64::SUBv8i16:
6735 case AArch64::SUBv2i32:
6736 case AArch64::SUBv4i32:
6737 return true;
6738 default:
6739 break;
6740 }
6741 return false;
6742}
6743
6744// FP Opcodes that can be combined with a FMUL.
6745static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6746 switch (Inst.getOpcode()) {
6747 default:
6748 break;
6749 case AArch64::FADDHrr:
6750 case AArch64::FADDSrr:
6751 case AArch64::FADDDrr:
6752 case AArch64::FADDv4f16:
6753 case AArch64::FADDv8f16:
6754 case AArch64::FADDv2f32:
6755 case AArch64::FADDv2f64:
6756 case AArch64::FADDv4f32:
6757 case AArch64::FSUBHrr:
6758 case AArch64::FSUBSrr:
6759 case AArch64::FSUBDrr:
6760 case AArch64::FSUBv4f16:
6761 case AArch64::FSUBv8f16:
6762 case AArch64::FSUBv2f32:
6763 case AArch64::FSUBv2f64:
6764 case AArch64::FSUBv4f32:
6766 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6767 // the target options or if FADD/FSUB has the contract fast-math flag.
6768 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6770 }
6771 return false;
6772}
6773
6774// Opcodes that can be combined with a MUL
6778
6779//
6780// Utility routine that checks if \param MO is defined by an
6781// \param CombineOpc instruction in the basic block \param MBB
6783 unsigned CombineOpc, unsigned ZeroReg = 0,
6784 bool CheckZeroReg = false) {
6785 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6786 MachineInstr *MI = nullptr;
6787
6788 if (MO.isReg() && MO.getReg().isVirtual())
6789 MI = MRI.getUniqueVRegDef(MO.getReg());
6790 // And it needs to be in the trace (otherwise, it won't have a depth).
6791 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6792 return false;
6793 // Must only be used by the user we combine with.
6794 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6795 return false;
6796
6797 if (CheckZeroReg) {
6798 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6799 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6800 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6801 // The third input reg must be zero.
6802 if (MI->getOperand(3).getReg() != ZeroReg)
6803 return false;
6804 }
6805
6806 if (isCombineInstrSettingFlag(CombineOpc) &&
6807 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6808 return false;
6809
6810 return true;
6811}
6812
6813//
6814// Is \param MO defined by an integer multiply and can be combined?
6816 unsigned MulOpc, unsigned ZeroReg) {
6817 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6818}
6819
6820//
6821// Is \param MO defined by a floating-point multiply and can be combined?
6823 unsigned MulOpc) {
6824 return canCombine(MBB, MO, MulOpc);
6825}
6826
6827// TODO: There are many more machine instruction opcodes to match:
6828// 1. Other data types (integer, vectors)
6829// 2. Other math / logic operations (xor, or)
6830// 3. Other forms of the same operation (intrinsics and other variants)
6831bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6832 bool Invert) const {
6833 if (Invert)
6834 return false;
6835 switch (Inst.getOpcode()) {
6836 // == Floating-point types ==
6837 // -- Floating-point instructions --
6838 case AArch64::FADDHrr:
6839 case AArch64::FADDSrr:
6840 case AArch64::FADDDrr:
6841 case AArch64::FMULHrr:
6842 case AArch64::FMULSrr:
6843 case AArch64::FMULDrr:
6844 case AArch64::FMULX16:
6845 case AArch64::FMULX32:
6846 case AArch64::FMULX64:
6847 // -- Advanced SIMD instructions --
6848 case AArch64::FADDv4f16:
6849 case AArch64::FADDv8f16:
6850 case AArch64::FADDv2f32:
6851 case AArch64::FADDv4f32:
6852 case AArch64::FADDv2f64:
6853 case AArch64::FMULv4f16:
6854 case AArch64::FMULv8f16:
6855 case AArch64::FMULv2f32:
6856 case AArch64::FMULv4f32:
6857 case AArch64::FMULv2f64:
6858 case AArch64::FMULXv4f16:
6859 case AArch64::FMULXv8f16:
6860 case AArch64::FMULXv2f32:
6861 case AArch64::FMULXv4f32:
6862 case AArch64::FMULXv2f64:
6863 // -- SVE instructions --
6864 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6865 // in the SVE instruction set (though there are predicated ones).
6866 case AArch64::FADD_ZZZ_H:
6867 case AArch64::FADD_ZZZ_S:
6868 case AArch64::FADD_ZZZ_D:
6869 case AArch64::FMUL_ZZZ_H:
6870 case AArch64::FMUL_ZZZ_S:
6871 case AArch64::FMUL_ZZZ_D:
6874
6875 // == Integer types ==
6876 // -- Base instructions --
6877 // Opcodes MULWrr and MULXrr don't exist because
6878 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6879 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6880 // The machine-combiner does not support three-source-operand machine
6881 // instructions, so we cannot reassociate MULs.
6882 case AArch64::ADDWrr:
6883 case AArch64::ADDXrr:
6884 case AArch64::ANDWrr:
6885 case AArch64::ANDXrr:
6886 case AArch64::ORRWrr:
6887 case AArch64::ORRXrr:
6888 case AArch64::EORWrr:
6889 case AArch64::EORXrr:
6890 case AArch64::EONWrr:
6891 case AArch64::EONXrr:
6892 // -- Advanced SIMD instructions --
6893 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6894 // in the Advanced SIMD instruction set.
6895 case AArch64::ADDv8i8:
6896 case AArch64::ADDv16i8:
6897 case AArch64::ADDv4i16:
6898 case AArch64::ADDv8i16:
6899 case AArch64::ADDv2i32:
6900 case AArch64::ADDv4i32:
6901 case AArch64::ADDv1i64:
6902 case AArch64::ADDv2i64:
6903 case AArch64::MULv8i8:
6904 case AArch64::MULv16i8:
6905 case AArch64::MULv4i16:
6906 case AArch64::MULv8i16:
6907 case AArch64::MULv2i32:
6908 case AArch64::MULv4i32:
6909 case AArch64::ANDv8i8:
6910 case AArch64::ANDv16i8:
6911 case AArch64::ORRv8i8:
6912 case AArch64::ORRv16i8:
6913 case AArch64::EORv8i8:
6914 case AArch64::EORv16i8:
6915 // -- SVE instructions --
6916 case AArch64::ADD_ZZZ_B:
6917 case AArch64::ADD_ZZZ_H:
6918 case AArch64::ADD_ZZZ_S:
6919 case AArch64::ADD_ZZZ_D:
6920 case AArch64::MUL_ZZZ_B:
6921 case AArch64::MUL_ZZZ_H:
6922 case AArch64::MUL_ZZZ_S:
6923 case AArch64::MUL_ZZZ_D:
6924 case AArch64::AND_ZZZ:
6925 case AArch64::ORR_ZZZ:
6926 case AArch64::EOR_ZZZ:
6927 return true;
6928
6929 default:
6930 return false;
6931 }
6932}
6933
6934/// Find instructions that can be turned into madd.
6936 SmallVectorImpl<unsigned> &Patterns) {
6937 unsigned Opc = Root.getOpcode();
6938 MachineBasicBlock &MBB = *Root.getParent();
6939 bool Found = false;
6940
6942 return false;
6944 int Cmp_NZCV =
6945 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6946 // When NZCV is live bail out.
6947 if (Cmp_NZCV == -1)
6948 return false;
6949 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6950 // When opcode can't change bail out.
6951 // CHECKME: do we miss any cases for opcode conversion?
6952 if (NewOpc == Opc)
6953 return false;
6954 Opc = NewOpc;
6955 }
6956
6957 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6958 unsigned Pattern) {
6959 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6960 Patterns.push_back(Pattern);
6961 Found = true;
6962 }
6963 };
6964
6965 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6966 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6967 Patterns.push_back(Pattern);
6968 Found = true;
6969 }
6970 };
6971
6973
6974 switch (Opc) {
6975 default:
6976 break;
6977 case AArch64::ADDWrr:
6978 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6979 "ADDWrr does not have register operands");
6980 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6981 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6982 break;
6983 case AArch64::ADDXrr:
6984 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6985 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6986 break;
6987 case AArch64::SUBWrr:
6988 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6989 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6990 break;
6991 case AArch64::SUBXrr:
6992 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6993 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6994 break;
6995 case AArch64::ADDWri:
6996 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6997 break;
6998 case AArch64::ADDXri:
6999 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7000 break;
7001 case AArch64::SUBWri:
7002 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7003 break;
7004 case AArch64::SUBXri:
7005 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7006 break;
7007 case AArch64::ADDv8i8:
7008 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7009 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7010 break;
7011 case AArch64::ADDv16i8:
7012 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7013 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7014 break;
7015 case AArch64::ADDv4i16:
7016 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7017 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7018 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7019 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7020 break;
7021 case AArch64::ADDv8i16:
7022 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7023 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7024 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7025 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7026 break;
7027 case AArch64::ADDv2i32:
7028 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7029 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7030 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7031 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7032 break;
7033 case AArch64::ADDv4i32:
7034 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7035 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7036 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7037 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7038 break;
7039 case AArch64::SUBv8i8:
7040 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7041 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7042 break;
7043 case AArch64::SUBv16i8:
7044 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7045 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7046 break;
7047 case AArch64::SUBv4i16:
7048 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7049 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7050 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7051 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7052 break;
7053 case AArch64::SUBv8i16:
7054 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7055 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7056 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7057 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7058 break;
7059 case AArch64::SUBv2i32:
7060 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7061 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7062 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7063 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7064 break;
7065 case AArch64::SUBv4i32:
7066 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7067 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7068 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7069 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7070 break;
7071 }
7072 return Found;
7073}
7074
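/// Return true if \p Opcode is an absolute-difference-and-accumulate
/// instruction (UABA/SABA, the widening UABAL/SABAL forms, or their SVE
/// bottom/top ZZZ variants).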
7075bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7076 switch (Opcode) {
7077 default:
7078 break;
7079 case AArch64::UABALB_ZZZ_D:
7080 case AArch64::UABALB_ZZZ_H:
7081 case AArch64::UABALB_ZZZ_S:
7082 case AArch64::UABALT_ZZZ_D:
7083 case AArch64::UABALT_ZZZ_H:
7084 case AArch64::UABALT_ZZZ_S:
7085 case AArch64::SABALB_ZZZ_D:
7086 case AArch64::SABALB_ZZZ_S:
7087 case AArch64::SABALB_ZZZ_H:
7088 case AArch64::SABALT_ZZZ_D:
7089 case AArch64::SABALT_ZZZ_S:
7090 case AArch64::SABALT_ZZZ_H:
7091 case AArch64::UABALv16i8_v8i16:
7092 case AArch64::UABALv2i32_v2i64:
7093 case AArch64::UABALv4i16_v4i32:
7094 case AArch64::UABALv4i32_v2i64:
7095 case AArch64::UABALv8i16_v4i32:
7096 case AArch64::UABALv8i8_v8i16:
7097 case AArch64::UABAv16i8:
7098 case AArch64::UABAv2i32:
7099 case AArch64::UABAv4i16:
7100 case AArch64::UABAv4i32:
7101 case AArch64::UABAv8i16:
7102 case AArch64::UABAv8i8:
7103 case AArch64::SABALv16i8_v8i16:
7104 case AArch64::SABALv2i32_v2i64:
7105 case AArch64::SABALv4i16_v4i32:
7106 case AArch64::SABALv4i32_v2i64:
7107 case AArch64::SABALv8i16_v4i32:
7108 case AArch64::SABALv8i8_v8i16:
7109 case AArch64::SABAv16i8:
7110 case AArch64::SABAv2i32:
7111 case AArch64::SABAv4i16:
7112 case AArch64::SABAv4i32:
7113 case AArch64::SABAv8i16:
7114 case AArch64::SABAv8i8:
7115 return true;
7116 }
7117
7118 return false;
7119}
7120
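/// Map an accumulating absolute-difference opcode to the corresponding
/// non-accumulating opcode (UABD/SABD or the widening UABDL/SABDL forms)
/// that can start the accumulation chain.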
7121unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7122 unsigned AccumulationOpcode) const {
7123 switch (AccumulationOpcode) {
7124 default:
7125 llvm_unreachable("Unsupported accumulation Opcode!");
7126 case AArch64::UABALB_ZZZ_D:
7127 return AArch64::UABDLB_ZZZ_D;
7128 case AArch64::UABALB_ZZZ_H:
7129 return AArch64::UABDLB_ZZZ_H;
7130 case AArch64::UABALB_ZZZ_S:
7131 return AArch64::UABDLB_ZZZ_S;
7132 case AArch64::UABALT_ZZZ_D:
7133 return AArch64::UABDLT_ZZZ_D;
7134 case AArch64::UABALT_ZZZ_H:
7135 return AArch64::UABDLT_ZZZ_H;
7136 case AArch64::UABALT_ZZZ_S:
7137 return AArch64::UABDLT_ZZZ_S;
7138 case AArch64::UABALv16i8_v8i16:
7139 return AArch64::UABDLv16i8_v8i16;
7140 case AArch64::UABALv2i32_v2i64:
7141 return AArch64::UABDLv2i32_v2i64;
7142 case AArch64::UABALv4i16_v4i32:
7143 return AArch64::UABDLv4i16_v4i32;
7144 case AArch64::UABALv4i32_v2i64:
7145 return AArch64::UABDLv4i32_v2i64;
7146 case AArch64::UABALv8i16_v4i32:
7147 return AArch64::UABDLv8i16_v4i32;
7148 case AArch64::UABALv8i8_v8i16:
7149 return AArch64::UABDLv8i8_v8i16;
7150 case AArch64::UABAv16i8:
7151 return AArch64::UABDv16i8;
7152 case AArch64::UABAv2i32:
7153 return AArch64::UABDv2i32;
7154 case AArch64::UABAv4i16:
7155 return AArch64::UABDv4i16;
7156 case AArch64::UABAv4i32:
7157 return AArch64::UABDv4i32;
7158 case AArch64::UABAv8i16:
7159 return AArch64::UABDv8i16;
7160 case AArch64::UABAv8i8:
7161 return AArch64::UABDv8i8;
7162 case AArch64::SABALB_ZZZ_D:
7163 return AArch64::SABDLB_ZZZ_D;
7164 case AArch64::SABALB_ZZZ_S:
7165 return AArch64::SABDLB_ZZZ_S;
7166 case AArch64::SABALB_ZZZ_H:
7167 return AArch64::SABDLB_ZZZ_H;
7168 case AArch64::SABALT_ZZZ_D:
7169 return AArch64::SABDLT_ZZZ_D;
7170 case AArch64::SABALT_ZZZ_S:
7171 return AArch64::SABDLT_ZZZ_S;
7172 case AArch64::SABALT_ZZZ_H:
7173 return AArch64::SABDLT_ZZZ_H;
7174 case AArch64::SABALv16i8_v8i16:
7175 return AArch64::SABDLv16i8_v8i16;
7176 case AArch64::SABALv2i32_v2i64:
7177 return AArch64::SABDLv2i32_v2i64;
7178 case AArch64::SABALv4i16_v4i32:
7179 return AArch64::SABDLv4i16_v4i32;
7180 case AArch64::SABALv4i32_v2i64:
7181 return AArch64::SABDLv4i32_v2i64;
7182 case AArch64::SABALv8i16_v4i32:
7183 return AArch64::SABDLv8i16_v4i32;
7184 case AArch64::SABALv8i8_v8i16:
7185 return AArch64::SABDLv8i8_v8i16;
7186 case AArch64::SABAv16i8:
7187 return AArch64::SABDv16i8;
7188 case AArch64::SABAv2i32:
7189    return AArch64::SABDv2i32;
7190 case AArch64::SABAv4i16:
7191 return AArch64::SABDv4i16;
7192 case AArch64::SABAv4i32:
7193 return AArch64::SABDv4i32;
7194 case AArch64::SABAv8i16:
7195 return AArch64::SABDv8i16;
7196 case AArch64::SABAv8i8:
7197 return AArch64::SABDv8i8;
7198 }
7199}
7200
7201/// Floating-Point Support
7202
7203/// Find instructions that can be turned into madd.
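/// For example (illustrative, with virtual-register operands):
///   %3:fpr32 = FMULSrr %1, %2
///   %4:fpr32 = FADDSrr %3, %0
/// can be rewritten as
///   %4:fpr32 = FMADDSrrr %1, %2, %0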
7204static bool getFMAPatterns(MachineInstr &Root,
7205                           SmallVectorImpl<unsigned> &Patterns) {
7206
7207 if (!isCombineInstrCandidateFP(Root))
7208 return false;
7209
7210 MachineBasicBlock &MBB = *Root.getParent();
7211 bool Found = false;
7212
7213 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7214 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7215 Patterns.push_back(Pattern);
7216 return true;
7217 }
7218 return false;
7219 };
7220
7221  typedef AArch64MachineCombinerPattern MCP;
7222
7223 switch (Root.getOpcode()) {
7224 default:
7225 assert(false && "Unsupported FP instruction in combiner\n");
7226 break;
7227 case AArch64::FADDHrr:
7228 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7229 "FADDHrr does not have register operands");
7230
7231 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7232 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7233 break;
7234 case AArch64::FADDSrr:
7235 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7236 "FADDSrr does not have register operands");
7237
7238 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7239 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7240
7241 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7242 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7243 break;
7244 case AArch64::FADDDrr:
7245 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7246 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7247
7248 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7249 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7250 break;
7251 case AArch64::FADDv4f16:
7252 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7253 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7254
7255 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7256 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7257 break;
7258 case AArch64::FADDv8f16:
7259 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7260 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7261
7262 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7263 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7264 break;
7265 case AArch64::FADDv2f32:
7266 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7267 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7268
7269 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7270 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7271 break;
7272 case AArch64::FADDv2f64:
7273 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7274 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7275
7276 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7277 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7278 break;
7279 case AArch64::FADDv4f32:
7280 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7281 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7282
7283 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7284 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7285 break;
7286 case AArch64::FSUBHrr:
7287 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7288 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7289 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7290 break;
7291 case AArch64::FSUBSrr:
7292 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7293
7294 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7295 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7296
7297 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7298 break;
7299 case AArch64::FSUBDrr:
7300 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7301
7302 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7303 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7304
7305 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7306 break;
7307 case AArch64::FSUBv4f16:
7308 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7309 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7310
7311 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7312 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7313 break;
7314 case AArch64::FSUBv8f16:
7315 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7316 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7317
7318 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7319 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7320 break;
7321 case AArch64::FSUBv2f32:
7322 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7323 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7324
7325 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7326 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7327 break;
7328 case AArch64::FSUBv2f64:
7329 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7330 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7331
7332 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7333 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7334 break;
7335 case AArch64::FSUBv4f32:
7336 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7337 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7338
7339 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7340 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7341 break;
7342 }
7343 return Found;
7344}
7345
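/// Find FMUL instructions whose operand is a DUP of a vector lane; such a
/// pair can be rewritten to use the lane-indexed form of FMUL.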
7346static bool getFMULPatterns(MachineInstr &Root,
7347                            SmallVectorImpl<unsigned> &Patterns) {
7348 MachineBasicBlock &MBB = *Root.getParent();
7349 bool Found = false;
7350
7351 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7352 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7353 MachineOperand &MO = Root.getOperand(Operand);
7354 MachineInstr *MI = nullptr;
7355 if (MO.isReg() && MO.getReg().isVirtual())
7356 MI = MRI.getUniqueVRegDef(MO.getReg());
7357 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7358 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7359 MI->getOperand(1).getReg().isVirtual())
7360 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7361 if (MI && MI->getOpcode() == Opcode) {
7362 Patterns.push_back(Pattern);
7363 return true;
7364 }
7365 return false;
7366 };
7367
7368  typedef AArch64MachineCombinerPattern MCP;
7369
7370 switch (Root.getOpcode()) {
7371 default:
7372 return false;
7373 case AArch64::FMULv2f32:
7374 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7375 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7376 break;
7377 case AArch64::FMULv2f64:
7378 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7379 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7380 break;
7381 case AArch64::FMULv4f16:
7382 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7383 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7384 break;
7385 case AArch64::FMULv4f32:
7386 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7387 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7388 break;
7389 case AArch64::FMULv8f16:
7390 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7391 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7392 break;
7393 }
7394
7395 return Found;
7396}
7397
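/// Find an FNEG of a single-use FMADD carrying the required fast-math flags;
/// the pair can be combined into a single FNMADD.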
7398static bool getFNEGPatterns(MachineInstr &Root,
7399                            SmallVectorImpl<unsigned> &Patterns) {
7400 unsigned Opc = Root.getOpcode();
7401 MachineBasicBlock &MBB = *Root.getParent();
7402 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7403
7404 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7405 MachineOperand &MO = Root.getOperand(1);
7406 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7407 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7408 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7409        Root.getFlag(MachineInstr::MIFlag::FmContract) &&
7410        Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
7411        MI->getFlag(MachineInstr::MIFlag::FmContract) &&
7412        MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7413 Patterns.push_back(Pattern);
7414 return true;
7415 }
7416 return false;
7417 };
7418
7419 switch (Opc) {
7420 default:
7421 break;
7422 case AArch64::FNEGDr:
7423 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7424 case AArch64::FNEGSr:
7425 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7426 }
7427
7428 return false;
7429}
7430
7431/// Return true when a code sequence can improve throughput. It
7432/// should be called only for instructions in loops.
7433/// \param Pattern - combiner pattern
7434bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7435  switch (Pattern) {
7436 default:
7437 break;
7543 return true;
7544 } // end switch (Pattern)
7545 return false;
7546}
7547
7548/// Find other MI combine patterns.
7549static bool getMiscPatterns(MachineInstr &Root,
7550                            SmallVectorImpl<unsigned> &Patterns) {
7551 // A - (B + C) ==> (A - B) - C or (A - C) - B
7552 unsigned Opc = Root.getOpcode();
7553 MachineBasicBlock &MBB = *Root.getParent();
7554
7555 switch (Opc) {
7556 case AArch64::SUBWrr:
7557 case AArch64::SUBSWrr:
7558 case AArch64::SUBXrr:
7559 case AArch64::SUBSXrr:
7560 // Found candidate root.
7561 break;
7562 default:
7563 return false;
7564 }
7565
7566  if (isCombineInstrSettingFlag(Opc) &&
7567      Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7568 -1)
7569 return false;
7570
7571 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7572 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7573 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7574 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7575    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7576    Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7577    return true;
7578 }
7579
7580 return false;
7581}
7582
7583/// Check if the given instruction forms a gather load pattern that can be
7584/// optimized for better Memory-Level Parallelism (MLP). This function
7585/// identifies chains of NEON lane load instructions that load data from
7586/// different memory addresses into individual lanes of a 128-bit vector
7587/// register, then attempts to split the pattern into parallel loads to break
7588/// the serial dependency between instructions.
7589///
7590/// Pattern Matched:
7591/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7592/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7593///
7594/// Transformed Into:
7595/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7596/// to combine the results, enabling better memory-level parallelism.
7597///
7598/// Supported Element Types:
7599/// - 32-bit elements (LD1i32, 4 lanes total)
7600/// - 16-bit elements (LD1i16, 8 lanes total)
7601/// - 8-bit elements (LD1i8, 16 lanes total)
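///
/// Illustrative example for 32-bit elements (register and pointer names are
/// placeholders, not taken from real IR):
///   %v0:fpr32  = LDRSui %ptr0, 0
///   %v1:fpr128 = SUBREG_TO_REG 0, %v0, %subreg.ssub
///   %v2:fpr128 = LD1i32 %v1, 1, %ptr1
///   %v3:fpr128 = LD1i32 %v2, 2, %ptr2
///   %v4:fpr128 = LD1i32 %v3, 3, %ptr3
/// is split into two independent half-width lane chains whose results are
/// combined with ZIP1v2i64.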
7602static bool getGatherLanePattern(MachineInstr &Root,
7603                                 SmallVectorImpl<unsigned> &Patterns,
7604 unsigned LoadLaneOpCode, unsigned NumLanes) {
7605 const MachineFunction *MF = Root.getMF();
7606
7607 // Early exit if optimizing for size.
7608 if (MF->getFunction().hasMinSize())
7609 return false;
7610
7611 const MachineRegisterInfo &MRI = MF->getRegInfo();
7612  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7613
7614 // The root of the pattern must load into the last lane of the vector.
7615 if (Root.getOperand(2).getImm() != NumLanes - 1)
7616 return false;
7617
7618  // Check that we have loads into all lanes except lane 0.
7619 // For each load we also want to check that:
7620 // 1. It has a single non-debug use (since we will be replacing the virtual
7621 // register)
7622 // 2. That the addressing mode only uses a single pointer operand
7623 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7624 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7625 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7626  SmallVector<const MachineInstr *, 16> LoadInstrs;
7627  while (!RemainingLanes.empty() && CurrInstr &&
7628 CurrInstr->getOpcode() == LoadLaneOpCode &&
7629 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7630 CurrInstr->getNumOperands() == 4) {
7631 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7632 LoadInstrs.push_back(CurrInstr);
7633 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7634 }
7635
7636 // Check that we have found a match for lanes N-1.. 1.
7637 if (!RemainingLanes.empty())
7638 return false;
7639
7640 // Match the SUBREG_TO_REG sequence.
7641 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7642 return false;
7643
7644 // Verify that the subreg to reg loads an integer into the first lane.
7645 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7646 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7647 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7648 return false;
7649
7650 // Verify that it also has a single non debug use.
7651 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7652 return false;
7653
7654 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7655
7656 // If there is any chance of aliasing, do not apply the pattern.
7657 // Walk backward through the MBB starting from Root.
7658 // Exit early if we've encountered all load instructions or hit the search
7659 // limit.
7660 auto MBBItr = Root.getIterator();
7661 unsigned RemainingSteps = GatherOptSearchLimit;
7662 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7663 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7664 const MachineBasicBlock *MBB = Root.getParent();
7665
7666 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7667 !RemainingLoadInstrs.empty();
7668 --MBBItr, --RemainingSteps) {
7669 const MachineInstr &CurrInstr = *MBBItr;
7670
7671 // Remove this instruction from remaining loads if it's one we're tracking.
7672 RemainingLoadInstrs.erase(&CurrInstr);
7673
7674 // Check for potential aliasing with any of the load instructions to
7675 // optimize.
7676 if (CurrInstr.isLoadFoldBarrier())
7677 return false;
7678 }
7679
7680 // If we hit the search limit without finding all load instructions,
7681 // don't match the pattern.
7682 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7683 return false;
7684
7685 switch (NumLanes) {
7686 case 4:
7687    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
7688    break;
7689 case 8:
7690    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
7691    break;
7692 case 16:
7693    Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
7694    break;
7695 default:
7696 llvm_unreachable("Got bad number of lanes for gather pattern.");
7697 }
7698
7699 return true;
7700}
7701
7702/// Search for patterns of LD instructions we can optimize.
7703static bool getLoadPatterns(MachineInstr &Root,
7704                            SmallVectorImpl<unsigned> &Patterns) {
7705
7706 // The pattern searches for loads into single lanes.
7707 switch (Root.getOpcode()) {
7708 case AArch64::LD1i32:
7709 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7710 case AArch64::LD1i16:
7711 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7712 case AArch64::LD1i8:
7713 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7714 default:
7715 return false;
7716 }
7717}
7718
7719/// Generate optimized instruction sequence for gather load patterns to improve
7720/// Memory-Level Parallelism (MLP). This function transforms a chain of
7721/// sequential NEON lane loads into parallel vector loads that can execute
7722/// concurrently.
7723static void
7727 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7728 unsigned Pattern, unsigned NumLanes) {
7729 MachineFunction &MF = *Root.getParent()->getParent();
7730  const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7731  MachineRegisterInfo &MRI = MF.getRegInfo();
7732
7733 // Gather the initial load instructions to build the pattern.
7734 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7735 MachineInstr *CurrInstr = &Root;
7736 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7737 LoadToLaneInstrs.push_back(CurrInstr);
7738 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7739 }
7740
7741 // Sort the load instructions according to the lane.
7742 llvm::sort(LoadToLaneInstrs,
7743 [](const MachineInstr *A, const MachineInstr *B) {
7744 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7745 });
7746
7747 MachineInstr *SubregToReg = CurrInstr;
7748 LoadToLaneInstrs.push_back(
7749 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7750 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7751
7752 const TargetRegisterClass *FPR128RegClass =
7753 MRI.getRegClass(Root.getOperand(0).getReg());
7754
7755 // Helper lambda to create a LD1 instruction.
7756 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7757 Register SrcRegister, unsigned Lane,
7758 Register OffsetRegister,
7759 bool OffsetRegisterKillState) {
7760 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7761 MachineInstrBuilder LoadIndexIntoRegister =
7762 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7763 NewRegister)
7764 .addReg(SrcRegister)
7765 .addImm(Lane)
7766 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7767 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7768 InsInstrs.push_back(LoadIndexIntoRegister);
7769 return NewRegister;
7770 };
7771
7772 // Helper to create load instruction based on the NumLanes in the NEON
7773 // register we are rewriting.
7774 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7775 Register OffsetReg,
7776 bool KillState) -> MachineInstrBuilder {
7777 unsigned Opcode;
7778 switch (NumLanes) {
7779 case 4:
7780 Opcode = AArch64::LDRSui;
7781 break;
7782 case 8:
7783 Opcode = AArch64::LDRHui;
7784 break;
7785 case 16:
7786 Opcode = AArch64::LDRBui;
7787 break;
7788 default:
7789      llvm_unreachable(
7790          "Got unsupported number of lanes in machine-combiner gather pattern");
7791 }
7792 // Immediate offset load
7793 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7794 .addReg(OffsetReg)
7795 .addImm(0);
7796 };
7797
7798 // Load the remaining lanes into register 0.
7799 auto LanesToLoadToReg0 =
7800 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7801 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7802 Register PrevReg = SubregToReg->getOperand(0).getReg();
7803 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7804 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7805 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7806 OffsetRegOperand.getReg(),
7807 OffsetRegOperand.isKill());
7808 DelInstrs.push_back(LoadInstr);
7809 }
7810 Register LastLoadReg0 = PrevReg;
7811
7812 // First load into register 1. Perform an integer load to zero out the upper
7813 // lanes in a single instruction.
7814 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7815 MachineInstr *OriginalSplitLoad =
7816 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7817 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7818 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7819
7820 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7821 OriginalSplitLoad->getOperand(3);
7822 MachineInstrBuilder MiddleIndexLoadInstr =
7823 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7824 OriginalSplitToLoadOffsetOperand.getReg(),
7825 OriginalSplitToLoadOffsetOperand.isKill());
7826
7827 InstrIdxForVirtReg.insert(
7828 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7829 InsInstrs.push_back(MiddleIndexLoadInstr);
7830 DelInstrs.push_back(OriginalSplitLoad);
7831
7832 // Subreg To Reg instruction for register 1.
7833 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7834 unsigned SubregType;
7835 switch (NumLanes) {
7836 case 4:
7837 SubregType = AArch64::ssub;
7838 break;
7839 case 8:
7840 SubregType = AArch64::hsub;
7841 break;
7842 case 16:
7843 SubregType = AArch64::bsub;
7844 break;
7845 default:
7846    llvm_unreachable(
7847        "Got invalid NumLanes for machine-combiner gather pattern");
7848 }
7849
7850 auto SubRegToRegInstr =
7851 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7852 DestRegForSubregToReg)
7853 .addImm(0)
7854 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7855 .addImm(SubregType);
7856 InstrIdxForVirtReg.insert(
7857 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7858 InsInstrs.push_back(SubRegToRegInstr);
7859
7860 // Load remaining lanes into register 1.
7861 auto LanesToLoadToReg1 =
7862 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7863 LoadToLaneInstrsAscending.end());
7864 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7865 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7866 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7867 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7868 OffsetRegOperand.getReg(),
7869 OffsetRegOperand.isKill());
7870
7871 // Do not add the last reg to DelInstrs - it will be removed later.
7872 if (Index == NumLanes / 2 - 2) {
7873 break;
7874 }
7875 DelInstrs.push_back(LoadInstr);
7876 }
7877 Register LastLoadReg1 = PrevReg;
7878
7879 // Create the final zip instruction to combine the results.
7880 MachineInstrBuilder ZipInstr =
7881 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7882 Root.getOperand(0).getReg())
7883 .addReg(LastLoadReg0)
7884 .addReg(LastLoadReg1);
7885 InsInstrs.push_back(ZipInstr);
7886}
7887
7901
7902/// Return true when there is potentially a faster code sequence for an
7903/// instruction chain ending in \p Root. All potential patterns are listed in
7904/// the \p Pattern vector. Pattern should be sorted in priority order since the
7905/// pattern evaluator stops checking as soon as it finds a faster sequence.
7906
7907bool AArch64InstrInfo::getMachineCombinerPatterns(
7908 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7909 bool DoRegPressureReduce) const {
7910 // Integer patterns
7911 if (getMaddPatterns(Root, Patterns))
7912 return true;
7913 // Floating point patterns
7914 if (getFMULPatterns(Root, Patterns))
7915 return true;
7916 if (getFMAPatterns(Root, Patterns))
7917 return true;
7918 if (getFNEGPatterns(Root, Patterns))
7919 return true;
7920
7921 // Other patterns
7922 if (getMiscPatterns(Root, Patterns))
7923 return true;
7924
7925 // Load patterns
7926 if (getLoadPatterns(Root, Patterns))
7927 return true;
7928
7929 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7930 DoRegPressureReduce);
7931}
7932
7934/// genFusedMultiply - Generate fused multiply instructions.
7935/// This function supports both integer and floating point instructions.
7936/// A typical example:
7937/// F|MUL I=A,B,0
7938/// F|ADD R,I,C
7939/// ==> F|MADD R,A,B,C
7940/// \param MF Containing MachineFunction
7941/// \param MRI Register information
7942/// \param TII Target information
7943/// \param Root is the F|ADD instruction
7944/// \param [out] InsInstrs is a vector of machine instructions and will
7945/// contain the generated madd instruction
7946/// \param IdxMulOpd is index of operand in Root that is the result of
7947/// the F|MUL. In the example above IdxMulOpd is 1.
7948/// \param MaddOpc the opcode of the f|madd instruction
7949/// \param RC Register class of operands
7950/// \param kind of fma instruction (addressing mode) to be generated
7951/// \param ReplacedAddend is the result register from the instruction
7952/// replacing the non-combined operand, if any.
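///
/// The operand order of the generated instruction depends on \p kind:
///   Default:     dst, mulop0, mulop1, addend        (MADD/FMADD)
///   Indexed:     dst, addend, mulop0, mulop1, lane  (lane-indexed MLA/FMLA/FMLS)
///   Accumulator: dst, addend, mulop0, mulop1        (vector MLA/MLS/FMLA/FMLS)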
7953static MachineInstr *
7954genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7955                 const TargetInstrInfo *TII, MachineInstr &Root,
7956 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7957 unsigned MaddOpc, const TargetRegisterClass *RC,
7958                 FMAInstKind kind = FMAInstKind::Default,
7959                 const Register *ReplacedAddend = nullptr) {
7960 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7961
7962 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7963 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7964 Register ResultReg = Root.getOperand(0).getReg();
7965 Register SrcReg0 = MUL->getOperand(1).getReg();
7966 bool Src0IsKill = MUL->getOperand(1).isKill();
7967 Register SrcReg1 = MUL->getOperand(2).getReg();
7968 bool Src1IsKill = MUL->getOperand(2).isKill();
7969
7970 Register SrcReg2;
7971 bool Src2IsKill;
7972 if (ReplacedAddend) {
7973    // If we just generated a new addend, we must be its only use.
7974 SrcReg2 = *ReplacedAddend;
7975 Src2IsKill = true;
7976 } else {
7977 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7978 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7979 }
7980
7981 if (ResultReg.isVirtual())
7982 MRI.constrainRegClass(ResultReg, RC);
7983 if (SrcReg0.isVirtual())
7984 MRI.constrainRegClass(SrcReg0, RC);
7985 if (SrcReg1.isVirtual())
7986 MRI.constrainRegClass(SrcReg1, RC);
7987 if (SrcReg2.isVirtual())
7988 MRI.constrainRegClass(SrcReg2, RC);
7989
7990  MachineInstrBuilder MIB;
7991  if (kind == FMAInstKind::Default)
7992 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7993 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7994 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7995 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7996 else if (kind == FMAInstKind::Indexed)
7997 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7998 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7999 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8000 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8001 .addImm(MUL->getOperand(3).getImm());
8002 else if (kind == FMAInstKind::Accumulator)
8003 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8004 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8005 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8006 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8007 else
8008 assert(false && "Invalid FMA instruction kind \n");
8009 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
8010 InsInstrs.push_back(MIB);
8011 return MUL;
8012}
8013
8014static MachineInstr *
8016 const TargetInstrInfo *TII, MachineInstr &Root,
8017               SmallVectorImpl<MachineInstr *> &InsInstrs) {
8018  MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8019
8020 unsigned Opc = 0;
8021 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8022 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8023 Opc = AArch64::FNMADDSrrr;
8024 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8025 Opc = AArch64::FNMADDDrrr;
8026 else
8027 return nullptr;
8028
8029 Register ResultReg = Root.getOperand(0).getReg();
8030 Register SrcReg0 = MAD->getOperand(1).getReg();
8031 Register SrcReg1 = MAD->getOperand(2).getReg();
8032 Register SrcReg2 = MAD->getOperand(3).getReg();
8033 bool Src0IsKill = MAD->getOperand(1).isKill();
8034 bool Src1IsKill = MAD->getOperand(2).isKill();
8035 bool Src2IsKill = MAD->getOperand(3).isKill();
8036 if (ResultReg.isVirtual())
8037 MRI.constrainRegClass(ResultReg, RC);
8038 if (SrcReg0.isVirtual())
8039 MRI.constrainRegClass(SrcReg0, RC);
8040 if (SrcReg1.isVirtual())
8041 MRI.constrainRegClass(SrcReg1, RC);
8042 if (SrcReg2.isVirtual())
8043 MRI.constrainRegClass(SrcReg2, RC);
8044
8045  MachineInstrBuilder MIB =
8046      BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8047 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8048 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8049 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8050 InsInstrs.push_back(MIB);
8051
8052 return MAD;
8053}
8054
8055/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
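/// For example (illustrative, with virtual-register operands):
///   %2 = DUPv4i32lane %1, 0
///   %3 = FMULv4f32 %0, %2
/// becomes
///   %3 = FMULv4i32_indexed %0, %1, 0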
8056static MachineInstr *
8059 unsigned IdxDupOp, unsigned MulOpc,
8061 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8062 "Invalid index of FMUL operand");
8063
8064 MachineFunction &MF = *Root.getMF();
8066
8067 MachineInstr *Dup =
8068 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8069
8070 if (Dup->getOpcode() == TargetOpcode::COPY)
8071 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8072
8073 Register DupSrcReg = Dup->getOperand(1).getReg();
8074 MRI.clearKillFlags(DupSrcReg);
8075 MRI.constrainRegClass(DupSrcReg, RC);
8076
8077 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8078
8079 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8080 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8081
8082 Register ResultReg = Root.getOperand(0).getReg();
8083
8084  MachineInstrBuilder MIB;
8085  MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8086 .add(MulOp)
8087 .addReg(DupSrcReg)
8088 .addImm(DupSrcLane);
8089
8090 InsInstrs.push_back(MIB);
8091 return &Root;
8092}
8093
8094/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8095/// instructions.
8096///
8097/// \see genFusedMultiply
8098static MachineInstr *genFusedMultiplyAcc(
8099    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8100    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8101    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8102 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8103                          FMAInstKind::Accumulator);
8104}
8105
8106/// genNeg - Helper to generate an intermediate negation of the second operand
8107/// of Root
8109 const TargetInstrInfo *TII, MachineInstr &Root,
8111 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8112 unsigned MnegOpc, const TargetRegisterClass *RC) {
8113 Register NewVR = MRI.createVirtualRegister(RC);
8114  MachineInstrBuilder MIB =
8115      BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8116 .add(Root.getOperand(2));
8117 InsInstrs.push_back(MIB);
8118
8119 assert(InstrIdxForVirtReg.empty());
8120 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8121
8122 return NewVR;
8123}
8124
8125/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8126/// instructions with an additional negation of the accumulator
8127static MachineInstr *genFusedMultiplyAccNeg(
8128    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8129    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8130    DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8131 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8132 assert(IdxMulOpd == 1);
8133
8134 Register NewVR =
8135 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8136 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8137 FMAInstKind::Accumulator, &NewVR);
8138}
8139
8140/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8141/// instructions.
8142///
8143/// \see genFusedMultiply
8144static MachineInstr *genFusedMultiplyIdx(
8145    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8146    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8147    unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8148 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8149                          FMAInstKind::Indexed);
8150}
8151
8152/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
8153/// instructions with an additional negation of the accumulator
8154static MachineInstr *genFusedMultiplyIdxNeg(
8155    MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8156    MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8157    DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8158 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8159 assert(IdxMulOpd == 1);
8160
8161 Register NewVR =
8162 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8163
8164 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8165 FMAInstKind::Indexed, &NewVR);
8166}
8167
8168/// genMaddR - Generate madd instruction and combine mul and add using
8169/// an extra virtual register
8170/// Example - an ADD intermediate needs to be stored in a register:
8171/// MUL I=A,B,0
8172/// ADD R,I,Imm
8173/// ==> ORR V, ZR, Imm
8174/// ==> MADD R,A,B,V
8175/// \param MF Containing MachineFunction
8176/// \param MRI Register information
8177/// \param TII Target information
8178/// \param Root is the ADD instruction
8179/// \param [out] InsInstrs is a vector of machine instructions and will
8180/// contain the generated madd instruction
8181/// \param IdxMulOpd is index of operand in Root that is the result of
8182/// the MUL. In the example above IdxMulOpd is 1.
8183/// \param MaddOpc the opcode of the madd instruction
8184/// \param VR is a virtual register that holds the value of an ADD operand
8185/// (V in the example above).
8186/// \param RC Register class of operands
8187static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8188                              const TargetInstrInfo *TII, MachineInstr &Root,
8189                              SmallVectorImpl<MachineInstr *> &InsInstrs,
8190                              unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8191 const TargetRegisterClass *RC) {
8192 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8193
8194 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8195 Register ResultReg = Root.getOperand(0).getReg();
8196 Register SrcReg0 = MUL->getOperand(1).getReg();
8197 bool Src0IsKill = MUL->getOperand(1).isKill();
8198 Register SrcReg1 = MUL->getOperand(2).getReg();
8199 bool Src1IsKill = MUL->getOperand(2).isKill();
8200
8201 if (ResultReg.isVirtual())
8202 MRI.constrainRegClass(ResultReg, RC);
8203 if (SrcReg0.isVirtual())
8204 MRI.constrainRegClass(SrcReg0, RC);
8205 if (SrcReg1.isVirtual())
8206 MRI.constrainRegClass(SrcReg1, RC);
8207  if (Register(VR).isVirtual())
8208    MRI.constrainRegClass(VR, RC);
8209
8210  MachineInstrBuilder MIB =
8211      BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8211 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8212 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8213 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8214 .addReg(VR);
8215 // Insert the MADD
8216 InsInstrs.push_back(MIB);
8217 return MUL;
8218}
8219
8220/// Do the following transformation
8221/// A - (B + C) ==> (A - B) - C
8222/// A - (B + C) ==> (A - C) - B
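/// For example, for the 32-bit form with IdxOpd1 == 1 (illustrative,
/// virtual-register operands):
///   %3 = ADDWrr %1, %2
///   %4 = SUBWrr %0, %3
/// becomes
///   %5 = SUBWrr %0, %1
///   %4 = SUBWrr %5, %2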
8223static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8224                             const TargetInstrInfo *TII, MachineInstr &Root,
8225                             SmallVectorImpl<MachineInstr *> &InsInstrs,
8226                             SmallVectorImpl<MachineInstr *> &DelInstrs,
8227                             unsigned IdxOpd1,
8228 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8229 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8230 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8231 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8232
8233 Register ResultReg = Root.getOperand(0).getReg();
8234 Register RegA = Root.getOperand(1).getReg();
8235 bool RegAIsKill = Root.getOperand(1).isKill();
8236 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8237 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8238 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8239 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8240 Register NewVR =
8241 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8242
8243 unsigned Opcode = Root.getOpcode();
8244 if (Opcode == AArch64::SUBSWrr)
8245 Opcode = AArch64::SUBWrr;
8246 else if (Opcode == AArch64::SUBSXrr)
8247 Opcode = AArch64::SUBXrr;
8248 else
8249 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8250 "Unexpected instruction opcode.");
8251
8252 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8253 Flags &= ~MachineInstr::NoSWrap;
8254 Flags &= ~MachineInstr::NoUWrap;
8255
8256 MachineInstrBuilder MIB1 =
8257 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8258 .addReg(RegA, getKillRegState(RegAIsKill))
8259 .addReg(RegB, getKillRegState(RegBIsKill))
8260 .setMIFlags(Flags);
8261 MachineInstrBuilder MIB2 =
8262 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8263 .addReg(NewVR, getKillRegState(true))
8264 .addReg(RegC, getKillRegState(RegCIsKill))
8265 .setMIFlags(Flags);
8266
8267 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8268 InsInstrs.push_back(MIB1);
8269 InsInstrs.push_back(MIB2);
8270 DelInstrs.push_back(AddMI);
8271 DelInstrs.push_back(&Root);
8272}
8273
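/// Return the vector or SVE ADD opcode whose element type matches the result
/// of \p AccumulatorOpCode; it is used to reduce (sum) the partial
/// accumulators once the accumulation chain has been split.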
8274unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8275 unsigned int AccumulatorOpCode) const {
8276 switch (AccumulatorOpCode) {
8277 case AArch64::UABALB_ZZZ_D:
8278 case AArch64::SABALB_ZZZ_D:
8279 case AArch64::UABALT_ZZZ_D:
8280 case AArch64::SABALT_ZZZ_D:
8281 return AArch64::ADD_ZZZ_D;
8282 case AArch64::UABALB_ZZZ_H:
8283 case AArch64::SABALB_ZZZ_H:
8284 case AArch64::UABALT_ZZZ_H:
8285 case AArch64::SABALT_ZZZ_H:
8286 return AArch64::ADD_ZZZ_H;
8287 case AArch64::UABALB_ZZZ_S:
8288 case AArch64::SABALB_ZZZ_S:
8289 case AArch64::UABALT_ZZZ_S:
8290 case AArch64::SABALT_ZZZ_S:
8291 return AArch64::ADD_ZZZ_S;
8292 case AArch64::UABALv16i8_v8i16:
8293 case AArch64::SABALv8i8_v8i16:
8294 case AArch64::SABAv8i16:
8295 case AArch64::UABAv8i16:
8296 return AArch64::ADDv8i16;
8297 case AArch64::SABALv2i32_v2i64:
8298 case AArch64::UABALv2i32_v2i64:
8299 case AArch64::SABALv4i32_v2i64:
8300 return AArch64::ADDv2i64;
8301 case AArch64::UABALv4i16_v4i32:
8302 case AArch64::SABALv4i16_v4i32:
8303 case AArch64::SABALv8i16_v4i32:
8304 case AArch64::SABAv4i32:
8305 case AArch64::UABAv4i32:
8306 return AArch64::ADDv4i32;
8307 case AArch64::UABALv4i32_v2i64:
8308 return AArch64::ADDv2i64;
8309 case AArch64::UABALv8i16_v4i32:
8310 return AArch64::ADDv4i32;
8311 case AArch64::UABALv8i8_v8i16:
8312 case AArch64::SABALv16i8_v8i16:
8313 return AArch64::ADDv8i16;
8314 case AArch64::UABAv16i8:
8315 case AArch64::SABAv16i8:
8316 return AArch64::ADDv16i8;
8317 case AArch64::UABAv4i16:
8318 case AArch64::SABAv4i16:
8319 return AArch64::ADDv4i16;
8320 case AArch64::UABAv2i32:
8321 case AArch64::SABAv2i32:
8322 return AArch64::ADDv2i32;
8323 case AArch64::UABAv8i8:
8324 case AArch64::SABAv8i8:
8325 return AArch64::ADDv8i8;
8326 default:
8327 llvm_unreachable("Unknown accumulator opcode");
8328 }
8329}
8330
8331/// When getMachineCombinerPatterns() finds potential patterns,
8332/// this function generates the instructions that could replace the
8333/// original code sequence
8334void AArch64InstrInfo::genAlternativeCodeSequence(
8335 MachineInstr &Root, unsigned Pattern,
8338 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8339 MachineBasicBlock &MBB = *Root.getParent();
8340 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8341 MachineFunction &MF = *MBB.getParent();
8342 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8343
8344 MachineInstr *MUL = nullptr;
8345 const TargetRegisterClass *RC;
8346 unsigned Opc;
8347 switch (Pattern) {
8348 default:
8349 // Reassociate instructions.
8350 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8351 DelInstrs, InstrIdxForVirtReg);
8352 return;
8354 // A - (B + C)
8355 // ==> (A - B) - C
8356 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8357 InstrIdxForVirtReg);
8358 return;
8360 // A - (B + C)
8361 // ==> (A - C) - B
8362 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8363 InstrIdxForVirtReg);
8364 return;
8367 // MUL I=A,B,0
8368 // ADD R,I,C
8369 // ==> MADD R,A,B,C
8370 // --- Create(MADD);
8372 Opc = AArch64::MADDWrrr;
8373 RC = &AArch64::GPR32RegClass;
8374 } else {
8375 Opc = AArch64::MADDXrrr;
8376 RC = &AArch64::GPR64RegClass;
8377 }
8378 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8379 break;
8382 // MUL I=A,B,0
8383 // ADD R,C,I
8384 // ==> MADD R,A,B,C
8385 // --- Create(MADD);
8387 Opc = AArch64::MADDWrrr;
8388 RC = &AArch64::GPR32RegClass;
8389 } else {
8390 Opc = AArch64::MADDXrrr;
8391 RC = &AArch64::GPR64RegClass;
8392 }
8393 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8394 break;
8399 // MUL I=A,B,0
8400 // ADD/SUB R,I,Imm
8401 // ==> MOV V, Imm/-Imm
8402 // ==> MADD R,A,B,V
8403 // --- Create(MADD);
8404 const TargetRegisterClass *RC;
8405 unsigned BitSize, MovImm;
8408 MovImm = AArch64::MOVi32imm;
8409 RC = &AArch64::GPR32spRegClass;
8410 BitSize = 32;
8411 Opc = AArch64::MADDWrrr;
8412 RC = &AArch64::GPR32RegClass;
8413 } else {
8414 MovImm = AArch64::MOVi64imm;
8415 RC = &AArch64::GPR64spRegClass;
8416 BitSize = 64;
8417 Opc = AArch64::MADDXrrr;
8418 RC = &AArch64::GPR64RegClass;
8419 }
8420 Register NewVR = MRI.createVirtualRegister(RC);
8421 uint64_t Imm = Root.getOperand(2).getImm();
8422
8423 if (Root.getOperand(3).isImm()) {
8424 unsigned Val = Root.getOperand(3).getImm();
8425 Imm = Imm << Val;
8426 }
8427 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8429 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8430 // Check that the immediate can be composed via a single instruction.
8432 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8433 if (Insn.size() != 1)
8434 return;
8435 MachineInstrBuilder MIB1 =
8436 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8437 .addImm(IsSub ? -Imm : Imm);
8438 InsInstrs.push_back(MIB1);
8439 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8440 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8441 break;
8442 }
8445 // MUL I=A,B,0
8446 // SUB R,I, C
8447 // ==> SUB V, 0, C
8448 // ==> MADD R,A,B,V // = -C + A*B
8449 // --- Create(MADD);
8450 const TargetRegisterClass *SubRC;
8451 unsigned SubOpc, ZeroReg;
8453 SubOpc = AArch64::SUBWrr;
8454 SubRC = &AArch64::GPR32spRegClass;
8455 ZeroReg = AArch64::WZR;
8456 Opc = AArch64::MADDWrrr;
8457 RC = &AArch64::GPR32RegClass;
8458 } else {
8459 SubOpc = AArch64::SUBXrr;
8460 SubRC = &AArch64::GPR64spRegClass;
8461 ZeroReg = AArch64::XZR;
8462 Opc = AArch64::MADDXrrr;
8463 RC = &AArch64::GPR64RegClass;
8464 }
8465 Register NewVR = MRI.createVirtualRegister(SubRC);
8466 // SUB NewVR, 0, C
8467 MachineInstrBuilder MIB1 =
8468 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8469 .addReg(ZeroReg)
8470 .add(Root.getOperand(2));
8471 InsInstrs.push_back(MIB1);
8472 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8473 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8474 break;
8475 }
8478 // MUL I=A,B,0
8479 // SUB R,C,I
8480 // ==> MSUB R,A,B,C (computes C - A*B)
8481 // --- Create(MSUB);
8483 Opc = AArch64::MSUBWrrr;
8484 RC = &AArch64::GPR32RegClass;
8485 } else {
8486 Opc = AArch64::MSUBXrrr;
8487 RC = &AArch64::GPR64RegClass;
8488 }
8489 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8490 break;
8492 Opc = AArch64::MLAv8i8;
8493 RC = &AArch64::FPR64RegClass;
8494 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8495 break;
8497 Opc = AArch64::MLAv8i8;
8498 RC = &AArch64::FPR64RegClass;
8499 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8500 break;
8502 Opc = AArch64::MLAv16i8;
8503 RC = &AArch64::FPR128RegClass;
8504 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8505 break;
8507 Opc = AArch64::MLAv16i8;
8508 RC = &AArch64::FPR128RegClass;
8509 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8510 break;
8512 Opc = AArch64::MLAv4i16;
8513 RC = &AArch64::FPR64RegClass;
8514 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8515 break;
8517 Opc = AArch64::MLAv4i16;
8518 RC = &AArch64::FPR64RegClass;
8519 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8520 break;
8522 Opc = AArch64::MLAv8i16;
8523 RC = &AArch64::FPR128RegClass;
8524 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8525 break;
8527 Opc = AArch64::MLAv8i16;
8528 RC = &AArch64::FPR128RegClass;
8529 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8530 break;
8532 Opc = AArch64::MLAv2i32;
8533 RC = &AArch64::FPR64RegClass;
8534 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8535 break;
8537 Opc = AArch64::MLAv2i32;
8538 RC = &AArch64::FPR64RegClass;
8539 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8540 break;
8542 Opc = AArch64::MLAv4i32;
8543 RC = &AArch64::FPR128RegClass;
8544 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8545 break;
8547 Opc = AArch64::MLAv4i32;
8548 RC = &AArch64::FPR128RegClass;
8549 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8550 break;
8551
8553 Opc = AArch64::MLAv8i8;
8554 RC = &AArch64::FPR64RegClass;
8555 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8556 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8557 RC);
8558 break;
8560 Opc = AArch64::MLSv8i8;
8561 RC = &AArch64::FPR64RegClass;
8562 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8563 break;
8565 Opc = AArch64::MLAv16i8;
8566 RC = &AArch64::FPR128RegClass;
8567 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8568 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8569 RC);
8570 break;
8572 Opc = AArch64::MLSv16i8;
8573 RC = &AArch64::FPR128RegClass;
8574 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8575 break;
8577 Opc = AArch64::MLAv4i16;
8578 RC = &AArch64::FPR64RegClass;
8579 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8580 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8581 RC);
8582 break;
8584 Opc = AArch64::MLSv4i16;
8585 RC = &AArch64::FPR64RegClass;
8586 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8587 break;
8589 Opc = AArch64::MLAv8i16;
8590 RC = &AArch64::FPR128RegClass;
8591 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8592 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8593 RC);
8594 break;
8596 Opc = AArch64::MLSv8i16;
8597 RC = &AArch64::FPR128RegClass;
8598 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8599 break;
8601 Opc = AArch64::MLAv2i32;
8602 RC = &AArch64::FPR64RegClass;
8603 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8604 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8605 RC);
8606 break;
8608 Opc = AArch64::MLSv2i32;
8609 RC = &AArch64::FPR64RegClass;
8610 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8611 break;
8613 Opc = AArch64::MLAv4i32;
8614 RC = &AArch64::FPR128RegClass;
8615 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8616 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8617 RC);
8618 break;
8620 Opc = AArch64::MLSv4i32;
8621 RC = &AArch64::FPR128RegClass;
8622 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8623 break;
8624
8626 Opc = AArch64::MLAv4i16_indexed;
8627 RC = &AArch64::FPR64RegClass;
8628 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8629 break;
8631 Opc = AArch64::MLAv4i16_indexed;
8632 RC = &AArch64::FPR64RegClass;
8633 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8634 break;
8636 Opc = AArch64::MLAv8i16_indexed;
8637 RC = &AArch64::FPR128RegClass;
8638 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8639 break;
8641 Opc = AArch64::MLAv8i16_indexed;
8642 RC = &AArch64::FPR128RegClass;
8643 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8644 break;
8646 Opc = AArch64::MLAv2i32_indexed;
8647 RC = &AArch64::FPR64RegClass;
8648 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8649 break;
8651 Opc = AArch64::MLAv2i32_indexed;
8652 RC = &AArch64::FPR64RegClass;
8653 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8654 break;
8656 Opc = AArch64::MLAv4i32_indexed;
8657 RC = &AArch64::FPR128RegClass;
8658 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8659 break;
8661 Opc = AArch64::MLAv4i32_indexed;
8662 RC = &AArch64::FPR128RegClass;
8663 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8664 break;
8665
8667 Opc = AArch64::MLAv4i16_indexed;
8668 RC = &AArch64::FPR64RegClass;
8669 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8670 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8671 RC);
8672 break;
8674 Opc = AArch64::MLSv4i16_indexed;
8675 RC = &AArch64::FPR64RegClass;
8676 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8677 break;
8679 Opc = AArch64::MLAv8i16_indexed;
8680 RC = &AArch64::FPR128RegClass;
8681 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8682 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8683 RC);
8684 break;
8686 Opc = AArch64::MLSv8i16_indexed;
8687 RC = &AArch64::FPR128RegClass;
8688 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8689 break;
8691 Opc = AArch64::MLAv2i32_indexed;
8692 RC = &AArch64::FPR64RegClass;
8693 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8694 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8695 RC);
8696 break;
8698 Opc = AArch64::MLSv2i32_indexed;
8699 RC = &AArch64::FPR64RegClass;
8700 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8701 break;
8703 Opc = AArch64::MLAv4i32_indexed;
8704 RC = &AArch64::FPR128RegClass;
8705 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8706 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8707 RC);
8708 break;
8710 Opc = AArch64::MLSv4i32_indexed;
8711 RC = &AArch64::FPR128RegClass;
8712 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8713 break;
8714
8715 // Floating Point Support
8717 Opc = AArch64::FMADDHrrr;
8718 RC = &AArch64::FPR16RegClass;
8719 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8720 break;
8722 Opc = AArch64::FMADDSrrr;
8723 RC = &AArch64::FPR32RegClass;
8724 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8725 break;
8727 Opc = AArch64::FMADDDrrr;
8728 RC = &AArch64::FPR64RegClass;
8729 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8730 break;
8731
8733 Opc = AArch64::FMADDHrrr;
8734 RC = &AArch64::FPR16RegClass;
8735 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8736 break;
8738 Opc = AArch64::FMADDSrrr;
8739 RC = &AArch64::FPR32RegClass;
8740 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8741 break;
8743 Opc = AArch64::FMADDDrrr;
8744 RC = &AArch64::FPR64RegClass;
8745 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8746 break;
8747
8749 Opc = AArch64::FMLAv1i32_indexed;
8750 RC = &AArch64::FPR32RegClass;
8751 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8753 break;
8755 Opc = AArch64::FMLAv1i32_indexed;
8756 RC = &AArch64::FPR32RegClass;
8757 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8759 break;
8760
8762 Opc = AArch64::FMLAv1i64_indexed;
8763 RC = &AArch64::FPR64RegClass;
8764 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8766 break;
8768 Opc = AArch64::FMLAv1i64_indexed;
8769 RC = &AArch64::FPR64RegClass;
8770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8772 break;
8773
8775 RC = &AArch64::FPR64RegClass;
8776 Opc = AArch64::FMLAv4i16_indexed;
8777 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8779 break;
8781 RC = &AArch64::FPR64RegClass;
8782 Opc = AArch64::FMLAv4f16;
8783 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8785 break;
8787 RC = &AArch64::FPR64RegClass;
8788 Opc = AArch64::FMLAv4i16_indexed;
8789 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8791 break;
8793 RC = &AArch64::FPR64RegClass;
8794 Opc = AArch64::FMLAv4f16;
8795 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8797 break;
8798
8801 RC = &AArch64::FPR64RegClass;
8803 Opc = AArch64::FMLAv2i32_indexed;
8804 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8806 } else {
8807 Opc = AArch64::FMLAv2f32;
8808 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8810 }
8811 break;
8814 RC = &AArch64::FPR64RegClass;
8816 Opc = AArch64::FMLAv2i32_indexed;
8817 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8819 } else {
8820 Opc = AArch64::FMLAv2f32;
8821 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8823 }
8824 break;
8825
8827 RC = &AArch64::FPR128RegClass;
8828 Opc = AArch64::FMLAv8i16_indexed;
8829 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8831 break;
8833 RC = &AArch64::FPR128RegClass;
8834 Opc = AArch64::FMLAv8f16;
8835 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8837 break;
8839 RC = &AArch64::FPR128RegClass;
8840 Opc = AArch64::FMLAv8i16_indexed;
8841 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8843 break;
8845 RC = &AArch64::FPR128RegClass;
8846 Opc = AArch64::FMLAv8f16;
8847 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8849 break;
8850
8853 RC = &AArch64::FPR128RegClass;
8855 Opc = AArch64::FMLAv2i64_indexed;
8856 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8858 } else {
8859 Opc = AArch64::FMLAv2f64;
8860 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8862 }
8863 break;
8866 RC = &AArch64::FPR128RegClass;
8868 Opc = AArch64::FMLAv2i64_indexed;
8869 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8871 } else {
8872 Opc = AArch64::FMLAv2f64;
8873 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8875 }
8876 break;
8877
8880 RC = &AArch64::FPR128RegClass;
8882 Opc = AArch64::FMLAv4i32_indexed;
8883 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8885 } else {
8886 Opc = AArch64::FMLAv4f32;
8887 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8889 }
8890 break;
8891
8894 RC = &AArch64::FPR128RegClass;
8896 Opc = AArch64::FMLAv4i32_indexed;
8897 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8899 } else {
8900 Opc = AArch64::FMLAv4f32;
8901 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8903 }
8904 break;
8905
8907 Opc = AArch64::FNMSUBHrrr;
8908 RC = &AArch64::FPR16RegClass;
8909 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8910 break;
8912 Opc = AArch64::FNMSUBSrrr;
8913 RC = &AArch64::FPR32RegClass;
8914 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8915 break;
8917 Opc = AArch64::FNMSUBDrrr;
8918 RC = &AArch64::FPR64RegClass;
8919 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8920 break;
8921
8923 Opc = AArch64::FNMADDHrrr;
8924 RC = &AArch64::FPR16RegClass;
8925 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8926 break;
8928 Opc = AArch64::FNMADDSrrr;
8929 RC = &AArch64::FPR32RegClass;
8930 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8931 break;
8933 Opc = AArch64::FNMADDDrrr;
8934 RC = &AArch64::FPR64RegClass;
8935 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8936 break;
8937
8939 Opc = AArch64::FMSUBHrrr;
8940 RC = &AArch64::FPR16RegClass;
8941 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8942 break;
8944 Opc = AArch64::FMSUBSrrr;
8945 RC = &AArch64::FPR32RegClass;
8946 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8947 break;
8949 Opc = AArch64::FMSUBDrrr;
8950 RC = &AArch64::FPR64RegClass;
8951 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8952 break;
8953
8955 Opc = AArch64::FMLSv1i32_indexed;
8956 RC = &AArch64::FPR32RegClass;
8957 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8959 break;
8960
8962 Opc = AArch64::FMLSv1i64_indexed;
8963 RC = &AArch64::FPR64RegClass;
8964 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8966 break;
8967
8970 RC = &AArch64::FPR64RegClass;
8971 Register NewVR = MRI.createVirtualRegister(RC);
8972 MachineInstrBuilder MIB1 =
8973 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8974 .add(Root.getOperand(2));
8975 InsInstrs.push_back(MIB1);
8976 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8978 Opc = AArch64::FMLAv4f16;
8979 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8980 FMAInstKind::Accumulator, &NewVR);
8981 } else {
8982 Opc = AArch64::FMLAv4i16_indexed;
8983 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8984 FMAInstKind::Indexed, &NewVR);
8985 }
8986 break;
8987 }
8989 RC = &AArch64::FPR64RegClass;
8990 Opc = AArch64::FMLSv4f16;
8991 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8993 break;
8995 RC = &AArch64::FPR64RegClass;
8996 Opc = AArch64::FMLSv4i16_indexed;
8997 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8999 break;
9000
9003 RC = &AArch64::FPR64RegClass;
9005 Opc = AArch64::FMLSv2i32_indexed;
9006 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9008 } else {
9009 Opc = AArch64::FMLSv2f32;
9010 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9012 }
9013 break;
9014
9017 RC = &AArch64::FPR128RegClass;
9018 Register NewVR = MRI.createVirtualRegister(RC);
9019 MachineInstrBuilder MIB1 =
9020 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9021 .add(Root.getOperand(2));
9022 InsInstrs.push_back(MIB1);
9023 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9025 Opc = AArch64::FMLAv8f16;
9026 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9027 FMAInstKind::Accumulator, &NewVR);
9028 } else {
9029 Opc = AArch64::FMLAv8i16_indexed;
9030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9031 FMAInstKind::Indexed, &NewVR);
9032 }
9033 break;
9034 }
9036 RC = &AArch64::FPR128RegClass;
9037 Opc = AArch64::FMLSv8f16;
9038 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9040 break;
9042 RC = &AArch64::FPR128RegClass;
9043 Opc = AArch64::FMLSv8i16_indexed;
9044 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9046 break;
9047
9050 RC = &AArch64::FPR128RegClass;
9052 Opc = AArch64::FMLSv2i64_indexed;
9053 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9055 } else {
9056 Opc = AArch64::FMLSv2f64;
9057 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9059 }
9060 break;
9061
9064 RC = &AArch64::FPR128RegClass;
9066 Opc = AArch64::FMLSv4i32_indexed;
9067 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9069 } else {
9070 Opc = AArch64::FMLSv4f32;
9071 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9073 }
9074 break;
9077 RC = &AArch64::FPR64RegClass;
9078 Register NewVR = MRI.createVirtualRegister(RC);
9079 MachineInstrBuilder MIB1 =
9080 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9081 .add(Root.getOperand(2));
9082 InsInstrs.push_back(MIB1);
9083 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9085 Opc = AArch64::FMLAv2i32_indexed;
9086 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9087 FMAInstKind::Indexed, &NewVR);
9088 } else {
9089 Opc = AArch64::FMLAv2f32;
9090 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9091 FMAInstKind::Accumulator, &NewVR);
9092 }
9093 break;
9094 }
9097 RC = &AArch64::FPR128RegClass;
9098 Register NewVR = MRI.createVirtualRegister(RC);
9099 MachineInstrBuilder MIB1 =
9100 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9101 .add(Root.getOperand(2));
9102 InsInstrs.push_back(MIB1);
9103 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9105 Opc = AArch64::FMLAv4i32_indexed;
9106 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9107 FMAInstKind::Indexed, &NewVR);
9108 } else {
9109 Opc = AArch64::FMLAv4f32;
9110 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9111 FMAInstKind::Accumulator, &NewVR);
9112 }
9113 break;
9114 }
9117 RC = &AArch64::FPR128RegClass;
9118 Register NewVR = MRI.createVirtualRegister(RC);
9119 MachineInstrBuilder MIB1 =
9120 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9121 .add(Root.getOperand(2));
9122 InsInstrs.push_back(MIB1);
9123 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9125 Opc = AArch64::FMLAv2i64_indexed;
9126 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9127 FMAInstKind::Indexed, &NewVR);
9128 } else {
9129 Opc = AArch64::FMLAv2f64;
9130 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9131 FMAInstKind::Accumulator, &NewVR);
9132 }
9133 break;
9134 }
9137 unsigned IdxDupOp =
9139 : 2;
9140 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9141 &AArch64::FPR128RegClass, MRI);
9142 break;
9143 }
9146 unsigned IdxDupOp =
9148 : 2;
9149 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9150 &AArch64::FPR128RegClass, MRI);
9151 break;
9152 }
9155 unsigned IdxDupOp =
9157 : 2;
9158 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9159 &AArch64::FPR128_loRegClass, MRI);
9160 break;
9161 }
9164 unsigned IdxDupOp =
9166 : 2;
9167 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9168 &AArch64::FPR128RegClass, MRI);
9169 break;
9170 }
9173 unsigned IdxDupOp =
9175 : 2;
9176 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9177 &AArch64::FPR128_loRegClass, MRI);
9178 break;
9179 }
9181 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9182 break;
9183 }
9185 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9186 Pattern, 4);
9187 break;
9188 }
9190 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9191 Pattern, 8);
9192 break;
9193 }
9195 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9196 Pattern, 16);
9197 break;
9198 }
9199
9200 } // end switch (Pattern)
9201 // Record MUL and ADD/SUB for deletion
9202 if (MUL)
9203 DelInstrs.push_back(MUL);
9204 DelInstrs.push_back(&Root);
9205
9206 // Set the flags on the inserted instructions to be the merged flags of the
9207 // instructions that we have combined.
9208 uint32_t Flags = Root.getFlags();
9209 if (MUL)
9210 Flags = Root.mergeFlagsWith(*MUL);
9211 for (auto *MI : InsInstrs)
9212 MI->setFlags(Flags);
9213}
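// Illustrative sketch (not part of the upstream file): the FMLA/FMLS patterns
// handled above fold a separate multiply into its accumulating user, e.g.
//
//   fmul v1.2s, v2.2s, v3.2s                fmla v0.2s, v2.2s, v3.2s
//   fadd v0.2s, v0.2s, v1.2s      -->
//
// The now-dead FMUL is recorded in DelInstrs together with Root, and the
// merged MI flags (e.g. fast-math flags) of the pair are copied onto every
// newly inserted instruction.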
9214
9215/// Replace csincr-branch sequence by simple conditional branch
9216///
9217/// Examples:
9218/// 1. \code
9219/// csinc w9, wzr, wzr, <condition code>
9220/// tbnz w9, #0, 0x44
9221/// \endcode
9222/// to
9223/// \code
9224/// b.<inverted condition code>
9225/// \endcode
9226///
9227/// 2. \code
9228/// csinc w9, wzr, wzr, <condition code>
9229/// tbz w9, #0, 0x44
9230/// \endcode
9231/// to
9232/// \code
9233/// b.<condition code>
9234/// \endcode
9235///
9236/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9237/// compare's constant operand is power of 2.
9238///
9239/// Examples:
9240/// \code
9241/// and w8, w8, #0x400
9242/// cbnz w8, L1
9243/// \endcode
9244/// to
9245/// \code
9246/// tbnz w8, #10, L1
9247/// \endcode
9248///
9249/// \param MI Conditional Branch
9250/// \return True when the simple conditional branch is generated
9251///
9252bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9253 bool IsNegativeBranch = false;
9254 bool IsTestAndBranch = false;
9255 unsigned TargetBBInMI = 0;
9256 switch (MI.getOpcode()) {
9257 default:
9258 llvm_unreachable("Unknown branch instruction?");
9259 case AArch64::Bcc:
9260 case AArch64::CBWPri:
9261 case AArch64::CBXPri:
9262 case AArch64::CBWPrr:
9263 case AArch64::CBXPrr:
9264 return false;
9265 case AArch64::CBZW:
9266 case AArch64::CBZX:
9267 TargetBBInMI = 1;
9268 break;
9269 case AArch64::CBNZW:
9270 case AArch64::CBNZX:
9271 TargetBBInMI = 1;
9272 IsNegativeBranch = true;
9273 break;
9274 case AArch64::TBZW:
9275 case AArch64::TBZX:
9276 TargetBBInMI = 2;
9277 IsTestAndBranch = true;
9278 break;
9279 case AArch64::TBNZW:
9280 case AArch64::TBNZX:
9281 TargetBBInMI = 2;
9282 IsNegativeBranch = true;
9283 IsTestAndBranch = true;
9284 break;
9285 }
9286 // So we increment a zero register and test for bits other
9287 // than bit 0? Conservatively bail out in case the verifier
9288 // missed this case.
9289 if (IsTestAndBranch && MI.getOperand(1).getImm())
9290 return false;
9291
9292 // Find Definition.
9293 assert(MI.getParent() && "Incomplete machine instruction\n");
9294 MachineBasicBlock *MBB = MI.getParent();
9295 MachineFunction *MF = MBB->getParent();
9297 Register VReg = MI.getOperand(0).getReg();
9298 if (!VReg.isVirtual())
9299 return false;
9300
9301 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9302
9303 // Look through COPY instructions to find definition.
9304 while (DefMI->isCopy()) {
9305 Register CopyVReg = DefMI->getOperand(1).getReg();
9306 if (!MRI->hasOneNonDBGUse(CopyVReg))
9307 return false;
9308 if (!MRI->hasOneDef(CopyVReg))
9309 return false;
9310 DefMI = MRI->getVRegDef(CopyVReg);
9311 }
9312
9313 switch (DefMI->getOpcode()) {
9314 default:
9315 return false;
9316 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9317 case AArch64::ANDWri:
9318 case AArch64::ANDXri: {
9319 if (IsTestAndBranch)
9320 return false;
9321 if (DefMI->getParent() != MBB)
9322 return false;
9323 if (!MRI->hasOneNonDBGUse(VReg))
9324 return false;
9325
9326 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9328 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9329 if (!isPowerOf2_64(Mask))
9330 return false;
9331
9332 MachineOperand &MO = DefMI->getOperand(1);
9333 Register NewReg = MO.getReg();
9334 if (!NewReg.isVirtual())
9335 return false;
9336
9337 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9338
9339 MachineBasicBlock &RefToMBB = *MBB;
9340 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9341 DebugLoc DL = MI.getDebugLoc();
9342 unsigned Imm = Log2_64(Mask);
9343 unsigned Opc = (Imm < 32)
9344 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9345 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9346 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9347 .addReg(NewReg)
9348 .addImm(Imm)
9349 .addMBB(TBB);
9350 // Register lives on to the TB(N)Z now.
9351 MO.setIsKill(false);
9352
9353 // For immediates smaller than 32, we need to use the 32-bit
9354 // variant (W) in all cases. Indeed, the 64-bit variant cannot
9355 // encode them.
9356 // Therefore, if the input register is 64-bit, we need to take the
9357 // 32-bit sub-register.
9358 if (!Is32Bit && Imm < 32)
9359 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9360 MI.eraseFromParent();
9361 return true;
9362 }
9363 // Look for CSINC
9364 case AArch64::CSINCWr:
9365 case AArch64::CSINCXr: {
9366 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9367 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9368 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9369 DefMI->getOperand(2).getReg() == AArch64::XZR))
9370 return false;
9371
9372 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9373 true) != -1)
9374 return false;
9375
9376 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9377 // Convert only when the condition code is not modified between
9378 // the CSINC and the branch. The CC may be used by other
9379 // instructions in between.
9381 return false;
9382 MachineBasicBlock &RefToMBB = *MBB;
9383 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9384 DebugLoc DL = MI.getDebugLoc();
9385 if (IsNegativeBranch)
9387 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9388 MI.eraseFromParent();
9389 return true;
9390 }
9391 }
9392}
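// Illustrative sketch (not part of the upstream file): for the ANDWri/ANDXri
// case above, a power-of-two mask becomes a test-and-branch bit index:
//
//   and  w8, w9, #0x400       // Mask == 0x400, isPowerOf2_64(Mask)
//   cbnz w8, L1               // Imm == Log2_64(0x400) == 10
//                      -->    tbnz w9, #10, L1
//
// For the CSINC case, the branch collapses to a plain Bcc on the (possibly
// inverted) condition code taken from the CSINC's condition operand.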
9393
9394std::pair<unsigned, unsigned>
9395AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9396 const unsigned Mask = AArch64II::MO_FRAGMENT;
9397 return std::make_pair(TF & Mask, TF & ~Mask);
9398}
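// Illustrative sketch (not part of the upstream file): MO_FRAGMENT selects the
// "which piece of the symbol" part of an operand's target flags, and the rest
// is returned as the bitmask half. For example (TII being this instr-info):
//
//   unsigned TF = AArch64II::MO_PAGEOFF | AArch64II::MO_NC;
//   auto [Direct, Bitmask] = TII.decomposeMachineOperandsTargetFlags(TF);
//   // Direct == AArch64II::MO_PAGEOFF, Bitmask == AArch64II::MO_NC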
9399
9401AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9402 using namespace AArch64II;
9403
9404 static const std::pair<unsigned, const char *> TargetFlags[] = {
9405 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9406 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9407 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9408 {MO_HI12, "aarch64-hi12"}};
9409 return ArrayRef(TargetFlags);
9410}
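// Illustrative sketch (not part of the upstream file): these names are what
// the MIR printer/parser uses for operand target flags, so a GOT access might
// appear in MIR roughly as (registers and operands made up for the example):
//
//   $x8 = ADRP target-flags(aarch64-page, aarch64-got) @global
//   $x8 = LDRXui $x8, target-flags(aarch64-pageoff, aarch64-got, aarch64-nc) @global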
9411
9413AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9414 using namespace AArch64II;
9415
9416 static const std::pair<unsigned, const char *> TargetFlags[] = {
9417 {MO_COFFSTUB, "aarch64-coffstub"},
9418 {MO_GOT, "aarch64-got"},
9419 {MO_NC, "aarch64-nc"},
9420 {MO_S, "aarch64-s"},
9421 {MO_TLS, "aarch64-tls"},
9422 {MO_DLLIMPORT, "aarch64-dllimport"},
9423 {MO_PREL, "aarch64-prel"},
9424 {MO_TAGGED, "aarch64-tagged"},
9425 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9426 };
9427 return ArrayRef(TargetFlags);
9428}
9429
9431AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9432 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9433 {{MOSuppressPair, "aarch64-suppress-pair"},
9434 {MOStridedAccess, "aarch64-strided-access"}};
9435 return ArrayRef(TargetFlags);
9436}
9437
9438/// Constants defining how certain sequences should be outlined.
9439/// This encompasses how an outlined function should be called, and what kind of
9440/// frame should be emitted for that outlined function.
9441///
9442/// \p MachineOutlinerDefault implies that the function should be called with
9443/// a save and restore of LR to the stack.
9444///
9445/// That is,
9446///
9447/// I1 Save LR OUTLINED_FUNCTION:
9448/// I2 --> BL OUTLINED_FUNCTION I1
9449/// I3 Restore LR I2
9450/// I3
9451/// RET
9452///
9453/// * Call construction overhead: 3 (save + BL + restore)
9454/// * Frame construction overhead: 1 (ret)
9455/// * Requires stack fixups? Yes
9456///
9457/// \p MachineOutlinerTailCall implies that the function is being created from
9458/// a sequence of instructions ending in a return.
9459///
9460/// That is,
9461///
9462/// I1 OUTLINED_FUNCTION:
9463/// I2 --> B OUTLINED_FUNCTION I1
9464/// RET I2
9465/// RET
9466///
9467/// * Call construction overhead: 1 (B)
9468/// * Frame construction overhead: 0 (Return included in sequence)
9469/// * Requires stack fixups? No
9470///
9471/// \p MachineOutlinerNoLRSave implies that the function should be called using
9472/// a BL instruction, but doesn't require LR to be saved and restored. This
9473/// happens when LR is known to be dead.
9474///
9475/// That is,
9476///
9477/// I1 OUTLINED_FUNCTION:
9478/// I2 --> BL OUTLINED_FUNCTION I1
9479/// I3 I2
9480/// I3
9481/// RET
9482///
9483/// * Call construction overhead: 1 (BL)
9484/// * Frame construction overhead: 1 (RET)
9485/// * Requires stack fixups? No
9486///
9487/// \p MachineOutlinerThunk implies that the function is being created from
9488/// a sequence of instructions ending in a call. The outlined function is
9489/// called with a BL instruction, and the outlined function tail-calls the
9490/// original call destination.
9491///
9492/// That is,
9493///
9494/// I1 OUTLINED_FUNCTION:
9495/// I2 --> BL OUTLINED_FUNCTION I1
9496/// BL f I2
9497/// B f
9498/// * Call construction overhead: 1 (BL)
9499/// * Frame construction overhead: 0
9500/// * Requires stack fixups? No
9501///
9502/// \p MachineOutlinerRegSave implies that the function should be called with a
9503/// save and restore of LR to an available register. This allows us to avoid
9504/// stack fixups. Note that this outlining variant is compatible with the
9505/// NoLRSave case.
9506///
9507/// That is,
9508///
9509/// I1 Save LR OUTLINED_FUNCTION:
9510/// I2 --> BL OUTLINED_FUNCTION I1
9511/// I3 Restore LR I2
9512/// I3
9513/// RET
9514///
9515/// * Call construction overhead: 3 (save + BL + restore)
9516/// * Frame construction overhead: 1 (ret)
9517/// * Requires stack fixups? No
9518enum MachineOutlinerClass {
9519 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9520 MachineOutlinerTailCall, /// Only emit a branch.
9521 MachineOutlinerNoLRSave, /// Emit a call and return.
9522 MachineOutlinerThunk, /// Emit a call and tail-call.
9523 MachineOutlinerRegSave /// Same as default, but save to a register.
9524};
9525
9531
9532Register
9533AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9534 MachineFunction *MF = C.getMF();
9535 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9536 const AArch64RegisterInfo *ARI =
9537 static_cast<const AArch64RegisterInfo *>(&TRI);
9538 // Check if there is an available register across the sequence that we can
9539 // use.
9540 for (unsigned Reg : AArch64::GPR64RegClass) {
9541 if (!ARI->isReservedReg(*MF, Reg) &&
9542 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9543 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9544 Reg != AArch64::X17 && // Ditto for X17.
9545 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9546 C.isAvailableInsideSeq(Reg, TRI))
9547 return Reg;
9548 }
9549 return Register();
9550}
9551
9552static bool
9554 const outliner::Candidate &b) {
9555 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9556 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9557
9558 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9559 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9560}
9561
9562static bool
9564 const outliner::Candidate &b) {
9565 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9566 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9567
9568 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9569}
9570
9572 const outliner::Candidate &b) {
9573 const AArch64Subtarget &SubtargetA =
9575 const AArch64Subtarget &SubtargetB =
9576 b.getMF()->getSubtarget<AArch64Subtarget>();
9577 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9578}
9579
9580std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9581AArch64InstrInfo::getOutliningCandidateInfo(
9582 const MachineModuleInfo &MMI,
9583 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9584 unsigned MinRepeats) const {
9585 unsigned SequenceSize = 0;
9586 for (auto &MI : RepeatedSequenceLocs[0])
9587 SequenceSize += getInstSizeInBytes(MI);
9588
9589 unsigned NumBytesToCreateFrame = 0;
9590
9591 // We only allow outlining for functions having exactly matching return
9592 // address signing attributes, i.e., all share the same value for the
9593 // attribute "sign-return-address" and all share the same type of key they
9594 // are signed with.
9595 // Additionally we require all functions to simultaneously either support
9596 // v8.3a features or not. Otherwise an outlined function could get signed
9597 // using dedicated v8.3 instructions and a call from a function that doesn't
9598 // support v8.3 instructions would therefore be invalid.
9599 if (std::adjacent_find(
9600 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9601 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9602 // Return true if a and b are non-equal w.r.t. return address
9603 // signing or support of v8.3a features
9604 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9605 outliningCandidatesSigningKeyConsensus(a, b) &&
9606 outliningCandidatesV8_3OpsConsensus(a, b)) {
9607 return false;
9608 }
9609 return true;
9610 }) != RepeatedSequenceLocs.end()) {
9611 return std::nullopt;
9612 }
9613
9614 // Since at this point all candidates agree on their return address signing,
9615 // picking just one is fine. If the candidate functions potentially sign their
9616 // return addresses, the outlined function should do the same. Note that in
9617 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9618 // not certainly true that the outlined function will have to sign its return
9619 // address but this decision is made later, when the decision to outline
9620 // has already been made.
9621 // The same holds for the number of additional instructions we need: On
9622 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9623 // necessary. However, at this point we don't know if the outlined function
9624 // will have a RET instruction so we assume the worst.
9625 const TargetRegisterInfo &TRI = getRegisterInfo();
9626 // Performing a tail call may require extra checks when PAuth is enabled.
9627 // If PAuth is disabled, set it to zero for uniformity.
9628 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9629 if (RepeatedSequenceLocs[0]
9630 .getMF()
9631 ->getInfo<AArch64FunctionInfo>()
9632 ->shouldSignReturnAddress(true)) {
9633 // One PAC and one AUT instructions
9634 NumBytesToCreateFrame += 8;
9635
9636 // PAuth is enabled - set extra tail call cost, if any.
9637 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9638 *RepeatedSequenceLocs[0].getMF());
9639 NumBytesToCheckLRInTCEpilogue =
9641 // Checking the authenticated LR value may significantly impact
9642 // SequenceSize, so account for it for more precise results.
9643 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9644 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9645
9646 // We have to check if sp-modifying instructions would get outlined.
9647 // If so, we only allow outlining if sp is unchanged overall, so matching
9648 // sub and add instructions are okay to outline; all other sp modifications
9649 // are not.
9650 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9651 int SPValue = 0;
9652 for (auto &MI : C) {
9653 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9654 switch (MI.getOpcode()) {
9655 case AArch64::ADDXri:
9656 case AArch64::ADDWri:
9657 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9658 assert(MI.getOperand(2).isImm() &&
9659 "Expected operand to be immediate");
9660 assert(MI.getOperand(1).isReg() &&
9661 "Expected operand to be a register");
9662 // Check if the add just increments sp. If so, we search for
9663 // matching sub instructions that decrement sp. If not, the
9664 // modification is illegal
9665 if (MI.getOperand(1).getReg() == AArch64::SP)
9666 SPValue += MI.getOperand(2).getImm();
9667 else
9668 return true;
9669 break;
9670 case AArch64::SUBXri:
9671 case AArch64::SUBWri:
9672 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9673 assert(MI.getOperand(2).isImm() &&
9674 "Expected operand to be immediate");
9675 assert(MI.getOperand(1).isReg() &&
9676 "Expected operand to be a register");
9677 // Check if the sub just decrements sp. If so, we search for
9678 // matching add instructions that increment sp. If not, the
9679 // modification is illegal
9680 if (MI.getOperand(1).getReg() == AArch64::SP)
9681 SPValue -= MI.getOperand(2).getImm();
9682 else
9683 return true;
9684 break;
9685 default:
9686 return true;
9687 }
9688 }
9689 }
9690 if (SPValue)
9691 return true;
9692 return false;
9693 };
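// Illustrative sketch (not part of the upstream file): the SPValue bookkeeping
// above only keeps candidates whose SP adjustments cancel out, e.g.
//
//   sub sp, sp, #16       // SPValue -= 16
//   ...
//   add sp, sp, #16       // SPValue += 16  -> net zero, candidate kept
//
// while an unmatched "add sp, sp, #16" leaves SPValue != 0, and any other kind
// of SP write (e.g. "mov sp, x29") is rejected outright, so the candidate is
// erased below.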
9694 // Remove candidates with illegal stack modifying instructions
9695 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9696
9697 // If the sequence doesn't have enough candidates left, then we're done.
9698 if (RepeatedSequenceLocs.size() < MinRepeats)
9699 return std::nullopt;
9700 }
9701
9702 // Properties about candidate MBBs that hold for all of them.
9703 unsigned FlagsSetInAll = 0xF;
9704
9705 // Compute liveness information for each candidate, and set FlagsSetInAll.
9706 for (outliner::Candidate &C : RepeatedSequenceLocs)
9707 FlagsSetInAll &= C.Flags;
9708
9709 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9710
9711 // Helper lambda which sets call information for every candidate.
9712 auto SetCandidateCallInfo =
9713 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9714 for (outliner::Candidate &C : RepeatedSequenceLocs)
9715 C.setCallInfo(CallID, NumBytesForCall);
9716 };
9717
9718 unsigned FrameID = MachineOutlinerDefault;
9719 NumBytesToCreateFrame += 4;
9720
9721 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9722 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9723 });
9724
9725 // We check to see if CFI Instructions are present, and if they are,
9726 // we find the number of CFI Instructions in the candidates.
9727 unsigned CFICount = 0;
9728 for (auto &I : RepeatedSequenceLocs[0]) {
9729 if (I.isCFIInstruction())
9730 CFICount++;
9731 }
9732
9733 // We compare the number of found CFI Instructions to the number of CFI
9734 // instructions in the parent function for each candidate. We must check this
9735 // since if we outline one of the CFI instructions in a function, we have to
9736 // outline them all for correctness. If we do not, the address offsets will be
9737 // incorrect between the two sections of the program.
9738 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9739 std::vector<MCCFIInstruction> CFIInstructions =
9740 C.getMF()->getFrameInstructions();
9741
9742 if (CFICount > 0 && CFICount != CFIInstructions.size())
9743 return std::nullopt;
9744 }
9745
9746 // Returns true if an instruction is safe to fix up, false otherwise.
9747 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9748 if (MI.isCall())
9749 return true;
9750
9751 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9752 !MI.readsRegister(AArch64::SP, &TRI))
9753 return true;
9754
9755 // Any modification of SP will break our code to save/restore LR.
9756 // FIXME: We could handle some instructions which add a constant
9757 // offset to SP, with a bit more work.
9758 if (MI.modifiesRegister(AArch64::SP, &TRI))
9759 return false;
9760
9761 // At this point, we have a stack instruction that we might need to
9762 // fix up. We'll handle it if it's a load or store.
9763 if (MI.mayLoadOrStore()) {
9764 const MachineOperand *Base; // Filled with the base operand of MI.
9765 int64_t Offset; // Filled with the offset of MI.
9766 bool OffsetIsScalable;
9767
9768 // Does it allow us to offset the base operand and is the base the
9769 // register SP?
9770 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9771 !Base->isReg() || Base->getReg() != AArch64::SP)
9772 return false;
9773
9774 // Fix-up code below assumes bytes.
9775 if (OffsetIsScalable)
9776 return false;
9777
9778 // Find the minimum/maximum offset for this instruction and check
9779 // if fixing it up would be in range.
9780 int64_t MinOffset,
9781 MaxOffset; // Unscaled offsets for the instruction.
9782 // The scale to multiply the offsets by.
9783 TypeSize Scale(0U, false), DummyWidth(0U, false);
9784 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9785
9786 Offset += 16; // Update the offset to what it would be if we outlined.
9787 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9788 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9789 return false;
9790
9791 // It's in range, so we can outline it.
9792 return true;
9793 }
9794
9795 // FIXME: Add handling for instructions like "add x0, sp, #8".
9796
9797 // We can't fix it up, so don't outline it.
9798 return false;
9799 };
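// Illustrative sketch (not part of the upstream file): the range check in
// IsSafeToFixup anticipates the extra 16 bytes the outliner will push for LR.
// E.g. for "ldr x0, [sp, #8]" (LDRXui: Scale == 8, MinOffset == 0,
// MaxOffset == 4095), the adjusted offset 8 + 16 == 24 still lies inside
// [0 * 8, 4095 * 8], so the access can be re-based later and the instruction
// is considered safe to fix up.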
9800
9801 // True if it's possible to fix up each stack instruction in this sequence.
9802 // Important for frames/call variants that modify the stack.
9803 bool AllStackInstrsSafe =
9804 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9805
9806 // If the last instruction in any candidate is a terminator, then we should
9807 // tail call all of the candidates.
9808 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9809 FrameID = MachineOutlinerTailCall;
9810 NumBytesToCreateFrame = 0;
9811 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9812 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9813 }
9814
9815 else if (LastInstrOpcode == AArch64::BL ||
9816 ((LastInstrOpcode == AArch64::BLR ||
9817 LastInstrOpcode == AArch64::BLRNoIP) &&
9818 !HasBTI)) {
9819 // FIXME: Do we need to check if the code after this uses the value of LR?
9820 FrameID = MachineOutlinerThunk;
9821 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9822 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9823 }
9824
9825 else {
9826 // We need to decide how to emit calls + frames. We can always emit the same
9827 // frame if we don't need to save to the stack. If we have to save to the
9828 // stack, then we need a different frame.
9829 unsigned NumBytesNoStackCalls = 0;
9830 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9831
9832 // Check if we have to save LR.
9833 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9834 bool LRAvailable =
9836 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9837 : true;
9838 // If we have a noreturn caller, then we're going to be conservative and
9839 // say that we have to save LR. If we don't have a ret at the end of the
9840 // block, then we can't reason about liveness accurately.
9841 //
9842 // FIXME: We can probably do better than always disabling this in
9843 // noreturn functions by fixing up the liveness info.
9844 bool IsNoReturn =
9845 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9846
9847 // Is LR available? If so, we don't need a save.
9848 if (LRAvailable && !IsNoReturn) {
9849 NumBytesNoStackCalls += 4;
9850 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9851 CandidatesWithoutStackFixups.push_back(C);
9852 }
9853
9854 // Is an unused register available? If so, we won't modify the stack, so
9855 // we can outline with the same frame type as those that don't save LR.
9856 else if (findRegisterToSaveLRTo(C)) {
9857 NumBytesNoStackCalls += 12;
9858 C.setCallInfo(MachineOutlinerRegSave, 12);
9859 CandidatesWithoutStackFixups.push_back(C);
9860 }
9861
9862 // Is SP used in the sequence at all? If not, we don't have to modify
9863 // the stack, so we are guaranteed to get the same frame.
9864 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9865 NumBytesNoStackCalls += 12;
9866 C.setCallInfo(MachineOutlinerDefault, 12);
9867 CandidatesWithoutStackFixups.push_back(C);
9868 }
9869
9870 // If we outline this, we need to modify the stack. Pretend we don't
9871 // outline this by saving all of its bytes.
9872 else {
9873 NumBytesNoStackCalls += SequenceSize;
9874 }
9875 }
9876
9877 // If there are no places where we have to save LR, then note that we
9878 // don't have to update the stack. Otherwise, give every candidate the
9879 // default call type, as long as it's safe to do so.
9880 if (!AllStackInstrsSafe ||
9881 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9882 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9883 FrameID = MachineOutlinerNoLRSave;
9884 if (RepeatedSequenceLocs.size() < MinRepeats)
9885 return std::nullopt;
9886 } else {
9887 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9888
9889 // Bugzilla ID: 46767
9890 // TODO: Check if fixing up the stack more than once is safe so we can
9891 // outline these.
9892 //
9893 // An outline resulting in a caller that requires stack fixups at the
9894 // callsite to a callee that also requires stack fixups can happen when
9895 // there are no available registers at the candidate callsite for a
9896 // candidate that itself also has calls.
9897 //
9898 // In other words if function_containing_sequence in the following pseudo
9899 // assembly requires that we save LR at the point of the call, but there
9900 // are no available registers: in this case we save using SP and as a
9901 // result the SP offsets require stack fixups by multiples of 16.
9902 //
9903 // function_containing_sequence:
9904 // ...
9905 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9906 // call OUTLINED_FUNCTION_N
9907 // restore LR from SP
9908 // ...
9909 //
9910 // OUTLINED_FUNCTION_N:
9911 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9912 // ...
9913 // bl foo
9914 // restore LR from SP
9915 // ret
9916 //
9917 // Because the code to handle more than one stack fixup does not
9918 // currently have the proper checks for legality, these cases will assert
9919 // in the AArch64 MachineOutliner. This is because the code to do this
9920 // needs more hardening, testing, better checks that generated code is
9921 // legal, etc., and because it is only verified to handle a single pass of
9922 // stack fixup.
9923 //
9924 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9925 // these cases until they are known to be handled. Bugzilla 46767 is
9926 // referenced in comments at the assert site.
9927 //
9928 // To avoid asserting (or generating non-legal code on noassert builds)
9929 // we remove all candidates which would need more than one stack fixup by
9930 // pruning the cases where the candidate has calls while also having no
9931 // available LR and having no available general purpose registers to copy
9932 // LR to (i.e. one extra stack save/restore).
9933 //
9934 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9935 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9936 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9937 return (llvm::any_of(C, IsCall)) &&
9938 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9939 !findRegisterToSaveLRTo(C));
9940 });
9941 }
9942 }
9943
9944 // If we dropped all of the candidates, bail out here.
9945 if (RepeatedSequenceLocs.size() < MinRepeats)
9946 return std::nullopt;
9947 }
9948
9949 // Does every candidate's MBB contain a call? If so, then we might have a call
9950 // in the range.
9951 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9952 // Check if the range contains a call. These require a save + restore of the
9953 // link register.
9954 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9955 bool ModStackToSaveLR = false;
9956 if (any_of(drop_end(FirstCand),
9957 [](const MachineInstr &MI) { return MI.isCall(); }))
9958 ModStackToSaveLR = true;
9959
9960 // Handle the last instruction separately. If this is a tail call, then the
9961 // last instruction is a call. We don't want to save + restore in this case.
9962 // However, it could be possible that the last instruction is a call without
9963 // it being valid to tail call this sequence. We should consider this as
9964 // well.
9965 else if (FrameID != MachineOutlinerThunk &&
9966 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9967 ModStackToSaveLR = true;
9968
9969 if (ModStackToSaveLR) {
9970 // We can't fix up the stack. Bail out.
9971 if (!AllStackInstrsSafe)
9972 return std::nullopt;
9973
9974 // Save + restore LR.
9975 NumBytesToCreateFrame += 8;
9976 }
9977 }
9978
9979 // If we have CFI instructions, we can only outline if the outlined section
9980 // can be a tail call
9981 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9982 return std::nullopt;
9983
9984 return std::make_unique<outliner::OutlinedFunction>(
9985 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9986}
9987
9988void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9989 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9990 // If a bunch of candidates reach this point they must agree on their return
9991 // address signing. It is therefore enough to just consider the signing
9992 // behaviour of one of them.
9993 const auto &CFn = Candidates.front().getMF()->getFunction();
9994
9995 if (CFn.hasFnAttribute("ptrauth-returns"))
9996 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9997 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9998 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9999 // Since all candidates belong to the same module, just copy the
10000 // function-level attributes of an arbitrary function.
10001 if (CFn.hasFnAttribute("sign-return-address"))
10002 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10003 if (CFn.hasFnAttribute("sign-return-address-key"))
10004 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10005
10006 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10007}
10008
10009bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10010 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10011 const Function &F = MF.getFunction();
10012
10013 // Can F be deduplicated by the linker? If it can, don't outline from it.
10014 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10015 return false;
10016
10017 // Don't outline from functions with section markings; the program could
10018 // expect that all the code is in the named section.
10019 // FIXME: Allow outlining from multiple functions with the same section
10020 // marking.
10021 if (F.hasSection())
10022 return false;
10023
10024 // Outlining from functions with redzones is unsafe since the outliner may
10025 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10026 // outline from it.
10027 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10028 if (!AFI || AFI->hasRedZone().value_or(true))
10029 return false;
10030
10031 // FIXME: Determine whether it is safe to outline from functions which contain
10032 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10033 // outlined together and ensure it is safe to outline with async unwind info,
10034 // required for saving & restoring VG around calls.
10035 if (AFI->hasStreamingModeChanges())
10036 return false;
10037
10038 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10040 return false;
10041
10042 // It's safe to outline from MF.
10043 return true;
10044}
10045
10047AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10048 unsigned &Flags) const {
10050 "Must track liveness!");
10052 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10053 Ranges;
10054 // According to the AArch64 Procedure Call Standard, the following are
10055 // undefined on entry/exit from a function call:
10056 //
10057 // * Registers x16, x17, (and thus w16, w17)
10058 // * Condition codes (and thus the NZCV register)
10059 //
10060 // If any of these registers are used inside or live across an outlined
10061 // function, then they may be modified later, either by the compiler or
10062 // some other tool (like the linker).
10063 //
10064 // To avoid outlining in these situations, partition each block into ranges
10065 // where these registers are dead. We will only outline from those ranges.
10066 LiveRegUnits LRU(getRegisterInfo());
10067 auto AreAllUnsafeRegsDead = [&LRU]() {
10068 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10069 LRU.available(AArch64::NZCV);
10070 };
10071
10072 // We need to know if LR is live across an outlining boundary later on in
10073 // order to decide how we'll create the outlined call, frame, etc.
10074 //
10075 // It's pretty expensive to check this for *every candidate* within a block.
10076 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10077 // to compute liveness from the end of the block for O(n) candidates within
10078 // the block.
10079 //
10080 // So, to improve the average case, let's keep track of liveness from the end
10081 // of the block to the beginning of *every outlinable range*. If we know that
10082 // LR is available in every range we could outline from, then we know that
10083 // we don't need to check liveness for any candidate within that range.
10084 bool LRAvailableEverywhere = true;
10085 // Compute liveness bottom-up.
10086 LRU.addLiveOuts(MBB);
10087 // Update flags that require info about the entire MBB.
10088 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10089 if (MI.isCall() && !MI.isTerminator())
10091 };
10092 // Range: [RangeBegin, RangeEnd)
10093 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10094 unsigned RangeLen;
10095 auto CreateNewRangeStartingAt =
10096 [&RangeBegin, &RangeEnd,
10097 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10098 RangeBegin = NewBegin;
10099 RangeEnd = std::next(RangeBegin);
10100 RangeLen = 0;
10101 };
10102 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10103 // At least one unsafe register is not dead. We do not want to outline at
10104 // this point. If it is long enough to outline from and does not cross a
10105 // bundle boundary, save the range [RangeBegin, RangeEnd).
10106 if (RangeLen <= 1)
10107 return;
10108 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10109 return;
10110 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10111 return;
10112 Ranges.emplace_back(RangeBegin, RangeEnd);
10113 };
10114 // Find the first point where all unsafe registers are dead.
10115 // FIND: <safe instr> <-- end of first potential range
10116 // SKIP: <unsafe def>
10117 // SKIP: ... everything between ...
10118 // SKIP: <unsafe use>
10119 auto FirstPossibleEndPt = MBB.instr_rbegin();
10120 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10121 LRU.stepBackward(*FirstPossibleEndPt);
10122 // Update flags that impact how we outline across the entire block,
10123 // regardless of safety.
10124 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10125 if (AreAllUnsafeRegsDead())
10126 break;
10127 }
10128 // If we exhausted the entire block, we have no safe ranges to outline.
10129 if (FirstPossibleEndPt == MBB.instr_rend())
10130 return Ranges;
10131 // Current range.
10132 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10133 // FirstPossibleEndPt points to the first place where all unsafe registers
10134 // are dead (if there is any such point). Begin partitioning the MBB into
10135 // ranges.
10136 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10137 LRU.stepBackward(MI);
10138 UpdateWholeMBBFlags(MI);
10139 if (!AreAllUnsafeRegsDead()) {
10140 SaveRangeIfNonEmpty();
10141 CreateNewRangeStartingAt(MI.getIterator());
10142 continue;
10143 }
10144 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10145 RangeBegin = MI.getIterator();
10146 ++RangeLen;
10147 }
10148 // Above loop misses the last (or only) range. If we are still safe, then
10149 // let's save the range.
10150 if (AreAllUnsafeRegsDead())
10151 SaveRangeIfNonEmpty();
10152 if (Ranges.empty())
10153 return Ranges;
10154 // We found the ranges bottom-up, but the mapping expects them top-down. Reverse
10155 // the order.
10156 std::reverse(Ranges.begin(), Ranges.end());
10157 // If there is at least one outlinable range where LR is unavailable
10158 // somewhere, remember that.
10159 if (!LRAvailableEverywhere)
10161 return Ranges;
10162}
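// Illustrative sketch (not part of the upstream file): a block is partitioned
// around any region where x16, x17 or NZCV is live, e.g. for
//
//   add  x0, x1, x2                    // unsafe regs dead -> outlinable range
//   adrp x16, :got:foo                 // x16 live from here ...
//   ldr  x16, [x16, :got_lo12:foo]
//   blr  x16                           // ... to here       -> skipped
//   add  x5, x5, #1                    // dead again        -> outlinable range
//
// only the regions around the two "add"s are returned, and a flag is recorded
// if LR is not available somewhere inside an outlinable range.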
10163
10165AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10167 unsigned Flags) const {
10168 MachineInstr &MI = *MIT;
10169
10170 // Don't outline anything used for return address signing. The outlined
10171 // function will get signed later if needed
10172 switch (MI.getOpcode()) {
10173 case AArch64::PACM:
10174 case AArch64::PACIASP:
10175 case AArch64::PACIBSP:
10176 case AArch64::PACIASPPC:
10177 case AArch64::PACIBSPPC:
10178 case AArch64::AUTIASP:
10179 case AArch64::AUTIBSP:
10180 case AArch64::AUTIASPPCi:
10181 case AArch64::AUTIASPPCr:
10182 case AArch64::AUTIBSPPCi:
10183 case AArch64::AUTIBSPPCr:
10184 case AArch64::RETAA:
10185 case AArch64::RETAB:
10186 case AArch64::RETAASPPCi:
10187 case AArch64::RETAASPPCr:
10188 case AArch64::RETABSPPCi:
10189 case AArch64::RETABSPPCr:
10190 case AArch64::EMITBKEY:
10191 case AArch64::PAUTH_PROLOGUE:
10192 case AArch64::PAUTH_EPILOGUE:
10194 }
10195
10196 // We can only outline these if we will tail call the outlined function, or
10197 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10198 // in a tail call.
10199 //
10200 // FIXME: If the proper fixups for the offset are implemented, this should be
10201 // possible.
10202 if (MI.isCFIInstruction())
10204
10205 // Is this a terminator for a basic block?
10206 if (MI.isTerminator())
10207 // TargetInstrInfo::getOutliningType has already filtered out anything
10208 // that would break this, so we can allow it here.
10210
10211 // Make sure none of the operands are un-outlinable.
10212 for (const MachineOperand &MOP : MI.operands()) {
10213 // A check preventing CFI indices was here before, but only CFI
10214 // instructions should have those.
10215 assert(!MOP.isCFIIndex());
10216
10217 // If it uses LR or W30 explicitly, then don't touch it.
10218 if (MOP.isReg() && !MOP.isImplicit() &&
10219 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10221 }
10222
10223 // Special cases for instructions that can always be outlined, but will fail
10224 // the later tests, e.g. ADRPs, which are PC-relative use LR, but can always
10225 // be outlined because they don't require a *specific* value to be in LR.
10226 if (MI.getOpcode() == AArch64::ADRP)
10228
10229 // If MI is a call we might be able to outline it. We don't want to outline
10230 // any calls that rely on the position of items on the stack. When we outline
10231 // something containing a call, we have to emit a save and restore of LR in
10232 // the outlined function. Currently, this always happens by saving LR to the
10233 // stack. Thus, if we outline, say, half the parameters for a function call
10234 // plus the call, then we'll break the callee's expectations for the layout
10235 // of the stack.
10236 //
10237 // FIXME: Allow calls to functions which construct a stack frame, as long
10238 // as they don't access arguments on the stack.
10239 // FIXME: Figure out some way to analyze functions defined in other modules.
10240 // We should be able to compute the memory usage based on the IR calling
10241 // convention, even if we can't see the definition.
10242 if (MI.isCall()) {
10243 // Get the function associated with the call. Look at each operand and find
10244 // the one that represents the callee and get its name.
10245 const Function *Callee = nullptr;
10246 for (const MachineOperand &MOP : MI.operands()) {
10247 if (MOP.isGlobal()) {
10248 Callee = dyn_cast<Function>(MOP.getGlobal());
10249 break;
10250 }
10251 }
10252
10253 // Never outline calls to mcount. There isn't any rule that would require
10254 // this, but the Linux kernel's "ftrace" feature depends on it.
10255 if (Callee && Callee->getName() == "\01_mcount")
10257
10258 // If we don't know anything about the callee, assume it depends on the
10259 // stack layout of the caller. In that case, it's only legal to outline
10260 // as a tail-call. Explicitly list the call instructions we know about so we
10261 // don't get unexpected results with call pseudo-instructions.
10262 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10263 if (MI.getOpcode() == AArch64::BLR ||
10264 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10265 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10266
10267 if (!Callee)
10268 return UnknownCallOutlineType;
10269
10270 // We have a function we have information about. Check if it's something
10271 // we can safely outline.
10272 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10273
10274 // We don't know what's going on with the callee at all. Don't touch it.
10275 if (!CalleeMF)
10276 return UnknownCallOutlineType;
10277
10278 // Check if we know anything about the callee saves on the function. If we
10279 // don't, then don't touch it, since that implies that we haven't
10280 // computed anything about its stack frame yet.
10281 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10282 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10283 MFI.getNumObjects() > 0)
10284 return UnknownCallOutlineType;
10285
10286 // At this point, we can say that CalleeMF ought to not pass anything on the
10287 // stack. Therefore, we can outline it.
10289 }
10290
10291 // Don't touch the link register or W30.
10292 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10293 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10295
10296 // Don't outline BTI instructions, because that will prevent the outlining
10297 // site from being indirectly callable.
10298 if (hasBTISemantics(MI))
10300
10302}
10303
10304void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10305 for (MachineInstr &MI : MBB) {
10306 const MachineOperand *Base;
10307 TypeSize Width(0, false);
10308 int64_t Offset;
10309 bool OffsetIsScalable;
10310
10311 // Is this a load or store with an immediate offset with SP as the base?
10312 if (!MI.mayLoadOrStore() ||
10313 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10314 &RI) ||
10315 (Base->isReg() && Base->getReg() != AArch64::SP))
10316 continue;
10317
10318 // It is, so we have to fix it up.
10319 TypeSize Scale(0U, false);
10320 int64_t Dummy1, Dummy2;
10321
10322 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10323 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10324 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10325 assert(Scale != 0 && "Unexpected opcode!");
10326 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10327
10328 // We've pushed the return address to the stack, so add 16 to the offset.
10329 // This is safe, since we already checked if it would overflow when we
10330 // checked if this instruction was legal to outline.
10331 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10332 StackOffsetOperand.setImm(NewImm);
10333 }
10334}
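// Illustrative sketch (not part of the upstream file): after the outliner has
// pushed LR, every SP-relative immediate is re-biased by 16 bytes in units of
// the access scale. E.g. "ldr x0, [sp, #8]" (LDRXui, Scale == 8, encoded
// immediate 1) gets NewImm == (8 + 16) / 8 == 3, i.e. it is rewritten to
// "ldr x0, [sp, #24]".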
10335
10337 const AArch64InstrInfo *TII,
10338 bool ShouldSignReturnAddr) {
10339 if (!ShouldSignReturnAddr)
10340 return;
10341
10342 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10344 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10345 TII->get(AArch64::PAUTH_EPILOGUE))
10347}
10348
10349void AArch64InstrInfo::buildOutlinedFrame(
10351 const outliner::OutlinedFunction &OF) const {
10352
10353 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10354
10355 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10356 FI->setOutliningStyle("Tail Call");
10357 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10358 // For thunk outlining, rewrite the last instruction from a call to a
10359 // tail-call.
10360 MachineInstr *Call = &*--MBB.instr_end();
10361 unsigned TailOpcode;
10362 if (Call->getOpcode() == AArch64::BL) {
10363 TailOpcode = AArch64::TCRETURNdi;
10364 } else {
10365 assert(Call->getOpcode() == AArch64::BLR ||
10366 Call->getOpcode() == AArch64::BLRNoIP);
10367 TailOpcode = AArch64::TCRETURNriALL;
10368 }
10369 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10370 .add(Call->getOperand(0))
10371 .addImm(0);
10372 MBB.insert(MBB.end(), TC);
10374
10375 FI->setOutliningStyle("Thunk");
10376 }
10377
10378 bool IsLeafFunction = true;
10379
10380 // Is there a call in the outlined range?
10381 auto IsNonTailCall = [](const MachineInstr &MI) {
10382 return MI.isCall() && !MI.isReturn();
10383 };
10384
10385 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10386 // Fix up the instructions in the range, since we're going to modify the
10387 // stack.
10388
10389 // Bugzilla ID: 46767
10390 // TODO: Check if fixing up twice is safe so we can outline these.
10391 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10392 "Can only fix up stack references once");
10393 fixupPostOutline(MBB);
10394
10395 IsLeafFunction = false;
10396
10397 // LR has to be a live in so that we can save it.
10398 if (!MBB.isLiveIn(AArch64::LR))
10399 MBB.addLiveIn(AArch64::LR);
10400
10403
10404 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10405 OF.FrameConstructionID == MachineOutlinerThunk)
10406 Et = std::prev(MBB.end());
10407
10408 // Insert a save before the outlined region
10409 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10410 .addReg(AArch64::SP, RegState::Define)
10411 .addReg(AArch64::LR)
10412 .addReg(AArch64::SP)
10413 .addImm(-16);
10414 It = MBB.insert(It, STRXpre);
10415
10416 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10417 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10418
10419 // Add a CFI saying the stack was moved 16 B down.
10420 CFIBuilder.buildDefCFAOffset(16);
10421
10422 // Add a CFI saying that the LR that we want to find is now 16 B higher
10423 // than before.
10424 CFIBuilder.buildOffset(AArch64::LR, -16);
10425 }
10426
10427 // Insert a restore before the terminator for the function.
10428 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10429 .addReg(AArch64::SP, RegState::Define)
10430 .addReg(AArch64::LR, RegState::Define)
10431 .addReg(AArch64::SP)
10432 .addImm(16);
10433 Et = MBB.insert(Et, LDRXpost);
10434 }
10435
10436 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10437
10438 // If this is a tail call outlined function, then there's already a return.
10439 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10440 OF.FrameConstructionID == MachineOutlinerThunk) {
10441 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10442 return;
10443 }
10444
10445 // It's not a tail call, so we have to insert the return ourselves.
10446
10447 // LR has to be a live in so that we can return to it.
10448 if (!MBB.isLiveIn(AArch64::LR))
10449 MBB.addLiveIn(AArch64::LR);
10450
10451 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10452 .addReg(AArch64::LR);
10453 MBB.insert(MBB.end(), ret);
10454
10455 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10456
10457 FI->setOutliningStyle("Function");
10458
10459 // Did we have to modify the stack by saving the link register?
10460 if (OF.FrameConstructionID != MachineOutlinerDefault)
10461 return;
10462
10463 // We modified the stack.
10464 // Walk over the basic block and fix up all the stack accesses.
10465 fixupPostOutline(MBB);
10466}
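// Illustrative sketch (not part of the upstream file): when the outlined body
// itself contains a (non-tail) call, the code above brackets it roughly as
//
//   OUTLINED_FUNCTION_N:
//     str x30, [sp, #-16]!     // STRXpre (+ CFI for the 16-byte push)
//     ...                      // outlined body, SP offsets fixed up
//     ldr x30, [sp], #16       // LDRXpost
//     ret                      // added unless this is a tail call or thunk
//
// with PAUTH_PROLOGUE/PAUTH_EPILOGUE wrapping the frame when return-address
// signing is required.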
10467
10468MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10471
10472 // Are we tail calling?
10473 if (C.CallConstructionID == MachineOutlinerTailCall) {
10474 // If yes, then we can just branch to the label.
10475 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10476 .addGlobalAddress(M.getNamedValue(MF.getName()))
10477 .addImm(0));
10478 return It;
10479 }
10480
10481 // Are we saving the link register?
10482 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10483 C.CallConstructionID == MachineOutlinerThunk) {
10484 // No, so just insert the call.
10485 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10486 .addGlobalAddress(M.getNamedValue(MF.getName())));
10487 return It;
10488 }
10489
10490 // We want to return the spot where we inserted the call.
10492
10493 // Instructions for saving and restoring LR around the call instruction we're
10494 // going to insert.
10495 MachineInstr *Save;
10496 MachineInstr *Restore;
10497 // Can we save to a register?
10498 if (C.CallConstructionID == MachineOutlinerRegSave) {
10499 // FIXME: This logic should be sunk into a target-specific interface so that
10500 // we don't have to recompute the register.
10501 Register Reg = findRegisterToSaveLRTo(C);
10502 assert(Reg && "No callee-saved register available?");
10503
10504 // LR has to be a live in so that we can save it.
10505 if (!MBB.isLiveIn(AArch64::LR))
10506 MBB.addLiveIn(AArch64::LR);
10507
10508 // Save and restore LR from Reg.
10509 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10510 .addReg(AArch64::XZR)
10511 .addReg(AArch64::LR)
10512 .addImm(0);
10513 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10514 .addReg(AArch64::XZR)
10515 .addReg(Reg)
10516 .addImm(0);
10517 } else {
10518 // We have the default case. Save and restore from SP.
10519 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10520 .addReg(AArch64::SP, RegState::Define)
10521 .addReg(AArch64::LR)
10522 .addReg(AArch64::SP)
10523 .addImm(-16);
10524 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10525 .addReg(AArch64::SP, RegState::Define)
10526 .addReg(AArch64::LR, RegState::Define)
10527 .addReg(AArch64::SP)
10528 .addImm(16);
10529 }
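    // Sketch of the sequence this default case wraps around the call:
    //   str x30, [sp, #-16]!   // STRXpre: spill LR, pre-decrement SP
    //   bl  <outlined function>
    //   ldr x30, [sp], #16     // LDRXpost: reload LR, post-increment SP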
10530
10531 It = MBB.insert(It, Save);
10532 It++;
10533
10534 // Insert the call.
10535 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10536 .addGlobalAddress(M.getNamedValue(MF.getName())));
10537 CallPt = It;
10538 It++;
10539
10540 It = MBB.insert(It, Restore);
10541 return CallPt;
10542}
10543
10544bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10545 MachineFunction &MF) const {
10546 return MF.getFunction().hasMinSize();
10547}
10548
10549void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10550                                          MachineBasicBlock::iterator Iter,
10551                                          DebugLoc &DL,
10552 bool AllowSideEffects) const {
10553 const MachineFunction &MF = *MBB.getParent();
10554 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10555 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10556
10557 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10558 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10559 } else if (STI.isSVEorStreamingSVEAvailable()) {
10560 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10561 .addImm(0)
10562 .addImm(0);
10563 } else if (STI.isNeonAvailable()) {
10564 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10565 .addImm(0);
10566 } else {
10567 // This is a streaming-compatible function without SVE. We don't have full
10568 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10569    // So, given that `movi v..` would be illegal, use `fmov d..` instead.
10570 assert(STI.hasNEON() && "Expected to have NEON.");
10571 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10572 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10573 }
10574}
10575
10576std::optional<DestSourcePair>
10577AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10578
10579  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
10580  // zero immediate operand are used as an alias for the mov instruction.
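  // For example, "orr w0, wzr, w1, lsl #0" and "orr x0, xzr, x1, lsl #0" are
  // simply "mov w0, w1" and "mov x0, x1" respectively.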
10581 if (((MI.getOpcode() == AArch64::ORRWrs &&
10582 MI.getOperand(1).getReg() == AArch64::WZR &&
10583 MI.getOperand(3).getImm() == 0x0) ||
10584 (MI.getOpcode() == AArch64::ORRWrr &&
10585 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10586 // Check that the w->w move is not a zero-extending w->x mov.
10587 (!MI.getOperand(0).getReg().isVirtual() ||
10588 MI.getOperand(0).getSubReg() == 0) &&
10589 (!MI.getOperand(0).getReg().isPhysical() ||
10590 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10591 /*TRI=*/nullptr) == -1))
10592 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10593
10594 if (MI.getOpcode() == AArch64::ORRXrs &&
10595 MI.getOperand(1).getReg() == AArch64::XZR &&
10596 MI.getOperand(3).getImm() == 0x0)
10597 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10598
10599 return std::nullopt;
10600}
10601
10602std::optional<DestSourcePair>
10603AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10604  if ((MI.getOpcode() == AArch64::ORRWrs &&
10605 MI.getOperand(1).getReg() == AArch64::WZR &&
10606 MI.getOperand(3).getImm() == 0x0) ||
10607 (MI.getOpcode() == AArch64::ORRWrr &&
10608 MI.getOperand(1).getReg() == AArch64::WZR))
10609 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10610 return std::nullopt;
10611}
10612
10613std::optional<RegImmPair>
10614AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10615 int Sign = 1;
10616 int64_t Offset = 0;
10617
10618 // TODO: Handle cases where Reg is a super- or sub-register of the
10619 // destination register.
10620 const MachineOperand &Op0 = MI.getOperand(0);
10621 if (!Op0.isReg() || Reg != Op0.getReg())
10622 return std::nullopt;
10623
10624 switch (MI.getOpcode()) {
10625 default:
10626 return std::nullopt;
10627 case AArch64::SUBWri:
10628 case AArch64::SUBXri:
10629 case AArch64::SUBSWri:
10630 case AArch64::SUBSXri:
10631 Sign *= -1;
10632 [[fallthrough]];
10633 case AArch64::ADDSWri:
10634 case AArch64::ADDSXri:
10635 case AArch64::ADDWri:
10636 case AArch64::ADDXri: {
10637 // TODO: Third operand can be global address (usually some string).
10638 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10639 !MI.getOperand(2).isImm())
10640 return std::nullopt;
10641 int Shift = MI.getOperand(3).getImm();
10642 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10643 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
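    // For example, "sub x0, x1, #4, lsl #12" (SUBXri with Shift == 12) yields
    // Offset == -(4 << 12) == -16384, reported against base register x1.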
10644 }
10645 }
10646 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10647}
10648
10649/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10650/// the destination register then, if possible, describe the value in terms of
10651/// the source register.
10652static std::optional<ParamLoadedValue>
10653describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10654                       const TargetInstrInfo *TII,
10655 const TargetRegisterInfo *TRI) {
10656 auto DestSrc = TII->isCopyLikeInstr(MI);
10657 if (!DestSrc)
10658 return std::nullopt;
10659
10660 Register DestReg = DestSrc->Destination->getReg();
10661 Register SrcReg = DestSrc->Source->getReg();
10662
10663 if (!DestReg.isValid() || !SrcReg.isValid())
10664 return std::nullopt;
10665
10666 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10667
10668 // If the described register is the destination, just return the source.
10669 if (DestReg == DescribedReg)
10670 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10671
10672 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10673 if (MI.getOpcode() == AArch64::ORRWrs &&
10674 TRI->isSuperRegister(DestReg, DescribedReg))
10675 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10676
10677 // We may need to describe the lower part of a ORRXrs move.
10678 if (MI.getOpcode() == AArch64::ORRXrs &&
10679 TRI->isSubRegister(DestReg, DescribedReg)) {
10680 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10681 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10682 }
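  // For example, if MI is "$x0 = ORRXrs $xzr, $x1, 0" and the described
  // register is $w0, the loaded value is described in terms of $w1.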
10683
10684 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10685 "Unhandled ORR[XW]rs copy case");
10686
10687 return std::nullopt;
10688}
10689
10690bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10691 // Functions cannot be split to different sections on AArch64 if they have
10692 // a red zone. This is because relaxing a cross-section branch may require
10693 // incrementing the stack pointer to spill a register, which would overwrite
10694 // the red zone.
10695 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10696 return false;
10697
10698  return TargetInstrInfo::isFunctionSafeToSplit(MF);
10699}
10700
10701bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10702 const MachineBasicBlock &MBB) const {
10703 // Asm Goto blocks can contain conditional branches to goto labels, which can
10704 // get moved out of range of the branch instruction.
10705 auto isAsmGoto = [](const MachineInstr &MI) {
10706 return MI.getOpcode() == AArch64::INLINEASM_BR;
10707 };
10708 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10709 return false;
10710
10711 // Because jump tables are label-relative instead of table-relative, they all
10712 // must be in the same section or relocation fixup handling will fail.
10713
10714 // Check if MBB is a jump table target
10715 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10716 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10717 return llvm::is_contained(JTE.MBBs, &MBB);
10718 };
10719 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10720 return false;
10721
10722 // Check if MBB contains a jump table lookup
10723 for (const MachineInstr &MI : MBB) {
10724 switch (MI.getOpcode()) {
10725 case TargetOpcode::G_BRJT:
10726 case AArch64::JumpTableDest32:
10727 case AArch64::JumpTableDest16:
10728 case AArch64::JumpTableDest8:
10729 return false;
10730 default:
10731 continue;
10732 }
10733 }
10734
10735 // MBB isn't a special case, so it's safe to be split to the cold section.
10736 return true;
10737}
10738
10739std::optional<ParamLoadedValue>
10740AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10741 Register Reg) const {
10742 const MachineFunction *MF = MI.getMF();
10743 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10744 switch (MI.getOpcode()) {
10745 case AArch64::MOVZWi:
10746 case AArch64::MOVZXi: {
10747 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10748 // 64-bit parameters, so we need to consider super-registers.
10749 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10750 return std::nullopt;
10751
10752 if (!MI.getOperand(1).isImm())
10753 return std::nullopt;
10754 int64_t Immediate = MI.getOperand(1).getImm();
10755 int Shift = MI.getOperand(2).getImm();
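    // For example, "movz x0, #1, lsl #16" (MOVZXi with Shift == 16) describes
    // the value 1 << 16 == 65536.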
10756 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10757 nullptr);
10758 }
10759 case AArch64::ORRWrs:
10760 case AArch64::ORRXrs:
10761 return describeORRLoadedValue(MI, Reg, this, TRI);
10762 }
10763
10764  return TargetInstrInfo::describeLoadedValue(MI, Reg);
10765}
10766
10767bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10768 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10769 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10770 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10771 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10772
10773 // Anyexts are nops.
10774 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10775 return true;
10776
10777 Register DefReg = ExtMI.getOperand(0).getReg();
10778 if (!MRI.hasOneNonDBGUse(DefReg))
10779 return false;
10780
10781 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10782 // addressing mode.
10783 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10784 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10785}
10786
10787uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10788 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10789}
10790
10791bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10792 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10793}
10794
10795bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10796 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10797}
10798
10799unsigned int
10800AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10801 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10802}
10803
10804bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10805 unsigned Scale) const {
10806 if (Offset && Scale)
10807 return false;
10808
10809 // Check Reg + Imm
10810 if (!Scale) {
10811 // 9-bit signed offset
10812 if (isInt<9>(Offset))
10813 return true;
10814
10815 // 12-bit unsigned offset
10816 unsigned Shift = Log2_64(NumBytes);
10817 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10818 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10819 (Offset >> Shift) << Shift == Offset)
10820 return true;
10821 return false;
10822 }
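  // For example, with NumBytes == 8 the reg+imm forms accepted above are the
  // 9-bit signed (unscaled) offsets -256..255 and the positive multiples of 8
  // up to 8 * 4095 == 32760 (12-bit unsigned offset, scaled by the access size).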
10823
10824 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10825 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10826}
10827
10828unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10829  if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10830 return AArch64::BLRNoIP;
10831 else
10832 return AArch64::BLR;
10833}
10834
10835MachineBasicBlock::iterator
10836AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10837                                   Register TargetReg, bool FrameSetup) const {
10838 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10839
10840 MachineBasicBlock &MBB = *MBBI->getParent();
10841 MachineFunction &MF = *MBB.getParent();
10842 const AArch64InstrInfo *TII =
10843 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10844 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10845 DebugLoc DL = MBB.findDebugLoc(MBBI);
10846
10847 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10848 MachineBasicBlock *LoopTestMBB =
10849 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10850 MF.insert(MBBInsertPoint, LoopTestMBB);
10851 MachineBasicBlock *LoopBodyMBB =
10852 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10853 MF.insert(MBBInsertPoint, LoopBodyMBB);
10854 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10855 MF.insert(MBBInsertPoint, ExitMBB);
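  // Control flow built below: MBB falls through to LoopTestMBB, which either
  // exits to ExitMBB (once SP has been decremented down to TargetReg) or runs
  // LoopBodyMBB to probe [SP] and branch back. ExitMBB re-materializes
  // SP = TargetReg, probes the final page, and continues with the remainder of
  // the original MBB.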
10856 MachineInstr::MIFlag Flags =
10857      FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10858
10859 // LoopTest:
10860 // SUB SP, SP, #ProbeSize
10861 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10862 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10863
10864 // CMP SP, TargetReg
10865 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10866 AArch64::XZR)
10867 .addReg(AArch64::SP)
10868 .addReg(TargetReg)
10869      .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10870      .setMIFlags(Flags);
10871
10872 // B.<Cond> LoopExit
10873 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10874      .addImm(AArch64CC::LE)
10875      .addMBB(ExitMBB)
10876 .setMIFlags(Flags);
10877
10878 // STR XZR, [SP]
10879 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10880 .addReg(AArch64::XZR)
10881 .addReg(AArch64::SP)
10882 .addImm(0)
10883 .setMIFlags(Flags);
10884
10885 // B loop
10886 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10887 .addMBB(LoopTestMBB)
10888 .setMIFlags(Flags);
10889
10890 // LoopExit:
10891 // MOV SP, TargetReg
10892 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10893 .addReg(TargetReg)
10894 .addImm(0)
10895      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
10896      .setMIFlags(Flags);
10897
10898 // LDR XZR, [SP]
10899 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10900 .addReg(AArch64::XZR, RegState::Define)
10901 .addReg(AArch64::SP)
10902 .addImm(0)
10903 .setMIFlags(Flags);
10904
10905 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10906  ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10907
10908 LoopTestMBB->addSuccessor(ExitMBB);
10909 LoopTestMBB->addSuccessor(LoopBodyMBB);
10910 LoopBodyMBB->addSuccessor(LoopTestMBB);
10911 MBB.addSuccessor(LoopTestMBB);
10912
10913 // Update liveins.
10914 if (MF.getRegInfo().reservedRegsFrozen())
10915 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10916
10917 return ExitMBB->begin();
10918}
10919
10920namespace {
10921class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10922 MachineFunction *MF;
10923 const TargetInstrInfo *TII;
10924 const TargetRegisterInfo *TRI;
10925  MachineRegisterInfo &MRI;
10926
10927 /// The block of the loop
10928 MachineBasicBlock *LoopBB;
10929 /// The conditional branch of the loop
10930 MachineInstr *CondBranch;
10931 /// The compare instruction for loop control
10932 MachineInstr *Comp;
10933 /// The number of the operand of the loop counter value in Comp
10934 unsigned CompCounterOprNum;
10935 /// The instruction that updates the loop counter value
10936 MachineInstr *Update;
10937 /// The number of the operand of the loop counter value in Update
10938 unsigned UpdateCounterOprNum;
10939 /// The initial value of the loop counter
10940 Register Init;
10941 /// True iff Update is a predecessor of Comp
10942 bool IsUpdatePriorComp;
10943
10944 /// The normalized condition used by createTripCountGreaterCondition()
10945  SmallVector<MachineOperand, 4> Cond;
10946
10947public:
10948 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10949 MachineInstr *Comp, unsigned CompCounterOprNum,
10950 MachineInstr *Update, unsigned UpdateCounterOprNum,
10951 Register Init, bool IsUpdatePriorComp,
10952                           const SmallVectorImpl<MachineOperand> &Cond)
10953      : MF(Comp->getParent()->getParent()),
10954 TII(MF->getSubtarget().getInstrInfo()),
10955 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10956 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10957 CompCounterOprNum(CompCounterOprNum), Update(Update),
10958 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10959 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10960
10961 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10962 // Make the instructions for loop control be placed in stage 0.
10963 // The predecessors of Comp are considered by the caller.
10964 return MI == Comp;
10965 }
10966
10967 std::optional<bool> createTripCountGreaterCondition(
10968 int TC, MachineBasicBlock &MBB,
10969 SmallVectorImpl<MachineOperand> &CondParam) override {
10970 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10971 // Cond is normalized for such use.
10972 // The predecessors of the branch are assumed to have already been inserted.
10973 CondParam = Cond;
10974 return {};
10975 }
10976
10977 void createRemainingIterationsGreaterCondition(
10978 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10979 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10980
10981 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10982
10983 void adjustTripCount(int TripCountAdjust) override {}
10984
10985 bool isMVEExpanderSupported() override { return true; }
10986};
10987} // namespace
10988
10989/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10990/// is replaced by ReplaceReg. The output register is newly created.
10991/// The other operands are unchanged from MI.
10992static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10993 Register ReplaceReg, MachineBasicBlock &MBB,
10994 MachineBasicBlock::iterator InsertTo) {
10995 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10996 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10997 const TargetRegisterInfo *TRI =
10998 MBB.getParent()->getSubtarget().getRegisterInfo();
10999 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11000 Register Result = 0;
11001 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11002 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11003 Result = MRI.createVirtualRegister(
11004 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11005 NewMI->getOperand(I).setReg(Result);
11006 } else if (I == ReplaceOprNum) {
11007 MRI.constrainRegClass(ReplaceReg,
11008 TII->getRegClass(NewMI->getDesc(), I, TRI));
11009 NewMI->getOperand(I).setReg(ReplaceReg);
11010 }
11011 }
11012 MBB.insert(InsertTo, NewMI);
11013 return Result;
11014}
11015
11016void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11017    int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11018    DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11019  // Create and accumulate conditions for next TC iterations.
11020 // Example:
11021 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11022 // # iteration of the kernel
11023 //
11024 // # insert the following instructions
11025 // cond = CSINCXr 0, 0, C, implicit $nzcv
11026 // counter = ADDXri counter, 1 # clone from this->Update
11027 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11028 // cond = CSINCXr cond, cond, C, implicit $nzcv
11029 // ... (repeat TC times)
11030 // SUBSXri cond, 0, implicit-def $nzcv
11031
11032 assert(CondBranch->getOpcode() == AArch64::Bcc);
11033 // CondCode to exit the loop
11034  AArch64CC::CondCode CC =
11035      (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11036  if (CondBranch->getOperand(1).getMBB() == LoopBB)
11037    CC = AArch64CC::getInvertedCondCode(CC);
11038
11039 // Accumulate conditions to exit the loop
11040 Register AccCond = AArch64::XZR;
11041
11042 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
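  // CSINC Xd, Xn, Xm, cond returns Xn if cond holds and Xm + 1 otherwise, so
  // selecting on the inverted condition code increments the accumulator exactly
  // when the loop-exit condition CC is satisfied.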
11043 auto AccumulateCond = [&](Register CurCond,
11044                            AArch64CC::CondCode CC) {
11045    Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11046 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11047 .addReg(NewCond, RegState::Define)
11048 .addReg(CurCond)
11049 .addReg(CurCond)
11050        .addImm(AArch64CC::getInvertedCondCode(CC));
11051    return NewCond;
11052 };
11053
11054 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11055    // Update and Comp for I == 0 already exist in MBB
11056 // (MBB is an unrolled kernel)
11057 Register Counter;
11058 for (int I = 0; I <= TC; ++I) {
11059 Register NextCounter;
11060 if (I != 0)
11061 NextCounter =
11062 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11063
11064 AccCond = AccumulateCond(AccCond, CC);
11065
11066 if (I != TC) {
11067 if (I == 0) {
11068 if (Update != Comp && IsUpdatePriorComp) {
11069 Counter =
11070 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11071 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11072 MBB.end());
11073 } else {
11074          // We can use the already-calculated value.
11075 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11076 }
11077 } else if (Update != Comp) {
11078 NextCounter =
11079 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11080 }
11081 }
11082 Counter = NextCounter;
11083 }
11084 } else {
11085 Register Counter;
11086 if (LastStage0Insts.empty()) {
11087      // Use the initial counter value (testing whether the trip count is
11088      // sufficient for the loop to be executed by the pipelined code).
11089 Counter = Init;
11090 if (IsUpdatePriorComp)
11091 Counter =
11092 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11093 } else {
11094 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11095 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11096 }
11097
11098 for (int I = 0; I <= TC; ++I) {
11099 Register NextCounter;
11100 NextCounter =
11101 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11102 AccCond = AccumulateCond(AccCond, CC);
11103 if (I != TC && Update != Comp)
11104 NextCounter =
11105 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11106 Counter = NextCounter;
11107 }
11108 }
11109
11110 // If AccCond == 0, the remainder is greater than TC.
11111 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11112 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11113 .addReg(AccCond)
11114 .addImm(0)
11115 .addImm(0);
11116 Cond.clear();
11117  Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11118}
11119
11120static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11121 Register &RegMBB, Register &RegOther) {
11122 assert(Phi.getNumOperands() == 5);
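  // A two-predecessor PHI in MIR has the shape
  //   %dst = PHI %val1, %bb1, %val2, %bb2
  // so operands 1/3 are the incoming values and operands 2/4 the blocks.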
11123 if (Phi.getOperand(2).getMBB() == MBB) {
11124 RegMBB = Phi.getOperand(1).getReg();
11125 RegOther = Phi.getOperand(3).getReg();
11126 } else {
11127 assert(Phi.getOperand(4).getMBB() == MBB);
11128 RegMBB = Phi.getOperand(3).getReg();
11129 RegOther = Phi.getOperand(1).getReg();
11130 }
11131}
11132
11133static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11134  if (!Reg.isVirtual())
11135 return false;
11136 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11137 return MRI.getVRegDef(Reg)->getParent() != BB;
11138}
11139
11140/// If Reg is an induction variable, return true and set some parameters
11141static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11142 MachineInstr *&UpdateInst,
11143 unsigned &UpdateCounterOprNum, Register &InitReg,
11144 bool &IsUpdatePriorComp) {
11145 // Example:
11146 //
11147 // Preheader:
11148 // InitReg = ...
11149 // LoopBB:
11150 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11151 // Reg = COPY Reg0 ; COPY is ignored.
11152 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11153 // ; Reg is the value calculated in the previous
11154 // ; iteration, so IsUpdatePriorComp == false.
11155
11156 if (LoopBB->pred_size() != 2)
11157 return false;
11158 if (!Reg.isVirtual())
11159 return false;
11160 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11161 UpdateInst = nullptr;
11162 UpdateCounterOprNum = 0;
11163 InitReg = 0;
11164 IsUpdatePriorComp = true;
11165 Register CurReg = Reg;
11166 while (true) {
11167 MachineInstr *Def = MRI.getVRegDef(CurReg);
11168 if (Def->getParent() != LoopBB)
11169 return false;
11170 if (Def->isCopy()) {
11171 // Ignore copy instructions unless they contain subregisters
11172 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11173 return false;
11174 CurReg = Def->getOperand(1).getReg();
11175 } else if (Def->isPHI()) {
11176 if (InitReg != 0)
11177 return false;
11178 if (!UpdateInst)
11179 IsUpdatePriorComp = false;
11180 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11181 } else {
11182 if (UpdateInst)
11183 return false;
11184 switch (Def->getOpcode()) {
11185 case AArch64::ADDSXri:
11186 case AArch64::ADDSWri:
11187 case AArch64::SUBSXri:
11188 case AArch64::SUBSWri:
11189 case AArch64::ADDXri:
11190 case AArch64::ADDWri:
11191 case AArch64::SUBXri:
11192 case AArch64::SUBWri:
11193 UpdateInst = Def;
11194 UpdateCounterOprNum = 1;
11195 break;
11196 case AArch64::ADDSXrr:
11197 case AArch64::ADDSWrr:
11198 case AArch64::SUBSXrr:
11199 case AArch64::SUBSWrr:
11200 case AArch64::ADDXrr:
11201 case AArch64::ADDWrr:
11202 case AArch64::SUBXrr:
11203 case AArch64::SUBWrr:
11204 UpdateInst = Def;
11205 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11206 UpdateCounterOprNum = 1;
11207 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11208 UpdateCounterOprNum = 2;
11209 else
11210 return false;
11211 break;
11212 default:
11213 return false;
11214 }
11215 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11216 }
11217
11218 if (!CurReg.isVirtual())
11219 return false;
11220 if (Reg == CurReg)
11221 break;
11222 }
11223
11224 if (!UpdateInst)
11225 return false;
11226
11227 return true;
11228}
11229
11230std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11231AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11232  // Accept loops that meet the following conditions
11233 // * The conditional branch is BCC
11234 // * The compare instruction is ADDS/SUBS/WHILEXX
11235 // * One operand of the compare is an induction variable and the other is a
11236 // loop invariant value
11237 // * The induction variable is incremented/decremented by a single instruction
11238 // * Does not contain CALL or instructions which have unmodeled side effects
11239
11240 for (MachineInstr &MI : *LoopBB)
11241 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11242 // This instruction may use NZCV, which interferes with the instruction to
11243 // be inserted for loop control.
11244 return nullptr;
11245
11246 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11247  SmallVector<MachineOperand, 4> Cond;
11248  if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11249 return nullptr;
11250
11251 // Infinite loops are not supported
11252 if (TBB == LoopBB && FBB == LoopBB)
11253 return nullptr;
11254
11255 // Must be conditional branch
11256 if (TBB != LoopBB && FBB == nullptr)
11257 return nullptr;
11258
11259 assert((TBB == LoopBB || FBB == LoopBB) &&
11260 "The Loop must be a single-basic-block loop");
11261
11262 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11263  const TargetRegisterInfo &TRI = getRegisterInfo();
11264
11265 if (CondBranch->getOpcode() != AArch64::Bcc)
11266 return nullptr;
11267
11268 // Normalization for createTripCountGreaterCondition()
11269 if (TBB == LoopBB)
11270    reverseBranchCondition(Cond);
11271
11272 MachineInstr *Comp = nullptr;
11273 unsigned CompCounterOprNum = 0;
11274 for (MachineInstr &MI : reverse(*LoopBB)) {
11275 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11276 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11277 // operands is a loop invariant value
11278
11279 switch (MI.getOpcode()) {
11280 case AArch64::SUBSXri:
11281 case AArch64::SUBSWri:
11282 case AArch64::ADDSXri:
11283 case AArch64::ADDSWri:
11284 Comp = &MI;
11285 CompCounterOprNum = 1;
11286 break;
11287 case AArch64::ADDSWrr:
11288 case AArch64::ADDSXrr:
11289 case AArch64::SUBSWrr:
11290 case AArch64::SUBSXrr:
11291 Comp = &MI;
11292 break;
11293 default:
11294 if (isWhileOpcode(MI.getOpcode())) {
11295 Comp = &MI;
11296 break;
11297 }
11298 return nullptr;
11299 }
11300
11301 if (CompCounterOprNum == 0) {
11302 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11303 CompCounterOprNum = 2;
11304 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11305 CompCounterOprNum = 1;
11306 else
11307 return nullptr;
11308 }
11309 break;
11310 }
11311 }
11312 if (!Comp)
11313 return nullptr;
11314
11315 MachineInstr *Update = nullptr;
11316 Register Init;
11317 bool IsUpdatePriorComp;
11318 unsigned UpdateCounterOprNum;
11319 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11320 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11321 return nullptr;
11322
11323 return std::make_unique<AArch64PipelinerLoopInfo>(
11324 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11325 Init, IsUpdatePriorComp, Cond);
11326}
11327
11328/// verifyInstruction - Perform target specific instruction verification.
11329bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11330 StringRef &ErrInfo) const {
11331 // Verify that immediate offsets on load/store instructions are within range.
11332 // Stack objects with an FI operand are excluded as they can be fixed up
11333 // during PEI.
11334 TypeSize Scale(0U, false), Width(0U, false);
11335 int64_t MinOffset, MaxOffset;
11336 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11337 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11338 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11339 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11340 if (Imm < MinOffset || Imm > MaxOffset) {
11341 ErrInfo = "Unexpected immediate on load/store instruction";
11342 return false;
11343 }
11344 }
11345 }
11346
11347 const MCInstrDesc &MCID = MI.getDesc();
11348 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11349 const MachineOperand &MO = MI.getOperand(Op);
11350 switch (MCID.operands()[Op].OperandType) {
11351    case AArch64::OPERAND_IMPLICIT_IMM_0:
11352      if (!MO.isImm() || MO.getImm() != 0) {
11353 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11354 return false;
11355 }
11356 break;
11357    case AArch64::OPERAND_SHIFT_MSL:
11358      if (!MO.isImm() ||
11359          AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11360 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11361 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11362 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11363 return false;
11364 }
11365 break;
11366 default:
11367 break;
11368 }
11369 }
11370 return true;
11371}
11372
11373#define GET_INSTRINFO_HELPERS
11374#define GET_INSTRMAP_INFO
11375#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
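An illustrative sketch of composing fixed and scalable components with these factories (the byte counts are arbitrary):
StackOffset SVEPart   = StackOffset::getScalable(16); // 16 * vscale bytes
StackOffset FixedPart = StackOffset::getFixed(32);    // 32 bytes
StackOffset Total     = SVEPart + FixedPart;          // same as StackOffset::get(32, 16)
int64_t Fixed    = Total.getFixed();    // 32
int64_t Scalable = Total.getScalable(); // 16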
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
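An illustrative round trip through these operand encoders (the chosen constants are arbitrary but encodable):
// Logical immediate: bitmask pattern -> N:immr:imms encoding and back.
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00ff00ff00ff00ffULL, 64);
uint64_t Val = AArch64_AM::decodeLogicalImmediate(Enc, 64); // 0x00ff00ff00ff00ff
// Shifter operand: LSL #12 packed into the shared shift-imm field.
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
unsigned Amount  = AArch64_AM::getShiftValue(Shifter);                // 12
AArch64_AM::ShiftExtendType Kind = AArch64_AM::getShiftType(Shifter); // LSL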
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
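A sketch of querying how a 64-bit constant would be materialized; the AArch64_IMM namespace qualifier and the sample constant are assumptions for illustration, and each resulting ImmInsnModel entry describes one real move-immediate instruction:
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x1234000056780000ULL, 64, Insn);
unsigned NumMoves = Insn.size(); // two nonzero halfwords -> likely MOVZ + MOVK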
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
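A small sketch of the range form; MI is assumed to be a MachineInstr already in scope:
// True only if every operand of MI is a register operand.
bool OnlyRegOperands =
    all_of(MI.operands(), [](const MachineOperand &MO) { return MO.isReg(); });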
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
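A minimal sketch of the builder interface, creating an AArch64 shifted-immediate add; MBB, MBBI, DL and TII are assumed to be in scope, and the registers and immediate are illustrative:
// add x0, x1, #16 (LSL #0)
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X0)
    .addReg(AArch64::X1)
    .addImm(16)  // unsigned 12-bit immediate
    .addImm(0);  // shift amount: 0 or 12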
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
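A sketch of the usual query pattern; MI and Offset are assumed to be in scope, and all out-parameters are optional:
bool UseUnscaledOp = false;
unsigned UnscaledOp = 0;
int64_t EmittableOffset = 0;
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp, &UnscaledOp,
                                       &EmittableOffset);
if (Status & AArch64FrameOffsetIsLegal) {
  // EmittableOffset can be encoded directly in MI's immediate field.
}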
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
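A small sketch of these bit-math helpers as they are typically used for immediate legality checks (values illustrative):
uint64_t Scale = 16;
bool IsPow2   = isPowerOf2_64(Scale);     // true
unsigned Log  = Log2_64(Scale);           // 4
bool FitsImm7 = isInt<7>(int64_t(Scale)); // true: 16 fits a signed 7-bit field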
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
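A sketch of a typical call; MBB, MBBI, DL and TII are assumed to be in scope, and the mixed fixed/scalable adjustment is illustrative:
// SP = SP + 32 bytes + 16 * vscale bytes, tagged as frame setup.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::get(/*Fixed=*/32, /*Scalable=*/16), TII,
                MachineInstr::FrameSetup);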
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
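A sketch of how this is typically driven from frame-index elimination; MI, FrameRegIdx, TII and the starting offset (ObjectOffset is a hypothetical value) are assumed to be in scope:
StackOffset Offset = StackOffset::getFixed(ObjectOffset); // hypothetical start
if (rewriteAArch64FrameIndex(MI, FrameRegIdx, AArch64::FP, Offset, TII)) {
  // The frame index was folded into MI directly.
} else {
  // Offset now holds the part that could not be folded and must be
  // materialized into a scratch register first.
}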
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
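A minimal sketch; NewBB and TailBB are assumed to be MachineBasicBlock pointers left with stale live-in lists after a block split:
// Iterate until the live-in sets of both blocks stop changing.
fullyRecomputeLiveIns({NewBB, TailBB});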
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
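A sketch combining this with MachineFunction::getMachineMemOperand to describe a 16-byte spill-slot store; MF, the frame index FI, and the builder MIB are assumed to be in scope:
MachineMemOperand *MMO = MF.getMachineMemOperand(
    MachinePointerInfo::getFixedStack(MF, FI), MachineMemOperand::MOStore,
    LocationSize::precise(16), Align(16));
MIB.addMemOperand(MMO); // lets later passes reason about the access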
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.