1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/MC/MCAsmInfo.h"
43#include "llvm/MC/MCInst.h"
45#include "llvm/MC/MCInstrDesc.h"
50#include "llvm/Support/LEB128.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define GET_INSTRINFO_CTOR_DTOR
62#include "AArch64GenInstrInfo.inc"
63
65 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
66 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
67
69 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
70 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
71
72static cl::opt<unsigned>
73 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
74 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
75
76static cl::opt<unsigned>
77 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
78 cl::desc("Restrict range of B instructions (DEBUG)"));
79
80AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
81 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
82 AArch64::CATCHRET),
83 RI(STI.getTargetTriple()), Subtarget(STI) {}
84
85/// GetInstSizeInBytes - Return the number of bytes of code the specified
86/// instruction may be. This returns the maximum number of bytes.
87unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
88 const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
108 // The size should preferably be set in
109 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
110 // The specific cases below handle instructions of variable size.
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
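// Worked example (illustrative, not from the upstream source): with
// "patchable-function-entry"="2" on the enclosing function, the
// PATCHABLE_FUNCTION_ENTER pseudo above is sized as 2 * 4 = 8 bytes (two
// NOPs); without the attribute the default of 9 words gives the 36-byte
// XRay sled mentioned in the comment.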
165
166unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
167 unsigned Size = 0;
168 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
169 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
170 while (++I != E && I->isInsideBundle()) {
171 assert(!I->isBundle() && "No nested bundle!");
172 Size += getInstSizeInBytes(*I);
173 }
174 return Size;
175}
176
177static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
178 SmallVectorImpl<MachineOperand> &Cond) {
179 // Block ends with fall-through condbranch.
180 switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(1).getMBB();
185 Cond.push_back(LastInst->getOperand(0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(1).getMBB();
192 Cond.push_back(MachineOperand::CreateImm(-1));
193 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
194 Cond.push_back(LastInst->getOperand(0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(2).getMBB();
201 Cond.push_back(MachineOperand::CreateImm(-1));
202 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
203 Cond.push_back(LastInst->getOperand(0));
204 Cond.push_back(LastInst->getOperand(1));
205 }
206}
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
229bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
230 int64_t BrOffset) const {
231 unsigned Bits = getBranchDisplacementBits(BranchOp);
232 assert(Bits >= 3 && "max branch displacement must be enough to jump "
233 "over conditional branch expansion");
234 return isIntN(Bits, BrOffset / 4);
235}
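// For illustration (not part of the upstream source): TB[N]Z carries a
// 14-bit signed word offset, so with the default aarch64-tbz-offset-bits=14
// a byte displacement is in range iff isIntN(14, BrOffset / 4), i.e. roughly
// -32768 <= BrOffset <= 32764.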
236
237MachineBasicBlock *
238AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
239 switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(1).getMBB();
255 }
256}
257
258void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
259 MachineBasicBlock &NewDestBB,
260 MachineBasicBlock &RestoreBB,
261 const DebugLoc &DL,
262 int64_t BrOffset,
263 RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
274 if (!isInt<33>(BrOffset))
276 "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
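// The lambda above emits a sequence of roughly this shape (illustrative):
//   adrp xN, DestBB              // page address of DestBB
//   add  xN, xN, :lo12:DestBB    // low 12 bits of the page offset
//   br   xN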
286
287 RS->enterBasicBlockEnd(MBB);
288 // If X16 is unused, we can rely on the linker to insert a range extension
289 // thunk if NewDestBB is out of range of a single B instruction.
290 constexpr Register Reg = AArch64::X16;
291 if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
299 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
300 if (Scavenged != AArch64::NoRegister &&
301 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
302 buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Scavenged);
304 return;
305 }
306
307 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
308 // with red zones.
309 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
310 if (!AFI || AFI->hasRedZone().value_or(true))
311 report_fatal_error(
312 "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
323 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
324 .addReg(AArch64::SP, RegState::Define)
325 .addReg(Reg, RegState::Define)
326 .addReg(AArch64::SP)
327 .addImm(16);
328}
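// Illustrative shape of the spill path above (not verbatim output):
//   str x16, [sp, #-16]!    // STRXpre: spill X16, temporarily moving SP
//   b   RestoreBB           // may be range-extended by a linker thunk that
//                           // is free to clobber X16
// and in RestoreBB:
//   ldr x16, [sp], #16      // LDRXpost: reload X16, restore SP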
329
330// Branch analysis.
331bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
332 MachineBasicBlock *&TBB,
333 MachineBasicBlock *&FBB,
334 SmallVectorImpl<MachineOperand> &Cond,
335 bool AllowModify) const {
336 // If the block has no terminators, it just falls into the block after it.
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(LastOpc)) {
357 TBB = LastInst->getOperand(0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
375 while (isUncondBranchOpcode(SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
380 // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
389 // If we're allowed to modify and the block ends in an unconditional branch
390 // which could simply fallthrough, remove the branch. (Note: This case only
391 // matters when we can't understand the whole sequence, otherwise it's also
392 // handled by BranchFolding.cpp.)
393 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
394 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
395 LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
419 parseCondBranch(SecondLastInst, TBB, Cond);
420 FBB = LastInst->getOperand(0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
427 TBB = SecondLastInst->getOperand(0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
446
447bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
448 MachineBranchPredicate &MBP,
449 bool AllowModify) const {
450 // For the moment, handle only a block which ends with a cb(n)zx followed by
451 // a fallthrough. Why this? Because it is a common form.
452 // TODO: Should we handle b.cc?
453
454 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
455 if (I == MBB.end())
456 return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
482
483 MBP.TrueDest = LastInst->getOperand(1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(0);
491 MBP.RHS = MachineOperand::CreateImm(0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
497bool AArch64InstrInfo::reverseBranchCondition(
498 SmallVectorImpl<MachineOperand> &Cond) const {
499 if (Cond[0].getImm() != -1) {
500 // Regular Bcc
501 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
502 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
503 } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
537
538unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
539 int *BytesRemoved) const {
540 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
541 if (I == MBB.end())
542 return 0;
543
544 if (!isUncondBranchOpcode(I->getOpcode()) &&
545 !isCondBranchOpcode(I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
573void AArch64InstrInfo::instantiateCondBranch(
574 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
575 ArrayRef<MachineOperand> Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use addOperand instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Cond[3].getImm());
586 MIB.addMBB(TBB);
587 }
588}
589
590unsigned AArch64InstrInfo::insertBranch(
591 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
592 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
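// For example (illustrative): a two-way conditional branch with TBB, FBB and
// a plain Bcc condition is emitted as
//   b.<cc> TBB
//   b      FBB
// and reports 8 bytes through *BytesAdded; the single-branch cases report 4.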
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(1).getReg();
625 }
626 return VReg;
627}
628
629// Determine if VReg is defined by an instruction that can be folded into a
630// csel instruction. If so, return the folded opcode, and the replacement
631// register.
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
634 VReg = removeCopies(MRI, VReg);
635 if (!Register::isVirtualRegister(VReg))
636 return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
647 return 0;
648 // fall-through to ADDXri and ADDWri.
649 [[fallthrough]];
650 case AArch64::ADDXri:
651 case AArch64::ADDWri:
652 // add x, 1 -> csinc.
653 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
654 DefMI->getOperand(3).getImm() != 0)
655 return 0;
656 SrcOpNum = 1;
657 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
658 break;
659
660 case AArch64::ORNXrr:
661 case AArch64::ORNWrr: {
662 // not x -> csinv, represented as orn dst, xzr, src.
663 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
664 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
665 return 0;
666 SrcOpNum = 2;
667 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
668 break;
669 }
670
671 case AArch64::SUBSXrr:
672 case AArch64::SUBSWrr:
673 // if NZCV is used, do not fold.
674 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
675 return 0;
676 // fall-through to SUBXrr and SUBWrr.
677 [[fallthrough]];
678 case AArch64::SUBXrr:
679 case AArch64::SUBWrr: {
680 // neg x -> csneg, represented as sub dst, xzr, src.
681 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
682 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
683 return 0;
684 SrcOpNum = 2;
685 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
686 break;
687 }
688 default:
689 return 0;
690 }
691 assert(Opc && SrcOpNum && "Missing parameters");
692
693 if (NewVReg)
694 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
695 return Opc;
696}
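// For example (illustrative): a virtual register defined by
//   %t = ADDWri %a, 1, 0        ; i.e. %t = %a + 1
// yields CSINCWr with *NewVReg = %a, so the caller can fold the increment
// into a csinc instead of emitting a separate add followed by csel.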
697
698bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
699 ArrayRef<MachineOperand> Cond,
700 Register DstReg, Register TrueReg,
701 Register FalseReg, int &CondCycles,
702 int &TrueCycles,
703 int &FalseCycles) const {
704 // Check register classes.
705 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
706 const TargetRegisterClass *RC =
707 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
708 if (!RC)
709 return false;
710
711 // Also need to check the dest regclass, in case we're trying to optimize
712 // something like:
713 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
714 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
715 return false;
716
717 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
718 unsigned ExtraCondLat = Cond.size() != 1;
719
720 // GPRs are handled by csel.
721 // FIXME: Fold in x+1, -x, and ~x when applicable.
722 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
723 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
724 // Single-cycle csel, csinc, csinv, and csneg.
725 CondCycles = 1 + ExtraCondLat;
726 TrueCycles = FalseCycles = 1;
727 if (canFoldIntoCSel(MRI, TrueReg))
728 TrueCycles = 0;
729 else if (canFoldIntoCSel(MRI, FalseReg))
730 FalseCycles = 0;
731 return true;
732 }
733
734 // Scalar floating point is handled by fcsel.
735 // FIXME: Form fabs, fmin, and fmax when applicable.
736 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
737 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
738 CondCycles = 5 + ExtraCondLat;
739 TrueCycles = FalseCycles = 2;
740 return true;
741 }
742
743 // Can't do vectors.
744 return false;
745}
746
747void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
748 MachineBasicBlock::iterator I,
749 const DebugLoc &DL, Register DstReg,
750 ArrayRef<MachineOperand> Cond,
751 Register TrueReg, Register FalseReg) const {
752 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
753
754 // Parse the condition code, see parseCondBranch() above.
755 AArch64CC::CondCode CC;
756 switch (Cond.size()) {
757 default:
758 llvm_unreachable("Unknown condition opcode in Cond");
759 case 1: // b.cc
760 CC = AArch64CC::CondCode(Cond[0].getImm());
761 break;
762 case 3: { // cbz/cbnz
763 // We must insert a compare against 0.
764 bool Is64Bit;
765 switch (Cond[1].getImm()) {
766 default:
767 llvm_unreachable("Unknown branch opcode in Cond");
768 case AArch64::CBZW:
769 Is64Bit = false;
770 CC = AArch64CC::EQ;
771 break;
772 case AArch64::CBZX:
773 Is64Bit = true;
774 CC = AArch64CC::EQ;
775 break;
776 case AArch64::CBNZW:
777 Is64Bit = false;
778 CC = AArch64CC::NE;
779 break;
780 case AArch64::CBNZX:
781 Is64Bit = true;
782 CC = AArch64CC::NE;
783 break;
784 }
785 Register SrcReg = Cond[2].getReg();
786 if (Is64Bit) {
787 // cmp reg, #0 is actually subs xzr, reg, #0.
788 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
789 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
790 .addReg(SrcReg)
791 .addImm(0)
792 .addImm(0);
793 } else {
794 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
795 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
796 .addReg(SrcReg)
797 .addImm(0)
798 .addImm(0);
799 }
800 break;
801 }
802 case 4: { // tbz/tbnz
803 // We must insert a tst instruction.
804 switch (Cond[1].getImm()) {
805 default:
806 llvm_unreachable("Unknown branch opcode in Cond");
807 case AArch64::TBZW:
808 case AArch64::TBZX:
809 CC = AArch64CC::EQ;
810 break;
811 case AArch64::TBNZW:
812 case AArch64::TBNZX:
813 CC = AArch64CC::NE;
814 break;
815 }
816 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
817 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
818 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
819 .addReg(Cond[2].getReg())
820 .addImm(
821 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
822 else
823 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
824 .addReg(Cond[2].getReg())
825 .addImm(
826 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
827 break;
828 }
829 }
830
831 unsigned Opc = 0;
832 const TargetRegisterClass *RC = nullptr;
833 bool TryFold = false;
834 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
835 RC = &AArch64::GPR64RegClass;
836 Opc = AArch64::CSELXr;
837 TryFold = true;
838 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
839 RC = &AArch64::GPR32RegClass;
840 Opc = AArch64::CSELWr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
843 RC = &AArch64::FPR64RegClass;
844 Opc = AArch64::FCSELDrrr;
845 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
846 RC = &AArch64::FPR32RegClass;
847 Opc = AArch64::FCSELSrrr;
848 }
849 assert(RC && "Unsupported regclass");
850
851 // Try folding simple instructions into the csel.
852 if (TryFold) {
853 unsigned NewVReg = 0;
854 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
855 if (FoldedOpc) {
856 // The folded opcodes csinc, csinv and csneg apply the operation to
857 // FalseReg, so we need to invert the condition.
858 CC = AArch64CC::getInvertedCondCode(CC);
859 TrueReg = FalseReg;
860 } else
861 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
862
863 // Fold the operation. Leave any dead instructions for DCE to clean up.
864 if (FoldedOpc) {
865 FalseReg = NewVReg;
866 Opc = FoldedOpc;
867 // This extends the live range of NewVReg.
868 MRI.clearKillFlags(NewVReg);
869 }
870 }
871
872 // Pull all virtual registers into the appropriate class.
873 MRI.constrainRegClass(TrueReg, RC);
874 MRI.constrainRegClass(FalseReg, RC);
875
876 // Insert the csel.
877 BuildMI(MBB, I, DL, get(Opc), DstReg)
878 .addReg(TrueReg)
879 .addReg(FalseReg)
880 .addImm(CC);
881}
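// For example (illustrative): selecting between %t and %f under a 32-bit
// cbz-style condition on %x (Cond = {-1, CBZW, %x}) produces roughly
//   subs wzr, %x, #0           ; SUBSWri re-creating the compare
//   csel %dst, %t, %f, eq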
882
883// Return true if Imm can be loaded into a register by a "cheap" sequence of
884// instructions. For now, "cheap" means at most two instructions.
885static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
886 if (BitSize == 32)
887 return true;
888
889 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
890 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
891 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
892 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
893
894 return Is.size() <= 2;
895}
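// For example (illustrative): MOVi64imm 0x00000000ffff0001 expands to a
// MOVZ plus a single MOVK (two instructions) and is therefore considered
// cheap, whereas an immediate that needs MOVZ plus three MOVKs is not.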
896
897// FIXME: this implementation should be micro-architecture dependent, so a
898// micro-architecture target hook should be introduced here in future.
899bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
900 if (Subtarget.hasExynosCheapAsMoveHandling()) {
901 if (isExynosCheapAsMove(MI))
902 return true;
903 return MI.isAsCheapAsAMove();
904 }
905
906 switch (MI.getOpcode()) {
907 default:
908 return MI.isAsCheapAsAMove();
909
910 case AArch64::ADDWrs:
911 case AArch64::ADDXrs:
912 case AArch64::SUBWrs:
913 case AArch64::SUBXrs:
914 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
915
916 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
917 // ORRXri, it is as cheap as MOV.
918 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
919 case AArch64::MOVi32imm:
920 return isCheapImmediate(MI, 32);
921 case AArch64::MOVi64imm:
922 return isCheapImmediate(MI, 64);
923 }
924}
925
926bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
927 switch (MI.getOpcode()) {
928 default:
929 return false;
930
931 case AArch64::ADDWrs:
932 case AArch64::ADDXrs:
933 case AArch64::ADDSWrs:
934 case AArch64::ADDSXrs: {
935 unsigned Imm = MI.getOperand(3).getImm();
936 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
937 if (ShiftVal == 0)
938 return true;
939 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
940 }
941
942 case AArch64::ADDWrx:
943 case AArch64::ADDXrx:
944 case AArch64::ADDXrx64:
945 case AArch64::ADDSWrx:
946 case AArch64::ADDSXrx:
947 case AArch64::ADDSXrx64: {
948 unsigned Imm = MI.getOperand(3).getImm();
949 switch (AArch64_AM::getArithExtendType(Imm)) {
950 default:
951 return false;
952 case AArch64_AM::UXTB:
953 case AArch64_AM::UXTH:
954 case AArch64_AM::UXTW:
955 case AArch64_AM::UXTX:
956 return AArch64_AM::getArithShiftValue(Imm) <= 4;
957 }
958 }
959
960 case AArch64::SUBWrs:
961 case AArch64::SUBSWrs: {
962 unsigned Imm = MI.getOperand(3).getImm();
963 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
964 return ShiftVal == 0 ||
965 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
966 }
967
968 case AArch64::SUBXrs:
969 case AArch64::SUBSXrs: {
970 unsigned Imm = MI.getOperand(3).getImm();
971 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
972 return ShiftVal == 0 ||
973 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
974 }
975
976 case AArch64::SUBWrx:
977 case AArch64::SUBXrx:
978 case AArch64::SUBXrx64:
979 case AArch64::SUBSWrx:
980 case AArch64::SUBSXrx:
981 case AArch64::SUBSXrx64: {
982 unsigned Imm = MI.getOperand(3).getImm();
983 switch (AArch64_AM::getArithExtendType(Imm)) {
984 default:
985 return false;
986 case AArch64_AM::UXTB:
987 case AArch64_AM::UXTH:
988 case AArch64_AM::UXTW:
989 case AArch64_AM::UXTX:
990 return AArch64_AM::getArithShiftValue(Imm) == 0;
991 }
992 }
993
994 case AArch64::LDRBBroW:
995 case AArch64::LDRBBroX:
996 case AArch64::LDRBroW:
997 case AArch64::LDRBroX:
998 case AArch64::LDRDroW:
999 case AArch64::LDRDroX:
1000 case AArch64::LDRHHroW:
1001 case AArch64::LDRHHroX:
1002 case AArch64::LDRHroW:
1003 case AArch64::LDRHroX:
1004 case AArch64::LDRQroW:
1005 case AArch64::LDRQroX:
1006 case AArch64::LDRSBWroW:
1007 case AArch64::LDRSBWroX:
1008 case AArch64::LDRSBXroW:
1009 case AArch64::LDRSBXroX:
1010 case AArch64::LDRSHWroW:
1011 case AArch64::LDRSHWroX:
1012 case AArch64::LDRSHXroW:
1013 case AArch64::LDRSHXroX:
1014 case AArch64::LDRSWroW:
1015 case AArch64::LDRSWroX:
1016 case AArch64::LDRSroW:
1017 case AArch64::LDRSroX:
1018 case AArch64::LDRWroW:
1019 case AArch64::LDRWroX:
1020 case AArch64::LDRXroW:
1021 case AArch64::LDRXroX:
1022 case AArch64::PRFMroW:
1023 case AArch64::PRFMroX:
1024 case AArch64::STRBBroW:
1025 case AArch64::STRBBroX:
1026 case AArch64::STRBroW:
1027 case AArch64::STRBroX:
1028 case AArch64::STRDroW:
1029 case AArch64::STRDroX:
1030 case AArch64::STRHHroW:
1031 case AArch64::STRHHroX:
1032 case AArch64::STRHroW:
1033 case AArch64::STRHroX:
1034 case AArch64::STRQroW:
1035 case AArch64::STRQroX:
1036 case AArch64::STRSroW:
1037 case AArch64::STRSroX:
1038 case AArch64::STRWroW:
1039 case AArch64::STRWroX:
1040 case AArch64::STRXroW:
1041 case AArch64::STRXroX: {
1042 unsigned IsSigned = MI.getOperand(3).getImm();
1043 return !IsSigned;
1044 }
1045 }
1046}
1047
1048bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1049 unsigned Opc = MI.getOpcode();
1050 switch (Opc) {
1051 default:
1052 return false;
1053 case AArch64::SEH_StackAlloc:
1054 case AArch64::SEH_SaveFPLR:
1055 case AArch64::SEH_SaveFPLR_X:
1056 case AArch64::SEH_SaveReg:
1057 case AArch64::SEH_SaveReg_X:
1058 case AArch64::SEH_SaveRegP:
1059 case AArch64::SEH_SaveRegP_X:
1060 case AArch64::SEH_SaveFReg:
1061 case AArch64::SEH_SaveFReg_X:
1062 case AArch64::SEH_SaveFRegP:
1063 case AArch64::SEH_SaveFRegP_X:
1064 case AArch64::SEH_SetFP:
1065 case AArch64::SEH_AddFP:
1066 case AArch64::SEH_Nop:
1067 case AArch64::SEH_PrologEnd:
1068 case AArch64::SEH_EpilogStart:
1069 case AArch64::SEH_EpilogEnd:
1070 case AArch64::SEH_PACSignLR:
1071 case AArch64::SEH_SaveAnyRegQP:
1072 case AArch64::SEH_SaveAnyRegQPX:
1073 return true;
1074 }
1075}
1076
1077bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1078 Register &SrcReg, Register &DstReg,
1079 unsigned &SubIdx) const {
1080 switch (MI.getOpcode()) {
1081 default:
1082 return false;
1083 case AArch64::SBFMXri: // aka sxtw
1084 case AArch64::UBFMXri: // aka uxtw
1085 // Check for the 32 -> 64 bit extension case, these instructions can do
1086 // much more.
1087 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1088 return false;
1089 // This is a signed or unsigned 32 -> 64 bit extension.
1090 SrcReg = MI.getOperand(1).getReg();
1091 DstReg = MI.getOperand(0).getReg();
1092 SubIdx = AArch64::sub_32;
1093 return true;
1094 }
1095}
1096
1097bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1098 const MachineInstr &MIa, const MachineInstr &MIb) const {
1099 const TargetRegisterInfo *TRI = &getRegisterInfo();
1100 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1101 int64_t OffsetA = 0, OffsetB = 0;
1102 TypeSize WidthA(0, false), WidthB(0, false);
1103 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1104
1105 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1106 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1107
1108 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1109 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1110 return false;
1111
1112 // Retrieve the base, offset from the base and width. Width
1113 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1114 // the bases are identical, and the offset of a lower memory access +
1115 // the width doesn't overlap the offset of a higher memory access,
1116 // then the memory accesses are different.
1117 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1118 // are assumed to have the same scale (vscale).
1119 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1120 WidthA, TRI) &&
1121 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1122 WidthB, TRI)) {
1123 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1124 OffsetAIsScalable == OffsetBIsScalable) {
1125 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1126 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1127 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1128 if (LowWidth.isScalable() == OffsetAIsScalable &&
1129 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1130 return true;
1131 }
1132 }
1133 return false;
1134}
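// For example (illustrative): two STRXui stores off the same base register
// with immediate offsets 0 and 1 (byte offsets 0 and 8 after scaling) each
// have an 8-byte width; since 0 + 8 <= 8 the accesses are reported disjoint.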
1135
1136bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1137 const MachineBasicBlock *MBB,
1138 const MachineFunction &MF) const {
1139 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1140 return true;
1141
1142 // Do not move an instruction that can be recognized as a branch target.
1143 if (hasBTISemantics(MI))
1144 return true;
1145
1146 switch (MI.getOpcode()) {
1147 case AArch64::HINT:
1148 // CSDB hints are scheduling barriers.
1149 if (MI.getOperand(0).getImm() == 0x14)
1150 return true;
1151 break;
1152 case AArch64::DSB:
1153 case AArch64::ISB:
1154 // DSB and ISB also are scheduling barriers.
1155 return true;
1156 case AArch64::MSRpstatesvcrImm1:
1157 // SMSTART and SMSTOP are also scheduling barriers.
1158 return true;
1159 default:;
1160 }
1161 if (isSEHInstruction(MI))
1162 return true;
1163 auto Next = std::next(MI.getIterator());
1164 return Next != MBB->end() && Next->isCFIInstruction();
1165}
1166
1167/// analyzeCompare - For a comparison instruction, return the source registers
1168/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1169/// Return true if the comparison instruction can be analyzed.
1170bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1171 Register &SrcReg2, int64_t &CmpMask,
1172 int64_t &CmpValue) const {
1173 // The first operand can be a frame index where we'd normally expect a
1174 // register.
1175 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1176 if (!MI.getOperand(1).isReg())
1177 return false;
1178
1179 switch (MI.getOpcode()) {
1180 default:
1181 break;
1182 case AArch64::PTEST_PP:
1183 case AArch64::PTEST_PP_ANY:
1184 SrcReg = MI.getOperand(0).getReg();
1185 SrcReg2 = MI.getOperand(1).getReg();
1186 // Not sure about the mask and value for now...
1187 CmpMask = ~0;
1188 CmpValue = 0;
1189 return true;
1190 case AArch64::SUBSWrr:
1191 case AArch64::SUBSWrs:
1192 case AArch64::SUBSWrx:
1193 case AArch64::SUBSXrr:
1194 case AArch64::SUBSXrs:
1195 case AArch64::SUBSXrx:
1196 case AArch64::ADDSWrr:
1197 case AArch64::ADDSWrs:
1198 case AArch64::ADDSWrx:
1199 case AArch64::ADDSXrr:
1200 case AArch64::ADDSXrs:
1201 case AArch64::ADDSXrx:
1202 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1203 SrcReg = MI.getOperand(1).getReg();
1204 SrcReg2 = MI.getOperand(2).getReg();
1205 CmpMask = ~0;
1206 CmpValue = 0;
1207 return true;
1208 case AArch64::SUBSWri:
1209 case AArch64::ADDSWri:
1210 case AArch64::SUBSXri:
1211 case AArch64::ADDSXri:
1212 SrcReg = MI.getOperand(1).getReg();
1213 SrcReg2 = 0;
1214 CmpMask = ~0;
1215 CmpValue = MI.getOperand(2).getImm();
1216 return true;
1217 case AArch64::ANDSWri:
1218 case AArch64::ANDSXri:
1219 // ANDS does not use the same encoding scheme as the other xxxS
1220 // instructions.
1221 SrcReg = MI.getOperand(1).getReg();
1222 SrcReg2 = 0;
1223 CmpMask = ~0;
1224 CmpValue = AArch64_AM::decodeLogicalImmediate(
1225 MI.getOperand(2).getImm(),
1226 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1227 return true;
1228 }
1229
1230 return false;
1231}
1232
1233static bool UpdateOperandRegClass(MachineInstr &Instr) {
1234 MachineBasicBlock *MBB = Instr.getParent();
1235 assert(MBB && "Can't get MachineBasicBlock here");
1236 MachineFunction *MF = MBB->getParent();
1237 assert(MF && "Can't get MachineFunction here");
1238 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1239 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1240 MachineRegisterInfo *MRI = &MF->getRegInfo();
1241
1242 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1243 ++OpIdx) {
1244 MachineOperand &MO = Instr.getOperand(OpIdx);
1245 const TargetRegisterClass *OpRegCstraints =
1246 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1247
1248 // If there's no constraint, there's nothing to do.
1249 if (!OpRegCstraints)
1250 continue;
1251 // If the operand is a frame index, there's nothing to do here.
1252 // A frame index operand will resolve correctly during PEI.
1253 if (MO.isFI())
1254 continue;
1255
1256 assert(MO.isReg() &&
1257 "Operand has register constraints without being a register!");
1258
1259 Register Reg = MO.getReg();
1260 if (Reg.isPhysical()) {
1261 if (!OpRegCstraints->contains(Reg))
1262 return false;
1263 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1264 !MRI->constrainRegClass(Reg, OpRegCstraints))
1265 return false;
1266 }
1267
1268 return true;
1269}
1270
1271/// Return the opcode that does not set flags when possible - otherwise
1272/// return the original opcode. The caller is responsible for doing the actual
1273/// substitution and legality checking.
1274unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1275 // Don't convert all compare instructions, because for some the zero register
1276 // encoding becomes the sp register.
1277 bool MIDefinesZeroReg = false;
1278 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1279 MIDefinesZeroReg = true;
1280
1281 switch (MI.getOpcode()) {
1282 default:
1283 return MI.getOpcode();
1284 case AArch64::ADDSWrr:
1285 return AArch64::ADDWrr;
1286 case AArch64::ADDSWri:
1287 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1288 case AArch64::ADDSWrs:
1289 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1290 case AArch64::ADDSWrx:
1291 return AArch64::ADDWrx;
1292 case AArch64::ADDSXrr:
1293 return AArch64::ADDXrr;
1294 case AArch64::ADDSXri:
1295 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1296 case AArch64::ADDSXrs:
1297 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1298 case AArch64::ADDSXrx:
1299 return AArch64::ADDXrx;
1300 case AArch64::SUBSWrr:
1301 return AArch64::SUBWrr;
1302 case AArch64::SUBSWri:
1303 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1304 case AArch64::SUBSWrs:
1305 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1306 case AArch64::SUBSWrx:
1307 return AArch64::SUBWrx;
1308 case AArch64::SUBSXrr:
1309 return AArch64::SUBXrr;
1310 case AArch64::SUBSXri:
1311 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1312 case AArch64::SUBSXrs:
1313 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1314 case AArch64::SUBSXrx:
1315 return AArch64::SUBXrx;
1316 }
1317}
1318
1319enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1320
1321/// True when condition flags are accessed (either by writing or reading)
1322/// on the instruction trace starting at From and ending at To.
1323///
1324/// Note: If From and To are from different blocks it's assumed CC are accessed
1325/// on the path.
1326static bool areCFlagsAccessedBetweenInstrs(
1327 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1328 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1329 // Early exit if To is at the beginning of the BB.
1330 if (To == To->getParent()->begin())
1331 return true;
1332
1333 // Check whether the instructions are in the same basic block
1334 // If not, assume the condition flags might get modified somewhere.
1335 if (To->getParent() != From->getParent())
1336 return true;
1337
1338 // From must be above To.
1339 assert(std::any_of(
1340 ++To.getReverse(), To->getParent()->rend(),
1341 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1342
1343 // We iterate backward starting at \p To until we hit \p From.
1344 for (const MachineInstr &Instr :
1345 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1346 if (((AccessToCheck & AK_Write) &&
1347 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1348 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1349 return true;
1350 }
1351 return false;
1352}
1353
1354/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1355/// operation which could set the flags in an identical manner
1356bool AArch64InstrInfo::optimizePTestInstr(
1357 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1358 const MachineRegisterInfo *MRI) const {
1359 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1360 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1361 auto NewOp = Pred->getOpcode();
1362 bool OpChanged = false;
1363
1364 unsigned MaskOpcode = Mask->getOpcode();
1365 unsigned PredOpcode = Pred->getOpcode();
1366 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1367 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1368
1369 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1370 getElementSizeForOpcode(MaskOpcode) ==
1371 getElementSizeForOpcode(PredOpcode) &&
1372 Mask->getOperand(1).getImm() == 31) {
1373 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1374 // redundant since WHILE performs an implicit PTEST with an all active
1375 // mask. Must be an all active predicate of matching element size.
1376
1377 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1378 // PTEST_LIKE instruction uses the same all active mask and the element
1379 // size matches. If the PTEST has a condition of any then it is always
1380 // redundant.
1381 if (PredIsPTestLike) {
1382 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1383 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1384 return false;
1385 }
1386
1387 // Fallthrough to simply remove the PTEST.
1388 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1389 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1390 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1391 // instruction that sets the flags as PTEST would. This is only valid when
1392 // the condition is any.
1393
1394 // Fallthrough to simply remove the PTEST.
1395 } else if (PredIsPTestLike) {
1396 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1397 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1398 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1399 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1400 // performed by the compare could consider fewer lanes for these element
1401 // sizes.
1402 //
1403 // For example, consider
1404 //
1405 // ptrue p0.b ; P0=1111-1111-1111-1111
1406 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1407 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1408 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1409 // ; ^ last active
1410 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1411 // ; ^ last active
1412 //
1413 // where the compare generates a canonical all active 32-bit predicate
1414 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1415 // active flag, whereas the PTEST instruction with the same mask doesn't.
1416 // For PTEST_ANY this doesn't apply as the flags in this case would be
1417 // identical regardless of element size.
1418 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1419 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1420 if ((Mask != PTestLikeMask) ||
1421 (PredElementSize != AArch64::ElementSizeB &&
1422 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1423 return false;
1424
1425 // Fallthrough to simply remove the PTEST.
1426 } else {
1427 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1428 // opcode so the PTEST becomes redundant.
1429 switch (PredOpcode) {
1430 case AArch64::AND_PPzPP:
1431 case AArch64::BIC_PPzPP:
1432 case AArch64::EOR_PPzPP:
1433 case AArch64::NAND_PPzPP:
1434 case AArch64::NOR_PPzPP:
1435 case AArch64::ORN_PPzPP:
1436 case AArch64::ORR_PPzPP:
1437 case AArch64::BRKA_PPzP:
1438 case AArch64::BRKPA_PPzPP:
1439 case AArch64::BRKB_PPzP:
1440 case AArch64::BRKPB_PPzPP:
1441 case AArch64::RDFFR_PPz: {
1442 // Check to see if our mask is the same. If not the resulting flag bits
1443 // may be different and we can't remove the ptest.
1444 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1445 if (Mask != PredMask)
1446 return false;
1447 break;
1448 }
1449 case AArch64::BRKN_PPzP: {
1450 // BRKN uses an all active implicit mask to set flags unlike the other
1451 // flag-setting instructions.
1452 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1453 if ((MaskOpcode != AArch64::PTRUE_B) ||
1454 (Mask->getOperand(1).getImm() != 31))
1455 return false;
1456 break;
1457 }
1458 case AArch64::PTRUE_B:
1459 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1460 break;
1461 default:
1462 // Bail out if we don't recognize the input
1463 return false;
1464 }
1465
1466 NewOp = convertToFlagSettingOpc(PredOpcode);
1467 OpChanged = true;
1468 }
1469
1470 const TargetRegisterInfo *TRI = &getRegisterInfo();
1471
1472 // If another instruction between Pred and PTest accesses flags, don't remove
1473 // the ptest or update the earlier instruction to modify them.
1474 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1475 return false;
1476
1477 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1478 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1479 // operand to be replaced with an equivalent instruction that also sets the
1480 // flags.
1481 Pred->setDesc(get(NewOp));
1482 PTest->eraseFromParent();
1483 if (OpChanged) {
1484 bool succeeded = UpdateOperandRegClass(*Pred);
1485 (void)succeeded;
1486 assert(succeeded && "Operands have incompatible register classes!");
1487 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1488 }
1489
1490 // Ensure that the flags def is live.
1491 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1492 unsigned i = 0, e = Pred->getNumOperands();
1493 for (; i != e; ++i) {
1494 MachineOperand &MO = Pred->getOperand(i);
1495 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1496 MO.setIsDead(false);
1497 break;
1498 }
1499 }
1500 }
1501 return true;
1502}
1503
1504/// Try to optimize a compare instruction. A compare instruction is an
1505/// instruction which produces AArch64::NZCV.
1506/// It is a true compare instruction when there are no uses of its
1507/// destination register.
1508///
1509/// The following steps are tried in order:
1510/// 1. Convert CmpInstr into an unconditional version.
1511/// 2. Remove CmpInstr if there is an instruction above it that produces the
1512/// needed condition code, or an instruction that can be converted into
1513/// such an instruction.
1514/// Only comparison with zero is supported.
1515bool AArch64InstrInfo::optimizeCompareInstr(
1516 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1517 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1518 assert(CmpInstr.getParent());
1519 assert(MRI);
1520
1521 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1522 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1523 if (DeadNZCVIdx != -1) {
1524 if (CmpInstr.definesRegister(AArch64::WZR) ||
1525 CmpInstr.definesRegister(AArch64::XZR)) {
1526 CmpInstr.eraseFromParent();
1527 return true;
1528 }
1529 unsigned Opc = CmpInstr.getOpcode();
1530 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1531 if (NewOpc == Opc)
1532 return false;
1533 const MCInstrDesc &MCID = get(NewOpc);
1534 CmpInstr.setDesc(MCID);
1535 CmpInstr.removeOperand(DeadNZCVIdx);
1536 bool succeeded = UpdateOperandRegClass(CmpInstr);
1537 (void)succeeded;
1538 assert(succeeded && "Some operands reg class are incompatible!");
1539 return true;
1540 }
1541
1542 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1543 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1544 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1545
1546 if (SrcReg2 != 0)
1547 return false;
1548
1549 // CmpInstr is a Compare instruction if destination register is not used.
1550 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1551 return false;
1552
1553 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1554 return true;
1555 return (CmpValue == 0 || CmpValue == 1) &&
1556 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1557}
1558
1559/// Get opcode of S version of Instr.
1560/// If Instr is S version its opcode is returned.
1561/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1562/// or we are not interested in it.
1563static unsigned sForm(MachineInstr &Instr) {
1564 switch (Instr.getOpcode()) {
1565 default:
1566 return AArch64::INSTRUCTION_LIST_END;
1567
1568 case AArch64::ADDSWrr:
1569 case AArch64::ADDSWri:
1570 case AArch64::ADDSXrr:
1571 case AArch64::ADDSXri:
1572 case AArch64::SUBSWrr:
1573 case AArch64::SUBSWri:
1574 case AArch64::SUBSXrr:
1575 case AArch64::SUBSXri:
1576 return Instr.getOpcode();
1577
1578 case AArch64::ADDWrr:
1579 return AArch64::ADDSWrr;
1580 case AArch64::ADDWri:
1581 return AArch64::ADDSWri;
1582 case AArch64::ADDXrr:
1583 return AArch64::ADDSXrr;
1584 case AArch64::ADDXri:
1585 return AArch64::ADDSXri;
1586 case AArch64::ADCWr:
1587 return AArch64::ADCSWr;
1588 case AArch64::ADCXr:
1589 return AArch64::ADCSXr;
1590 case AArch64::SUBWrr:
1591 return AArch64::SUBSWrr;
1592 case AArch64::SUBWri:
1593 return AArch64::SUBSWri;
1594 case AArch64::SUBXrr:
1595 return AArch64::SUBSXrr;
1596 case AArch64::SUBXri:
1597 return AArch64::SUBSXri;
1598 case AArch64::SBCWr:
1599 return AArch64::SBCSWr;
1600 case AArch64::SBCXr:
1601 return AArch64::SBCSXr;
1602 case AArch64::ANDWri:
1603 return AArch64::ANDSWri;
1604 case AArch64::ANDXri:
1605 return AArch64::ANDSXri;
1606 }
1607}
1608
1609/// Check if AArch64::NZCV should be alive in successors of MBB.
1610static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1611 for (auto *BB : MBB->successors())
1612 if (BB->isLiveIn(AArch64::NZCV))
1613 return true;
1614 return false;
1615}
1616
1617/// \returns The condition code operand index for \p Instr if it is a branch
1618/// or select and -1 otherwise.
1619static int
1620findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1621 switch (Instr.getOpcode()) {
1622 default:
1623 return -1;
1624
1625 case AArch64::Bcc: {
1626 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1627 assert(Idx >= 2);
1628 return Idx - 2;
1629 }
1630
1631 case AArch64::CSINVWr:
1632 case AArch64::CSINVXr:
1633 case AArch64::CSINCWr:
1634 case AArch64::CSINCXr:
1635 case AArch64::CSELWr:
1636 case AArch64::CSELXr:
1637 case AArch64::CSNEGWr:
1638 case AArch64::CSNEGXr:
1639 case AArch64::FCSELSrrr:
1640 case AArch64::FCSELDrrr: {
1641 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1642 assert(Idx >= 1);
1643 return Idx - 1;
1644 }
1645 }
1646}
1647
1648/// Find a condition code used by the instruction.
1649/// Returns AArch64CC::Invalid if either the instruction does not use condition
1650/// codes or we don't optimize CmpInstr in the presence of such instructions.
1651static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1652 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1653 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1654 Instr.getOperand(CCIdx).getImm())
1655 : AArch64CC::Invalid;
1656}
1657
1658static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1660 UsedNZCV UsedFlags;
1661 switch (CC) {
1662 default:
1663 break;
1664
1665 case AArch64CC::EQ: // Z set
1666 case AArch64CC::NE: // Z clear
1667 UsedFlags.Z = true;
1668 break;
1669
1670 case AArch64CC::HI: // Z clear and C set
1671 case AArch64CC::LS: // Z set or C clear
1672 UsedFlags.Z = true;
1673 [[fallthrough]];
1674 case AArch64CC::HS: // C set
1675 case AArch64CC::LO: // C clear
1676 UsedFlags.C = true;
1677 break;
1678
1679 case AArch64CC::MI: // N set
1680 case AArch64CC::PL: // N clear
1681 UsedFlags.N = true;
1682 break;
1683
1684 case AArch64CC::VS: // V set
1685 case AArch64CC::VC: // V clear
1686 UsedFlags.V = true;
1687 break;
1688
1689 case AArch64CC::GT: // Z clear, N and V the same
1690 case AArch64CC::LE: // Z set, N and V differ
1691 UsedFlags.Z = true;
1692 [[fallthrough]];
1693 case AArch64CC::GE: // N and V the same
1694 case AArch64CC::LT: // N and V differ
1695 UsedFlags.N = true;
1696 UsedFlags.V = true;
1697 break;
1698 }
1699 return UsedFlags;
1700}
1701
1702/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1703/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1704/// \returns std::nullopt otherwise.
1705///
1706/// Collect instructions using that flags in \p CCUseInstrs if provided.
1707std::optional<UsedNZCV>
1708llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1709 const TargetRegisterInfo &TRI,
1710 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1711 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1712 if (MI.getParent() != CmpParent)
1713 return std::nullopt;
1714
1715 if (areCFlagsAliveInSuccessors(CmpParent))
1716 return std::nullopt;
1717
1718 UsedNZCV NZCVUsedAfterCmp;
1719 for (MachineInstr &Instr : instructionsWithoutDebug(
1720 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1721 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1722 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1723 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1724 return std::nullopt;
1725 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1726 if (CCUseInstrs)
1727 CCUseInstrs->push_back(&Instr);
1728 }
1729 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1730 break;
1731 }
1732 return NZCVUsedAfterCmp;
1733}
1734
1735static bool isADDSRegImm(unsigned Opcode) {
1736 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1737}
1738
1739static bool isSUBSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1741}
1742
1743/// Check if CmpInstr can be substituted by MI.
1744///
1745/// CmpInstr can be substituted:
1746/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1747/// - and, MI and CmpInstr are from the same MachineBB
1748/// - and, condition flags are not alive in successors of the CmpInstr parent
1749/// - and, if MI opcode is the S form there must be no defs of flags between
1750/// MI and CmpInstr
1751/// or if MI opcode is not the S form there must be neither defs of flags
1752/// nor uses of flags between MI and CmpInstr.
1753/// - and, if C/V flags are not used after CmpInstr
1754/// or if N flag is used but MI produces poison value if signed overflow
1755/// occurs.
1756static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1757 const TargetRegisterInfo &TRI) {
1758 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1759 // that may or may not set flags.
1760 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1761
1762 const unsigned CmpOpcode = CmpInstr.getOpcode();
1763 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1764 return false;
1765
1766 assert((CmpInstr.getOperand(2).isImm() &&
1767 CmpInstr.getOperand(2).getImm() == 0) &&
1768 "Caller guarantees that CmpInstr compares with constant 0");
1769
1770 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1771 if (!NZVCUsed || NZVCUsed->C)
1772 return false;
1773
1774 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1775 // '%vreg = add ...' or '%vreg = sub ...'.
1776 // Condition flag V is used to indicate signed overflow.
1777 // 1) MI and CmpInstr set N and V to the same value.
1778 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1779 // signed overflow occurs, so CmpInstr could still be simplified away.
1780 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1781 return false;
1782
1783 AccessKind AccessToCheck = AK_Write;
1784 if (sForm(MI) != MI.getOpcode())
1785 AccessToCheck = AK_All;
1786 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1787}
1788
1789/// Substitute an instruction comparing to zero with another instruction
1790/// which produces needed condition flags.
1791///
1792/// Return true on success.
1793bool AArch64InstrInfo::substituteCmpToZero(
1794 MachineInstr &CmpInstr, unsigned SrcReg,
1795 const MachineRegisterInfo &MRI) const {
1796 // Get the unique definition of SrcReg.
1797 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1798 if (!MI)
1799 return false;
1800
1801 const TargetRegisterInfo &TRI = getRegisterInfo();
1802
1803 unsigned NewOpc = sForm(*MI);
1804 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1805 return false;
1806
1807 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1808 return false;
1809
1810 // Update the instruction to set NZCV.
1811 MI->setDesc(get(NewOpc));
1812 CmpInstr.eraseFromParent();
1813 bool succeeded = UpdateOperandRegClass(*MI);
1814 (void)succeeded;
1815 assert(succeeded && "Some operands reg class are incompatible!");
1816 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1817 return true;
1818}
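// Illustrative transformation performed above (not verbatim MIR):
//   %w = SUBWri %x, 1, 0
//   SUBSWri $wzr, %w, 0, 0      ; compare %w against zero
//   b.eq ...
// becomes
//   %w = SUBSWri %x, 1, 0       ; the subtract now sets NZCV itself
//   b.eq ...
// with the explicit compare erased.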
1819
1820/// \returns True if \p CmpInstr can be removed.
1821///
1822/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1823/// codes used in \p CCUseInstrs must be inverted.
1824static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1825 int CmpValue, const TargetRegisterInfo &TRI,
1826 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1827 bool &IsInvertCC) {
1828 assert((CmpValue == 0 || CmpValue == 1) &&
1829 "Only comparisons to 0 or 1 considered for removal!");
1830
1831 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1832 unsigned MIOpc = MI.getOpcode();
1833 if (MIOpc == AArch64::CSINCWr) {
1834 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1835 MI.getOperand(2).getReg() != AArch64::WZR)
1836 return false;
1837 } else if (MIOpc == AArch64::CSINCXr) {
1838 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1839 MI.getOperand(2).getReg() != AArch64::XZR)
1840 return false;
1841 } else {
1842 return false;
1843 }
1844 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1845 if (MICC == AArch64CC::Invalid)
1846 return false;
1847
1848 // NZCV needs to be defined
1849 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1850 return false;
1851
1852 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1853 const unsigned CmpOpcode = CmpInstr.getOpcode();
1854 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1855 if (CmpValue && !IsSubsRegImm)
1856 return false;
1857 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1858 return false;
1859
1860 // MI conditions allowed: eq, ne, mi, pl
1861 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1862 if (MIUsedNZCV.C || MIUsedNZCV.V)
1863 return false;
1864
1865 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1866 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1867 // Condition flags are not used in CmpInstr basic block successors and only
1868 // Z or N flags are allowed to be used after CmpInstr within its basic block
1869 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1870 return false;
1871 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1872 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1873 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1874 return false;
1875 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
1876 if (MIUsedNZCV.N && !CmpValue)
1877 return false;
1878
1879 // There must be no defs of flags between MI and CmpInstr
1880 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1881 return false;
1882
1883 // Condition code is inverted in the following cases:
1884 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1885 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1886 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1887 (!CmpValue && MICC == AArch64CC::NE);
1888 return true;
1889}
1890
1891/// Remove comparison in csinc-cmp sequence
1892///
1893/// Examples:
1894/// 1. \code
1895/// csinc w9, wzr, wzr, ne
1896/// cmp w9, #0
1897/// b.eq
1898/// \endcode
1899/// to
1900/// \code
1901/// csinc w9, wzr, wzr, ne
1902/// b.ne
1903/// \endcode
1904///
1905/// 2. \code
1906/// csinc x2, xzr, xzr, mi
1907/// cmp x2, #1
1908/// b.pl
1909/// \endcode
1910/// to
1911/// \code
1912/// csinc x2, xzr, xzr, mi
1913/// b.pl
1914/// \endcode
1915///
1916/// \param CmpInstr comparison instruction
1917/// \return True when comparison removed
1918bool AArch64InstrInfo::removeCmpToZeroOrOne(
1919 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1920 const MachineRegisterInfo &MRI) const {
1921 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1922 if (!MI)
1923 return false;
1924 const TargetRegisterInfo &TRI = getRegisterInfo();
1925 SmallVector<MachineInstr *, 4> CCUseInstrs;
1926 bool IsInvertCC = false;
1927 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1928 IsInvertCC))
1929 return false;
1930 // Make transformation
1931 CmpInstr.eraseFromParent();
1932 if (IsInvertCC) {
1933 // Invert condition codes in CmpInstr CC users
1934 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1935 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1936 assert(Idx >= 0 && "Unexpected instruction using CC.");
1937 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1938 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1939 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1940 CCOperand.setImm(CCUse);
1941 }
1942 }
1943 return true;
1944}
1945
1946bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1947 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1948 MI.getOpcode() != AArch64::CATCHRET)
1949 return false;
1950
1951 MachineBasicBlock &MBB = *MI.getParent();
1952 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1953 auto TRI = Subtarget.getRegisterInfo();
1954 DebugLoc DL = MI.getDebugLoc();
1955
1956 if (MI.getOpcode() == AArch64::CATCHRET) {
1957 // Skip to the first instruction before the epilog.
1958 const TargetInstrInfo *TII =
1959 MBB.getParent()->getSubtarget().getInstrInfo();
1960 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1961 auto MBBI = MachineBasicBlock::iterator(MI);
1962 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1963 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1964 FirstEpilogSEH != MBB.begin())
1965 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1966 if (FirstEpilogSEH != MBB.begin())
1967 FirstEpilogSEH = std::next(FirstEpilogSEH);
1968 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1969 .addReg(AArch64::X0, RegState::Define)
1970 .addMBB(TargetMBB);
1971 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1972 .addReg(AArch64::X0, RegState::Define)
1973 .addReg(AArch64::X0)
1974 .addMBB(TargetMBB)
1975 .addImm(0);
1976 return true;
1977 }
1978
1979 Register Reg = MI.getOperand(0).getReg();
1980 Module &M = *MBB.getParent()->getFunction().getParent();
1981 if (M.getStackProtectorGuard() == "sysreg") {
1982 const AArch64SysReg::SysReg *SrcReg =
1983 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1984 if (!SrcReg)
1985 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1986
1987 // mrs xN, sysreg
1988 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1989 .addDef(Reg, RegState::Renamable)
1990 .addImm(SrcReg->Encoding);
1991 int Offset = M.getStackProtectorGuardOffset();
1992 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1993 // ldr xN, [xN, #offset]
1994 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1995 .addDef(Reg)
1996 .addUse(Reg, RegState::Kill)
1997 .addImm(Offset / 8);
1998 } else if (Offset >= -256 && Offset <= 255) {
1999 // ldur xN, [xN, #offset]
2000 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2001 .addDef(Reg)
2002 .addUse(Reg, RegState::Kill)
2003 .addImm(Offset);
2004 } else if (Offset >= -4095 && Offset <= 4095) {
2005 if (Offset > 0) {
2006 // add xN, xN, #offset
2007 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2008 .addDef(Reg)
2009 .addUse(Reg, RegState::Kill)
2010 .addImm(Offset)
2011 .addImm(0);
2012 } else {
2013 // sub xN, xN, #offset
2014 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2015 .addDef(Reg)
2016 .addUse(Reg, RegState::Kill)
2017 .addImm(-Offset)
2018 .addImm(0);
2019 }
2020 // ldr xN, [xN]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(0);
2025 } else {
2026 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2027 // than 32760.
2028 // It might be nice to use AArch64::MOVi32imm here, which would get
2029 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2030 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2031 // AArch64FrameLowering might help us find such a scratch register
2032 // though. If we failed to find a scratch register, we could emit a
2033 // stream of add instructions to build up the immediate. Or, we could try
2034 // to insert a AArch64::MOVi32imm before register allocation so that we
2035 // didn't need to scavenge for a scratch register.
2036 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2037 }
2038 MBB.erase(MI);
2039 return true;
2040 }
2041
2042 const GlobalValue *GV =
2043 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2044 const TargetMachine &TM = MBB.getParent()->getTarget();
2045 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2046 const unsigned char MO_NC = AArch64II::MO_NC;
2047
2048 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2049 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2050 .addGlobalAddress(GV, 0, OpFlags);
2051 if (Subtarget.isTargetILP32()) {
2052 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2053 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2054 .addDef(Reg32, RegState::Dead)
2055 .addUse(Reg, RegState::Kill)
2056 .addImm(0)
2057 .addMemOperand(*MI.memoperands_begin())
2058 .addDef(Reg, RegState::Implicit);
2059 } else {
2060 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2061 .addReg(Reg, RegState::Kill)
2062 .addImm(0)
2063 .addMemOperand(*MI.memoperands_begin());
2064 }
2065 } else if (TM.getCodeModel() == CodeModel::Large) {
2066 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2067 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2068 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2069 .addImm(0);
2070 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2071 .addReg(Reg, RegState::Kill)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2073 .addImm(16);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2077 .addImm(32);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2081 .addImm(48);
2082 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2083 .addReg(Reg, RegState::Kill)
2084 .addImm(0)
2085 .addMemOperand(*MI.memoperands_begin());
2086 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2087 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2088 .addGlobalAddress(GV, 0, OpFlags);
2089 } else {
2090 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2091 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2092 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2093 if (Subtarget.isTargetILP32()) {
2094 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2095 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2096 .addDef(Reg32, RegState::Dead)
2097 .addUse(Reg, RegState::Kill)
2098 .addGlobalAddress(GV, 0, LoFlags)
2099 .addMemOperand(*MI.memoperands_begin())
2100 .addDef(Reg, RegState::Implicit);
2101 } else {
2102 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2103 .addReg(Reg, RegState::Kill)
2104 .addGlobalAddress(GV, 0, LoFlags)
2105 .addMemOperand(*MI.memoperands_begin());
2106 }
2107 }
2108
2109 MBB.erase(MI);
2110
2111 return true;
2112}
2113
2114// Return true if this instruction simply sets its single destination register
2115// to zero. This is equivalent to a register rename of the zero-register.
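// For example, 'movz w0, #0', 'and w0, wzr, #0x1' and a COPY from wzr all
// qualify (illustrative instances of the opcodes matched below).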
2116static bool isGPRZero(const MachineInstr &MI) {
2117 switch (MI.getOpcode()) {
2118 default:
2119 break;
2120 case AArch64::MOVZWi:
2121 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2122 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2123 assert(MI.getDesc().getNumOperands() == 3 &&
2124 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2125 return true;
2126 }
2127 break;
2128 case AArch64::ANDWri: // and Rd, Rzr, #imm
2129 return MI.getOperand(1).getReg() == AArch64::WZR;
2130 case AArch64::ANDXri:
2131 return MI.getOperand(1).getReg() == AArch64::XZR;
2132 case TargetOpcode::COPY:
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 }
2135 return false;
2136}
2137
2138// Return true if this instruction simply renames a general register without
2139// modifying bits.
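// For example, 'orr x0, xzr, x1', 'add x0, x1, #0' (with no shift) and a
// plain COPY between general-purpose registers are all recognized below
// (illustrative instances only).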
2140static bool isGPRCopy(const MachineInstr &MI) {
2141 switch (MI.getOpcode()) {
2142 default:
2143 break;
2144 case TargetOpcode::COPY: {
2145 // GPR32 copies will be lowered to ORRXrs
2146 Register DstReg = MI.getOperand(0).getReg();
2147 return (AArch64::GPR32RegClass.contains(DstReg) ||
2148 AArch64::GPR64RegClass.contains(DstReg));
2149 }
2150 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2151 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2152 assert(MI.getDesc().getNumOperands() == 4 &&
2153 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2154 return true;
2155 }
2156 break;
2157 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2158 if (MI.getOperand(2).getImm() == 0) {
2159 assert(MI.getDesc().getNumOperands() == 4 &&
2160 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2161 return true;
2162 }
2163 break;
2164 }
2165 return false;
2166}
2167
2168// Return true if this instruction simply renames a floating-point/vector
2169// register without modifying bits.
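// For example, 'orr v0.16b, v1.16b, v1.16b' (a vector register move) and a
// COPY between FPR128 registers are recognized below (illustrative only).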
2170static bool isFPRCopy(const MachineInstr &MI) {
2171 switch (MI.getOpcode()) {
2172 default:
2173 break;
2174 case TargetOpcode::COPY: {
2175 Register DstReg = MI.getOperand(0).getReg();
2176 return AArch64::FPR128RegClass.contains(DstReg);
2177 }
2178 case AArch64::ORRv16i8:
2179 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2180 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2181 "invalid ORRv16i8 operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2190 int &FrameIndex) const {
2191 switch (MI.getOpcode()) {
2192 default:
2193 break;
2194 case AArch64::LDRWui:
2195 case AArch64::LDRXui:
2196 case AArch64::LDRBui:
2197 case AArch64::LDRHui:
2198 case AArch64::LDRSui:
2199 case AArch64::LDRDui:
2200 case AArch64::LDRQui:
2201 case AArch64::LDR_PXI:
2202 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2203 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2204 FrameIndex = MI.getOperand(1).getIndex();
2205 return MI.getOperand(0).getReg();
2206 }
2207 break;
2208 }
2209
2210 return 0;
2211}
2212
2213unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2214 int &FrameIndex) const {
2215 switch (MI.getOpcode()) {
2216 default:
2217 break;
2218 case AArch64::STRWui:
2219 case AArch64::STRXui:
2220 case AArch64::STRBui:
2221 case AArch64::STRHui:
2222 case AArch64::STRSui:
2223 case AArch64::STRDui:
2224 case AArch64::STRQui:
2225 case AArch64::STR_PXI:
2226 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2227 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2228 FrameIndex = MI.getOperand(1).getIndex();
2229 return MI.getOperand(0).getReg();
2230 }
2231 break;
2232 }
2233 return 0;
2234}
2235
2236/// Check all MachineMemOperands for a hint to suppress pairing.
2237bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2238 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2239 return MMO->getFlags() & MOSuppressPair;
2240 });
2241}
2242
2243/// Set a flag on the first MachineMemOperand to suppress pairing.
2244void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2245 if (MI.memoperands_empty())
2246 return;
2247 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2248}
2249
2250/// Check all MachineMemOperands for a hint that the load/store is strided.
2251bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2252 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2253 return MMO->getFlags() & MOStridedAccess;
2254 });
2255}
2256
2257bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2258 switch (Opc) {
2259 default:
2260 return false;
2261 case AArch64::STURSi:
2262 case AArch64::STRSpre:
2263 case AArch64::STURDi:
2264 case AArch64::STRDpre:
2265 case AArch64::STURQi:
2266 case AArch64::STRQpre:
2267 case AArch64::STURBBi:
2268 case AArch64::STURHHi:
2269 case AArch64::STURWi:
2270 case AArch64::STRWpre:
2271 case AArch64::STURXi:
2272 case AArch64::STRXpre:
2273 case AArch64::LDURSi:
2274 case AArch64::LDRSpre:
2275 case AArch64::LDURDi:
2276 case AArch64::LDRDpre:
2277 case AArch64::LDURQi:
2278 case AArch64::LDRQpre:
2279 case AArch64::LDURWi:
2280 case AArch64::LDRWpre:
2281 case AArch64::LDURXi:
2282 case AArch64::LDRXpre:
2283 case AArch64::LDRSWpre:
2284 case AArch64::LDURSWi:
2285 case AArch64::LDURHHi:
2286 case AArch64::LDURBBi:
2287 case AArch64::LDURSBWi:
2288 case AArch64::LDURSHWi:
2289 return true;
2290 }
2291}
2292
2293std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2294 switch (Opc) {
2295 default: return {};
2296 case AArch64::PRFMui: return AArch64::PRFUMi;
2297 case AArch64::LDRXui: return AArch64::LDURXi;
2298 case AArch64::LDRWui: return AArch64::LDURWi;
2299 case AArch64::LDRBui: return AArch64::LDURBi;
2300 case AArch64::LDRHui: return AArch64::LDURHi;
2301 case AArch64::LDRSui: return AArch64::LDURSi;
2302 case AArch64::LDRDui: return AArch64::LDURDi;
2303 case AArch64::LDRQui: return AArch64::LDURQi;
2304 case AArch64::LDRBBui: return AArch64::LDURBBi;
2305 case AArch64::LDRHHui: return AArch64::LDURHHi;
2306 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2307 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2308 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2309 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2310 case AArch64::LDRSWui: return AArch64::LDURSWi;
2311 case AArch64::STRXui: return AArch64::STURXi;
2312 case AArch64::STRWui: return AArch64::STURWi;
2313 case AArch64::STRBui: return AArch64::STURBi;
2314 case AArch64::STRHui: return AArch64::STURHi;
2315 case AArch64::STRSui: return AArch64::STURSi;
2316 case AArch64::STRDui: return AArch64::STURDi;
2317 case AArch64::STRQui: return AArch64::STURQi;
2318 case AArch64::STRBBui: return AArch64::STURBBi;
2319 case AArch64::STRHHui: return AArch64::STURHHi;
2320 }
2321}
2322
2323unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2324 switch (Opc) {
2325 default:
2326 return 2;
2327 case AArch64::LDPXi:
2328 case AArch64::LDPDi:
2329 case AArch64::STPXi:
2330 case AArch64::STPDi:
2331 case AArch64::LDNPXi:
2332 case AArch64::LDNPDi:
2333 case AArch64::STNPXi:
2334 case AArch64::STNPDi:
2335 case AArch64::LDPQi:
2336 case AArch64::STPQi:
2337 case AArch64::LDNPQi:
2338 case AArch64::STNPQi:
2339 case AArch64::LDPWi:
2340 case AArch64::LDPSi:
2341 case AArch64::STPWi:
2342 case AArch64::STPSi:
2343 case AArch64::LDNPWi:
2344 case AArch64::LDNPSi:
2345 case AArch64::STNPWi:
2346 case AArch64::STNPSi:
2347 case AArch64::LDG:
2348 case AArch64::STGPi:
2349
2350 case AArch64::LD1B_IMM:
2351 case AArch64::LD1B_H_IMM:
2352 case AArch64::LD1B_S_IMM:
2353 case AArch64::LD1B_D_IMM:
2354 case AArch64::LD1SB_H_IMM:
2355 case AArch64::LD1SB_S_IMM:
2356 case AArch64::LD1SB_D_IMM:
2357 case AArch64::LD1H_IMM:
2358 case AArch64::LD1H_S_IMM:
2359 case AArch64::LD1H_D_IMM:
2360 case AArch64::LD1SH_S_IMM:
2361 case AArch64::LD1SH_D_IMM:
2362 case AArch64::LD1W_IMM:
2363 case AArch64::LD1W_D_IMM:
2364 case AArch64::LD1SW_D_IMM:
2365 case AArch64::LD1D_IMM:
2366
2367 case AArch64::LD2B_IMM:
2368 case AArch64::LD2H_IMM:
2369 case AArch64::LD2W_IMM:
2370 case AArch64::LD2D_IMM:
2371 case AArch64::LD3B_IMM:
2372 case AArch64::LD3H_IMM:
2373 case AArch64::LD3W_IMM:
2374 case AArch64::LD3D_IMM:
2375 case AArch64::LD4B_IMM:
2376 case AArch64::LD4H_IMM:
2377 case AArch64::LD4W_IMM:
2378 case AArch64::LD4D_IMM:
2379
2380 case AArch64::ST1B_IMM:
2381 case AArch64::ST1B_H_IMM:
2382 case AArch64::ST1B_S_IMM:
2383 case AArch64::ST1B_D_IMM:
2384 case AArch64::ST1H_IMM:
2385 case AArch64::ST1H_S_IMM:
2386 case AArch64::ST1H_D_IMM:
2387 case AArch64::ST1W_IMM:
2388 case AArch64::ST1W_D_IMM:
2389 case AArch64::ST1D_IMM:
2390
2391 case AArch64::ST2B_IMM:
2392 case AArch64::ST2H_IMM:
2393 case AArch64::ST2W_IMM:
2394 case AArch64::ST2D_IMM:
2395 case AArch64::ST3B_IMM:
2396 case AArch64::ST3H_IMM:
2397 case AArch64::ST3W_IMM:
2398 case AArch64::ST3D_IMM:
2399 case AArch64::ST4B_IMM:
2400 case AArch64::ST4H_IMM:
2401 case AArch64::ST4W_IMM:
2402 case AArch64::ST4D_IMM:
2403
2404 case AArch64::LD1RB_IMM:
2405 case AArch64::LD1RB_H_IMM:
2406 case AArch64::LD1RB_S_IMM:
2407 case AArch64::LD1RB_D_IMM:
2408 case AArch64::LD1RSB_H_IMM:
2409 case AArch64::LD1RSB_S_IMM:
2410 case AArch64::LD1RSB_D_IMM:
2411 case AArch64::LD1RH_IMM:
2412 case AArch64::LD1RH_S_IMM:
2413 case AArch64::LD1RH_D_IMM:
2414 case AArch64::LD1RSH_S_IMM:
2415 case AArch64::LD1RSH_D_IMM:
2416 case AArch64::LD1RW_IMM:
2417 case AArch64::LD1RW_D_IMM:
2418 case AArch64::LD1RSW_IMM:
2419 case AArch64::LD1RD_IMM:
2420
2421 case AArch64::LDNT1B_ZRI:
2422 case AArch64::LDNT1H_ZRI:
2423 case AArch64::LDNT1W_ZRI:
2424 case AArch64::LDNT1D_ZRI:
2425 case AArch64::STNT1B_ZRI:
2426 case AArch64::STNT1H_ZRI:
2427 case AArch64::STNT1W_ZRI:
2428 case AArch64::STNT1D_ZRI:
2429
2430 case AArch64::LDNF1B_IMM:
2431 case AArch64::LDNF1B_H_IMM:
2432 case AArch64::LDNF1B_S_IMM:
2433 case AArch64::LDNF1B_D_IMM:
2434 case AArch64::LDNF1SB_H_IMM:
2435 case AArch64::LDNF1SB_S_IMM:
2436 case AArch64::LDNF1SB_D_IMM:
2437 case AArch64::LDNF1H_IMM:
2438 case AArch64::LDNF1H_S_IMM:
2439 case AArch64::LDNF1H_D_IMM:
2440 case AArch64::LDNF1SH_S_IMM:
2441 case AArch64::LDNF1SH_D_IMM:
2442 case AArch64::LDNF1W_IMM:
2443 case AArch64::LDNF1W_D_IMM:
2444 case AArch64::LDNF1SW_D_IMM:
2445 case AArch64::LDNF1D_IMM:
2446 return 3;
2447 case AArch64::ADDG:
2448 case AArch64::STGi:
2449 case AArch64::LDR_PXI:
2450 case AArch64::STR_PXI:
2451 return 2;
2452 }
2453}
2454
2455bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2456 switch (MI.getOpcode()) {
2457 default:
2458 return false;
2459 // Scaled instructions.
2460 case AArch64::STRSui:
2461 case AArch64::STRDui:
2462 case AArch64::STRQui:
2463 case AArch64::STRXui:
2464 case AArch64::STRWui:
2465 case AArch64::LDRSui:
2466 case AArch64::LDRDui:
2467 case AArch64::LDRQui:
2468 case AArch64::LDRXui:
2469 case AArch64::LDRWui:
2470 case AArch64::LDRSWui:
2471 // Unscaled instructions.
2472 case AArch64::STURSi:
2473 case AArch64::STRSpre:
2474 case AArch64::STURDi:
2475 case AArch64::STRDpre:
2476 case AArch64::STURQi:
2477 case AArch64::STRQpre:
2478 case AArch64::STURWi:
2479 case AArch64::STRWpre:
2480 case AArch64::STURXi:
2481 case AArch64::STRXpre:
2482 case AArch64::LDURSi:
2483 case AArch64::LDRSpre:
2484 case AArch64::LDURDi:
2485 case AArch64::LDRDpre:
2486 case AArch64::LDURQi:
2487 case AArch64::LDRQpre:
2488 case AArch64::LDURWi:
2489 case AArch64::LDRWpre:
2490 case AArch64::LDURXi:
2491 case AArch64::LDRXpre:
2492 case AArch64::LDURSWi:
2493 case AArch64::LDRSWpre:
2494 return true;
2495 }
2496}
2497
2498bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2499 switch (MI.getOpcode()) {
2500 default:
2501 assert((!MI.isCall() || !MI.isReturn()) &&
2502 "Unexpected instruction - was a new tail call opcode introduced?");
2503 return false;
2504 case AArch64::TCRETURNdi:
2505 case AArch64::TCRETURNri:
2506 case AArch64::TCRETURNrix16x17:
2507 case AArch64::TCRETURNrix17:
2508 case AArch64::TCRETURNrinotx16:
2509 case AArch64::TCRETURNriALL:
2510 return true;
2511 }
2512}
2513
2514unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2515 switch (Opc) {
2516 default:
2517 llvm_unreachable("Opcode has no flag setting equivalent!");
2518 // 32-bit cases:
2519 case AArch64::ADDWri:
2520 return AArch64::ADDSWri;
2521 case AArch64::ADDWrr:
2522 return AArch64::ADDSWrr;
2523 case AArch64::ADDWrs:
2524 return AArch64::ADDSWrs;
2525 case AArch64::ADDWrx:
2526 return AArch64::ADDSWrx;
2527 case AArch64::ANDWri:
2528 return AArch64::ANDSWri;
2529 case AArch64::ANDWrr:
2530 return AArch64::ANDSWrr;
2531 case AArch64::ANDWrs:
2532 return AArch64::ANDSWrs;
2533 case AArch64::BICWrr:
2534 return AArch64::BICSWrr;
2535 case AArch64::BICWrs:
2536 return AArch64::BICSWrs;
2537 case AArch64::SUBWri:
2538 return AArch64::SUBSWri;
2539 case AArch64::SUBWrr:
2540 return AArch64::SUBSWrr;
2541 case AArch64::SUBWrs:
2542 return AArch64::SUBSWrs;
2543 case AArch64::SUBWrx:
2544 return AArch64::SUBSWrx;
2545 // 64-bit cases:
2546 case AArch64::ADDXri:
2547 return AArch64::ADDSXri;
2548 case AArch64::ADDXrr:
2549 return AArch64::ADDSXrr;
2550 case AArch64::ADDXrs:
2551 return AArch64::ADDSXrs;
2552 case AArch64::ADDXrx:
2553 return AArch64::ADDSXrx;
2554 case AArch64::ANDXri:
2555 return AArch64::ANDSXri;
2556 case AArch64::ANDXrr:
2557 return AArch64::ANDSXrr;
2558 case AArch64::ANDXrs:
2559 return AArch64::ANDSXrs;
2560 case AArch64::BICXrr:
2561 return AArch64::BICSXrr;
2562 case AArch64::BICXrs:
2563 return AArch64::BICSXrs;
2564 case AArch64::SUBXri:
2565 return AArch64::SUBSXri;
2566 case AArch64::SUBXrr:
2567 return AArch64::SUBSXrr;
2568 case AArch64::SUBXrs:
2569 return AArch64::SUBSXrs;
2570 case AArch64::SUBXrx:
2571 return AArch64::SUBSXrx;
2572 // SVE instructions:
2573 case AArch64::AND_PPzPP:
2574 return AArch64::ANDS_PPzPP;
2575 case AArch64::BIC_PPzPP:
2576 return AArch64::BICS_PPzPP;
2577 case AArch64::EOR_PPzPP:
2578 return AArch64::EORS_PPzPP;
2579 case AArch64::NAND_PPzPP:
2580 return AArch64::NANDS_PPzPP;
2581 case AArch64::NOR_PPzPP:
2582 return AArch64::NORS_PPzPP;
2583 case AArch64::ORN_PPzPP:
2584 return AArch64::ORNS_PPzPP;
2585 case AArch64::ORR_PPzPP:
2586 return AArch64::ORRS_PPzPP;
2587 case AArch64::BRKA_PPzP:
2588 return AArch64::BRKAS_PPzP;
2589 case AArch64::BRKPA_PPzPP:
2590 return AArch64::BRKPAS_PPzPP;
2591 case AArch64::BRKB_PPzP:
2592 return AArch64::BRKBS_PPzP;
2593 case AArch64::BRKPB_PPzPP:
2594 return AArch64::BRKPBS_PPzPP;
2595 case AArch64::BRKN_PPzP:
2596 return AArch64::BRKNS_PPzP;
2597 case AArch64::RDFFR_PPz:
2598 return AArch64::RDFFRS_PPz;
2599 case AArch64::PTRUE_B:
2600 return AArch64::PTRUES_B;
2601 }
2602}
2603
2604// Is this a candidate for ld/st merging or pairing? For example, we don't
2605// touch volatiles or load/stores that have a hint to avoid pair formation.
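// For example (illustrative), 'ldr x0, [x1]' followed by 'ldr x2, [x1, #8]'
// may later be rewritten into 'ldp x0, x2, [x1]'; this hook filters out
// accesses that must not participate in such a rewrite.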
2606bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2607
2608 bool IsPreLdSt = isPreLdSt(MI);
2609
2610 // If this is a volatile load/store, don't mess with it.
2611 if (MI.hasOrderedMemoryRef())
2612 return false;
2613
2614 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2615 // For Pre-inc LD/ST, the operand is shifted by one.
2616 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2617 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2618 "Expected a reg or frame index operand.");
2619
2620 // For Pre-indexed addressing quadword instructions, the third operand is the
2621 // immediate value.
2622 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2623
2624 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2625 return false;
2626
2627 // Can't merge/pair if the instruction modifies the base register.
2628 // e.g., ldr x0, [x0]
2629 // This case will never occur with an FI base.
2630 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2631 // STR<S,D,Q,W,X>pre, it can be merged.
2632 // For example:
2633 // ldr q0, [x11, #32]!
2634 // ldr q1, [x11, #16]
2635 // to
2636 // ldp q0, q1, [x11, #32]!
2637 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2638 Register BaseReg = MI.getOperand(1).getReg();
2639 const TargetRegisterInfo *TRI = &getRegisterInfo();
2640 if (MI.modifiesRegister(BaseReg, TRI))
2641 return false;
2642 }
2643
2644 // Check if this load/store has a hint to avoid pair formation.
2645 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2646 if (isLdStPairSuppressed(MI))
2647 return false;
2648
2649 // Do not pair any callee-save store/reload instructions in the
2650 // prologue/epilogue if the CFI information encoded the operations as separate
2651 // instructions, as that will cause the size of the actual prologue to mismatch
2652 // with the prologue size recorded in the Windows CFI.
2653 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2654 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2655 MI.getMF()->getFunction().needsUnwindTableEntry();
2656 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2657 MI.getFlag(MachineInstr::FrameDestroy)))
2658 return false;
2659
2660 // On some CPUs quad load/store pairs are slower than two single load/stores.
2661 if (Subtarget.isPaired128Slow()) {
2662 switch (MI.getOpcode()) {
2663 default:
2664 break;
2665 case AArch64::LDURQi:
2666 case AArch64::STURQi:
2667 case AArch64::LDRQui:
2668 case AArch64::STRQui:
2669 return false;
2670 }
2671 }
2672
2673 return true;
2674}
2675
2676bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2677 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2678 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2679 const TargetRegisterInfo *TRI) const {
2680 if (!LdSt.mayLoadOrStore())
2681 return false;
2682
2683 const MachineOperand *BaseOp;
2684 TypeSize WidthN(0, false);
2685 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2686 WidthN, TRI))
2687 return false;
2688 // The maximum vscale is 16 under AArch64; return the maximal extent for
2689 // the vector.
2690 Width = WidthN.isScalable()
2691 ? WidthN.getKnownMinValue() * AArch64::SVEMaxBitsPerVector /
2692 AArch64::SVEBitsPerBlock
2693 : WidthN.getKnownMinValue();
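 // Illustrative arithmetic (assuming the SVE constants above): a scalable
 // minimum of 16 bytes becomes 16 * 2048 / 128 = 256 bytes, the extent at
 // the architectural maximum vscale of 16.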
2694 BaseOps.push_back(BaseOp);
2695 return true;
2696}
2697
2698std::optional<ExtAddrMode>
2699AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2700 const TargetRegisterInfo *TRI) const {
2701 const MachineOperand *Base; // Filled with the base operand of MI.
2702 int64_t Offset; // Filled with the offset of MI.
2703 bool OffsetIsScalable;
2704 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2705 return std::nullopt;
2706
2707 if (!Base->isReg())
2708 return std::nullopt;
2709 ExtAddrMode AM;
2710 AM.BaseReg = Base->getReg();
2711 AM.Displacement = Offset;
2712 AM.ScaledReg = 0;
2713 AM.Scale = 0;
2714 return AM;
2715}
2716
2717bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2718 Register Reg,
2719 const MachineInstr &AddrI,
2720 ExtAddrMode &AM) const {
2721 // Filter out instructions into which we cannot fold.
2722 unsigned NumBytes;
2723 int64_t OffsetScale = 1;
2724 switch (MemI.getOpcode()) {
2725 default:
2726 return false;
2727
2728 case AArch64::LDURQi:
2729 case AArch64::STURQi:
2730 NumBytes = 16;
2731 break;
2732
2733 case AArch64::LDURDi:
2734 case AArch64::STURDi:
2735 case AArch64::LDURXi:
2736 case AArch64::STURXi:
2737 NumBytes = 8;
2738 break;
2739
2740 case AArch64::LDURWi:
2741 case AArch64::LDURSWi:
2742 case AArch64::STURWi:
2743 NumBytes = 4;
2744 break;
2745
2746 case AArch64::LDURHi:
2747 case AArch64::STURHi:
2748 case AArch64::LDURHHi:
2749 case AArch64::STURHHi:
2750 case AArch64::LDURSHXi:
2751 case AArch64::LDURSHWi:
2752 NumBytes = 2;
2753 break;
2754
2755 case AArch64::LDRBroX:
2756 case AArch64::LDRBBroX:
2757 case AArch64::LDRSBXroX:
2758 case AArch64::LDRSBWroX:
2759 case AArch64::STRBroX:
2760 case AArch64::STRBBroX:
2761 case AArch64::LDURBi:
2762 case AArch64::LDURBBi:
2763 case AArch64::LDURSBXi:
2764 case AArch64::LDURSBWi:
2765 case AArch64::STURBi:
2766 case AArch64::STURBBi:
2767 case AArch64::LDRBui:
2768 case AArch64::LDRBBui:
2769 case AArch64::LDRSBXui:
2770 case AArch64::LDRSBWui:
2771 case AArch64::STRBui:
2772 case AArch64::STRBBui:
2773 NumBytes = 1;
2774 break;
2775
2776 case AArch64::LDRQroX:
2777 case AArch64::STRQroX:
2778 case AArch64::LDRQui:
2779 case AArch64::STRQui:
2780 NumBytes = 16;
2781 OffsetScale = 16;
2782 break;
2783
2784 case AArch64::LDRDroX:
2785 case AArch64::STRDroX:
2786 case AArch64::LDRXroX:
2787 case AArch64::STRXroX:
2788 case AArch64::LDRDui:
2789 case AArch64::STRDui:
2790 case AArch64::LDRXui:
2791 case AArch64::STRXui:
2792 NumBytes = 8;
2793 OffsetScale = 8;
2794 break;
2795
2796 case AArch64::LDRWroX:
2797 case AArch64::LDRSWroX:
2798 case AArch64::STRWroX:
2799 case AArch64::LDRWui:
2800 case AArch64::LDRSWui:
2801 case AArch64::STRWui:
2802 NumBytes = 4;
2803 OffsetScale = 4;
2804 break;
2805
2806 case AArch64::LDRHroX:
2807 case AArch64::STRHroX:
2808 case AArch64::LDRHHroX:
2809 case AArch64::STRHHroX:
2810 case AArch64::LDRSHXroX:
2811 case AArch64::LDRSHWroX:
2812 case AArch64::LDRHui:
2813 case AArch64::STRHui:
2814 case AArch64::LDRHHui:
2815 case AArch64::STRHHui:
2816 case AArch64::LDRSHXui:
2817 case AArch64::LDRSHWui:
2818 NumBytes = 2;
2819 OffsetScale = 2;
2820 break;
2821 }
2822
2823 // Check the fold operand is not the loaded/stored value.
2824 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2825 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2826 return false;
2827
2828 // Handle memory instructions with a [Reg, Reg] addressing mode.
2829 if (MemI.getOperand(2).isReg()) {
2830 // Bail if the addressing mode already includes extension of the offset
2831 // register.
2832 if (MemI.getOperand(3).getImm())
2833 return false;
2834
2835 // Check if we actually have a scaled offset.
2836 if (MemI.getOperand(4).getImm() == 0)
2837 OffsetScale = 1;
2838
2839 // If the address instruction is folded into the base register, then the
2840 // addressing mode must not have a scale. Then we can swap the base and the
2841 // scaled registers.
2842 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2843 return false;
2844
2845 switch (AddrI.getOpcode()) {
2846 default:
2847 return false;
2848
2849 case AArch64::SBFMXri:
2850 // sxtw Xa, Wm
2851 // ldr Xd, [Xn, Xa, lsl #N]
2852 // ->
2853 // ldr Xd, [Xn, Wm, sxtw #N]
2854 if (AddrI.getOperand(2).getImm() != 0 ||
2855 AddrI.getOperand(3).getImm() != 31)
2856 return false;
2857
2858 AM.BaseReg = MemI.getOperand(1).getReg();
2859 if (AM.BaseReg == Reg)
2860 AM.BaseReg = MemI.getOperand(2).getReg();
2861 AM.ScaledReg = AddrI.getOperand(1).getReg();
2862 AM.Scale = OffsetScale;
2863 AM.Displacement = 0;
2864 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2865 return true;
2866
2867 case TargetOpcode::SUBREG_TO_REG: {
2868 // mov Wa, Wm
2869 // ldr Xd, [Xn, Xa, lsl #N]
2870 // ->
2871 // ldr Xd, [Xn, Wm, uxtw #N]
2872
2873 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2874 if (AddrI.getOperand(1).getImm() != 0 ||
2875 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2876 return false;
2877
2878 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2879 Register OffsetReg = AddrI.getOperand(2).getReg();
2880 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2881 return false;
2882
2883 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2884 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2885 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2886 DefMI.getOperand(3).getImm() != 0)
2887 return false;
2888
2889 AM.BaseReg = MemI.getOperand(1).getReg();
2890 if (AM.BaseReg == Reg)
2891 AM.BaseReg = MemI.getOperand(2).getReg();
2892 AM.ScaledReg = DefMI.getOperand(2).getReg();
2893 AM.Scale = OffsetScale;
2894 AM.Displacement = 0;
2895 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2896 return true;
2897 }
2898 }
2899 }
2900
2901 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2902
2903 // Check we are not breaking a potential conversion to an LDP.
2904 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2905 int64_t NewOffset) -> bool {
2906 int64_t MinOffset, MaxOffset;
2907 switch (NumBytes) {
2908 default:
2909 return true;
2910 case 4:
2911 MinOffset = -256;
2912 MaxOffset = 252;
2913 break;
2914 case 8:
2915 MinOffset = -512;
2916 MaxOffset = 504;
2917 break;
2918 case 16:
2919 MinOffset = -1024;
2920 MaxOffset = 1008;
2921 break;
2922 }
2923 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2924 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2925 };
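 // Illustrative example: for an 8-byte access the LDP byte-offset range is
 // [-512, 504], so folding a displacement that moves an in-range offset of 8
 // to an out-of-range 1032 is rejected, while an offset that was already out
 // of range (e.g. 4096) is not constrained by this check.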
2926 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2927 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2928 int64_t NewOffset = OldOffset + Disp;
2929 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2930 return false;
2931 // If the old offset would fit into an LDP, but the new offset wouldn't,
2932 // bail out.
2933 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2934 return false;
2935 AM.BaseReg = AddrI.getOperand(1).getReg();
2936 AM.ScaledReg = 0;
2937 AM.Scale = 0;
2938 AM.Displacement = NewOffset;
2939 AM.Form = ExtAddrMode::Formula::Basic;
2940 return true;
2941 };
2942
2943 auto canFoldAddRegIntoAddrMode =
2944 [&](int64_t Scale,
2945 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2946 if (MemI.getOperand(2).getImm() != 0)
2947 return false;
2948 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2949 return false;
2950 AM.BaseReg = AddrI.getOperand(1).getReg();
2951 AM.ScaledReg = AddrI.getOperand(2).getReg();
2952 AM.Scale = Scale;
2953 AM.Displacement = 0;
2954 AM.Form = Form;
2955 return true;
2956 };
2957
2958 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2959 unsigned Opcode = MemI.getOpcode();
2960 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2961 Subtarget.isSTRQroSlow();
2962 };
2963
2964 int64_t Disp = 0;
2965 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2966 switch (AddrI.getOpcode()) {
2967 default:
2968 return false;
2969
2970 case AArch64::ADDXri:
2971 // add Xa, Xn, #N
2972 // ldr Xd, [Xa, #M]
2973 // ->
2974 // ldr Xd, [Xn, #N'+M]
2975 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2976 return canFoldAddSubImmIntoAddrMode(Disp);
2977
2978 case AArch64::SUBXri:
2979 // sub Xa, Xn, #N
2980 // ldr Xd, [Xa, #M]
2981 // ->
2982 // ldr Xd, [Xn, #N'+M]
2983 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2984 return canFoldAddSubImmIntoAddrMode(-Disp);
2985
2986 case AArch64::ADDXrs: {
2987 // add Xa, Xn, Xm, lsl #N
2988 // ldr Xd, [Xa]
2989 // ->
2990 // ldr Xd, [Xn, Xm, lsl #N]
2991
2992 // Don't fold the add if the result would be slower, unless optimising for
2993 // size.
2994 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2995 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
2996 return false;
2997 Shift = AArch64_AM::getShiftValue(Shift);
2998 if (!OptSize) {
2999 if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
3000 return false;
3001 if (avoidSlowSTRQ(MemI))
3002 return false;
3003 }
3004 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3005 }
3006
3007 case AArch64::ADDXrr:
3008 // add Xa, Xn, Xm
3009 // ldr Xd, [Xa]
3010 // ->
3011 // ldr Xd, [Xn, Xm, lsl #0]
3012
3013 // Don't fold the add if the result would be slower, unless optimising for
3014 // size.
3015 if (!OptSize && avoidSlowSTRQ(MemI))
3016 return false;
3017 return canFoldAddRegIntoAddrMode(1);
3018
3019 case AArch64::ADDXrx:
3020 // add Xa, Xn, Wm, {s,u}xtw #N
3021 // ldr Xd, [Xa]
3022 // ->
3023 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3024
3025 // Don't fold the add if the result would be slower, unless optimising for
3026 // size.
3027 if (!OptSize && avoidSlowSTRQ(MemI))
3028 return false;
3029
3030 // Can fold only sign-/zero-extend of a word.
3031 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3032 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3033 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3034 return false;
3035
3036 return canFoldAddRegIntoAddrMode(
3037 1ULL << AArch64_AM::getArithShiftValue(Imm),
3038 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3039 : ExtAddrMode::Formula::ZExtScaledReg);
3040 }
3041}
3042
3043// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3044// return the opcode of an instruction performing the same operation, but using
3045// the [Reg, Reg] addressing mode.
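// For example, both LDRXui and LDURXi (ldr/ldur x0, [x1, #imm]) map to
// LDRXroX (ldr x0, [x1, x2]).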
3046static unsigned regOffsetOpcode(unsigned Opcode) {
3047 switch (Opcode) {
3048 default:
3049 llvm_unreachable("Address folding not implemented for instruction");
3050
3051 case AArch64::LDURQi:
3052 case AArch64::LDRQui:
3053 return AArch64::LDRQroX;
3054 case AArch64::STURQi:
3055 case AArch64::STRQui:
3056 return AArch64::STRQroX;
3057 case AArch64::LDURDi:
3058 case AArch64::LDRDui:
3059 return AArch64::LDRDroX;
3060 case AArch64::STURDi:
3061 case AArch64::STRDui:
3062 return AArch64::STRDroX;
3063 case AArch64::LDURXi:
3064 case AArch64::LDRXui:
3065 return AArch64::LDRXroX;
3066 case AArch64::STURXi:
3067 case AArch64::STRXui:
3068 return AArch64::STRXroX;
3069 case AArch64::LDURWi:
3070 case AArch64::LDRWui:
3071 return AArch64::LDRWroX;
3072 case AArch64::LDURSWi:
3073 case AArch64::LDRSWui:
3074 return AArch64::LDRSWroX;
3075 case AArch64::STURWi:
3076 case AArch64::STRWui:
3077 return AArch64::STRWroX;
3078 case AArch64::LDURHi:
3079 case AArch64::LDRHui:
3080 return AArch64::LDRHroX;
3081 case AArch64::STURHi:
3082 case AArch64::STRHui:
3083 return AArch64::STRHroX;
3084 case AArch64::LDURHHi:
3085 case AArch64::LDRHHui:
3086 return AArch64::LDRHHroX;
3087 case AArch64::STURHHi:
3088 case AArch64::STRHHui:
3089 return AArch64::STRHHroX;
3090 case AArch64::LDURSHXi:
3091 case AArch64::LDRSHXui:
3092 return AArch64::LDRSHXroX;
3093 case AArch64::LDURSHWi:
3094 case AArch64::LDRSHWui:
3095 return AArch64::LDRSHWroX;
3096 case AArch64::LDURBi:
3097 case AArch64::LDRBui:
3098 return AArch64::LDRBroX;
3099 case AArch64::LDURBBi:
3100 case AArch64::LDRBBui:
3101 return AArch64::LDRBBroX;
3102 case AArch64::LDURSBXi:
3103 case AArch64::LDRSBXui:
3104 return AArch64::LDRSBXroX;
3105 case AArch64::LDURSBWi:
3106 case AArch64::LDRSBWui:
3107 return AArch64::LDRSBWroX;
3108 case AArch64::STURBi:
3109 case AArch64::STRBui:
3110 return AArch64::STRBroX;
3111 case AArch64::STURBBi:
3112 case AArch64::STRBBui:
3113 return AArch64::STRBBroX;
3114 }
3115}
3116
3117// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3118// the opcode of an instruction performing the same operation, but using the
3119// [Reg, #Imm] addressing mode with scaled offset.
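// For example, LDURXi maps to LDRXui with Scale set to 8; opcodes that are
// already scaled are returned unchanged with their scale reported.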
3120unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3121 switch (Opcode) {
3122 default:
3123 llvm_unreachable("Address folding not implemented for instruction");
3124
3125 case AArch64::LDURQi:
3126 Scale = 16;
3127 return AArch64::LDRQui;
3128 case AArch64::STURQi:
3129 Scale = 16;
3130 return AArch64::STRQui;
3131 case AArch64::LDURDi:
3132 Scale = 8;
3133 return AArch64::LDRDui;
3134 case AArch64::STURDi:
3135 Scale = 8;
3136 return AArch64::STRDui;
3137 case AArch64::LDURXi:
3138 Scale = 8;
3139 return AArch64::LDRXui;
3140 case AArch64::STURXi:
3141 Scale = 8;
3142 return AArch64::STRXui;
3143 case AArch64::LDURWi:
3144 Scale = 4;
3145 return AArch64::LDRWui;
3146 case AArch64::LDURSWi:
3147 Scale = 4;
3148 return AArch64::LDRSWui;
3149 case AArch64::STURWi:
3150 Scale = 4;
3151 return AArch64::STRWui;
3152 case AArch64::LDURHi:
3153 Scale = 2;
3154 return AArch64::LDRHui;
3155 case AArch64::STURHi:
3156 Scale = 2;
3157 return AArch64::STRHui;
3158 case AArch64::LDURHHi:
3159 Scale = 2;
3160 return AArch64::LDRHHui;
3161 case AArch64::STURHHi:
3162 Scale = 2;
3163 return AArch64::STRHHui;
3164 case AArch64::LDURSHXi:
3165 Scale = 2;
3166 return AArch64::LDRSHXui;
3167 case AArch64::LDURSHWi:
3168 Scale = 2;
3169 return AArch64::LDRSHWui;
3170 case AArch64::LDURBi:
3171 Scale = 1;
3172 return AArch64::LDRBui;
3173 case AArch64::LDURBBi:
3174 Scale = 1;
3175 return AArch64::LDRBBui;
3176 case AArch64::LDURSBXi:
3177 Scale = 1;
3178 return AArch64::LDRSBXui;
3179 case AArch64::LDURSBWi:
3180 Scale = 1;
3181 return AArch64::LDRSBWui;
3182 case AArch64::STURBi:
3183 Scale = 1;
3184 return AArch64::STRBui;
3185 case AArch64::STURBBi:
3186 Scale = 1;
3187 return AArch64::STRBBui;
3188 case AArch64::LDRQui:
3189 case AArch64::STRQui:
3190 Scale = 16;
3191 return Opcode;
3192 case AArch64::LDRDui:
3193 case AArch64::STRDui:
3194 case AArch64::LDRXui:
3195 case AArch64::STRXui:
3196 Scale = 8;
3197 return Opcode;
3198 case AArch64::LDRWui:
3199 case AArch64::LDRSWui:
3200 case AArch64::STRWui:
3201 Scale = 4;
3202 return Opcode;
3203 case AArch64::LDRHui:
3204 case AArch64::STRHui:
3205 case AArch64::LDRHHui:
3206 case AArch64::STRHHui:
3207 case AArch64::LDRSHXui:
3208 case AArch64::LDRSHWui:
3209 Scale = 2;
3210 return Opcode;
3211 case AArch64::LDRBui:
3212 case AArch64::LDRBBui:
3213 case AArch64::LDRSBXui:
3214 case AArch64::LDRSBWui:
3215 case AArch64::STRBui:
3216 case AArch64::STRBBui:
3217 Scale = 1;
3218 return Opcode;
3219 }
3220}
3221
3222// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3223// the opcode of an instruction performing the same operation, but using the
3224// [Reg, #Imm] addressing mode with unscaled offset.
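// For example, LDRXui maps to LDURXi; opcodes that are already unscaled are
// returned unchanged.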
3225unsigned unscaledOffsetOpcode(unsigned Opcode) {
3226 switch (Opcode) {
3227 default:
3228 llvm_unreachable("Address folding not implemented for instruction");
3229
3230 case AArch64::LDURQi:
3231 case AArch64::STURQi:
3232 case AArch64::LDURDi:
3233 case AArch64::STURDi:
3234 case AArch64::LDURXi:
3235 case AArch64::STURXi:
3236 case AArch64::LDURWi:
3237 case AArch64::LDURSWi:
3238 case AArch64::STURWi:
3239 case AArch64::LDURHi:
3240 case AArch64::STURHi:
3241 case AArch64::LDURHHi:
3242 case AArch64::STURHHi:
3243 case AArch64::LDURSHXi:
3244 case AArch64::LDURSHWi:
3245 case AArch64::LDURBi:
3246 case AArch64::STURBi:
3247 case AArch64::LDURBBi:
3248 case AArch64::STURBBi:
3249 case AArch64::LDURSBWi:
3250 case AArch64::LDURSBXi:
3251 return Opcode;
3252 case AArch64::LDRQui:
3253 return AArch64::LDURQi;
3254 case AArch64::STRQui:
3255 return AArch64::STURQi;
3256 case AArch64::LDRDui:
3257 return AArch64::LDURDi;
3258 case AArch64::STRDui:
3259 return AArch64::STURDi;
3260 case AArch64::LDRXui:
3261 return AArch64::LDURXi;
3262 case AArch64::STRXui:
3263 return AArch64::STURXi;
3264 case AArch64::LDRWui:
3265 return AArch64::LDURWi;
3266 case AArch64::LDRSWui:
3267 return AArch64::LDURSWi;
3268 case AArch64::STRWui:
3269 return AArch64::STURWi;
3270 case AArch64::LDRHui:
3271 return AArch64::LDURHi;
3272 case AArch64::STRHui:
3273 return AArch64::STURHi;
3274 case AArch64::LDRHHui:
3275 return AArch64::LDURHHi;
3276 case AArch64::STRHHui:
3277 return AArch64::STURHHi;
3278 case AArch64::LDRSHXui:
3279 return AArch64::LDURSHXi;
3280 case AArch64::LDRSHWui:
3281 return AArch64::LDURSHWi;
3282 case AArch64::LDRBBui:
3283 return AArch64::LDURBBi;
3284 case AArch64::LDRBui:
3285 return AArch64::LDURBi;
3286 case AArch64::STRBBui:
3287 return AArch64::STURBBi;
3288 case AArch64::STRBui:
3289 return AArch64::STURBi;
3290 case AArch64::LDRSBWui:
3291 return AArch64::LDURSBWi;
3292 case AArch64::LDRSBXui:
3293 return AArch64::LDURSBXi;
3294 }
3295}
3296
3297// Given the opcode of a memory load/store instruction, return the opcode of an
3298// instruction performing the same operation, but using
3299// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3300// offset register.
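// For example, LDRXroX, LDURXi and LDRXui all map to LDRXroW
// (ldr x0, [x1, w2, {s,u}xtw #N]).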
3301static unsigned offsetExtendOpcode(unsigned Opcode) {
3302 switch (Opcode) {
3303 default:
3304 llvm_unreachable("Address folding not implemented for instruction");
3305
3306 case AArch64::LDRQroX:
3307 case AArch64::LDURQi:
3308 case AArch64::LDRQui:
3309 return AArch64::LDRQroW;
3310 case AArch64::STRQroX:
3311 case AArch64::STURQi:
3312 case AArch64::STRQui:
3313 return AArch64::STRQroW;
3314 case AArch64::LDRDroX:
3315 case AArch64::LDURDi:
3316 case AArch64::LDRDui:
3317 return AArch64::LDRDroW;
3318 case AArch64::STRDroX:
3319 case AArch64::STURDi:
3320 case AArch64::STRDui:
3321 return AArch64::STRDroW;
3322 case AArch64::LDRXroX:
3323 case AArch64::LDURXi:
3324 case AArch64::LDRXui:
3325 return AArch64::LDRXroW;
3326 case AArch64::STRXroX:
3327 case AArch64::STURXi:
3328 case AArch64::STRXui:
3329 return AArch64::STRXroW;
3330 case AArch64::LDRWroX:
3331 case AArch64::LDURWi:
3332 case AArch64::LDRWui:
3333 return AArch64::LDRWroW;
3334 case AArch64::LDRSWroX:
3335 case AArch64::LDURSWi:
3336 case AArch64::LDRSWui:
3337 return AArch64::LDRSWroW;
3338 case AArch64::STRWroX:
3339 case AArch64::STURWi:
3340 case AArch64::STRWui:
3341 return AArch64::STRWroW;
3342 case AArch64::LDRHroX:
3343 case AArch64::LDURHi:
3344 case AArch64::LDRHui:
3345 return AArch64::LDRHroW;
3346 case AArch64::STRHroX:
3347 case AArch64::STURHi:
3348 case AArch64::STRHui:
3349 return AArch64::STRHroW;
3350 case AArch64::LDRHHroX:
3351 case AArch64::LDURHHi:
3352 case AArch64::LDRHHui:
3353 return AArch64::LDRHHroW;
3354 case AArch64::STRHHroX:
3355 case AArch64::STURHHi:
3356 case AArch64::STRHHui:
3357 return AArch64::STRHHroW;
3358 case AArch64::LDRSHXroX:
3359 case AArch64::LDURSHXi:
3360 case AArch64::LDRSHXui:
3361 return AArch64::LDRSHXroW;
3362 case AArch64::LDRSHWroX:
3363 case AArch64::LDURSHWi:
3364 case AArch64::LDRSHWui:
3365 return AArch64::LDRSHWroW;
3366 case AArch64::LDRBroX:
3367 case AArch64::LDURBi:
3368 case AArch64::LDRBui:
3369 return AArch64::LDRBroW;
3370 case AArch64::LDRBBroX:
3371 case AArch64::LDURBBi:
3372 case AArch64::LDRBBui:
3373 return AArch64::LDRBBroW;
3374 case AArch64::LDRSBXroX:
3375 case AArch64::LDURSBXi:
3376 case AArch64::LDRSBXui:
3377 return AArch64::LDRSBXroW;
3378 case AArch64::LDRSBWroX:
3379 case AArch64::LDURSBWi:
3380 case AArch64::LDRSBWui:
3381 return AArch64::LDRSBWroW;
3382 case AArch64::STRBroX:
3383 case AArch64::STURBi:
3384 case AArch64::STRBui:
3385 return AArch64::STRBroW;
3386 case AArch64::STRBBroX:
3387 case AArch64::STURBBi:
3388 case AArch64::STRBBui:
3389 return AArch64::STRBBroW;
3390 }
3391}
3392
3393MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3394 const ExtAddrMode &AM) const {
3395
3396 const DebugLoc &DL = MemI.getDebugLoc();
3397 MachineBasicBlock &MBB = *MemI.getParent();
3398 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3399
3400 if (AM.Form == ExtAddrMode::Formula::Basic) {
3401 if (AM.ScaledReg) {
3402 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3403 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3404 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3405 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3406 .addReg(MemI.getOperand(0).getReg(),
3407 MemI.mayLoad() ? RegState::Define : 0)
3408 .addReg(AM.BaseReg)
3409 .addReg(AM.ScaledReg)
3410 .addImm(0)
3411 .addImm(AM.Scale > 1)
3412 .setMemRefs(MemI.memoperands())
3413 .setMIFlags(MemI.getFlags());
3414 return B.getInstr();
3415 }
3416
3417 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3418 "Addressing mode not supported for folding");
3419
3420 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3421 unsigned Scale = 1;
3422 unsigned Opcode = MemI.getOpcode();
3423 if (isInt<9>(AM.Displacement))
3424 Opcode = unscaledOffsetOpcode(Opcode);
3425 else
3426 Opcode = scaledOffsetOpcode(Opcode, Scale);
3427
3428 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3429 .addReg(MemI.getOperand(0).getReg(),
3430 MemI.mayLoad() ? RegState::Define : 0)
3431 .addReg(AM.BaseReg)
3432 .addImm(AM.Displacement / Scale)
3433 .setMemRefs(MemI.memoperands())
3434 .setMIFlags(MemI.getFlags());
3435 return B.getInstr();
3436 }
3437
3438 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3439 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3440 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3441 assert(AM.ScaledReg && !AM.Displacement &&
3442 "Address offset can be a register or an immediate, but not both");
3443 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3444 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3445 // Make sure the offset register is in the correct register class.
3446 Register OffsetReg = AM.ScaledReg;
3447 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3448 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3449 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3450 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3451 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3452 }
3453 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3454 .addReg(MemI.getOperand(0).getReg(),
3455 MemI.mayLoad() ? RegState::Define : 0)
3456 .addReg(AM.BaseReg)
3457 .addReg(OffsetReg)
3458 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3459 .addImm(AM.Scale != 1)
3460 .setMemRefs(MemI.memoperands())
3461 .setMIFlags(MemI.getFlags());
3462
3463 return B.getInstr();
3464 }
3465
3467 "Function must not be called with an addressing mode it can't handle");
3468}
3469
3470bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3471 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3472 bool &OffsetIsScalable, TypeSize &Width,
3473 const TargetRegisterInfo *TRI) const {
3474 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3475 // Handle only loads/stores with base register followed by immediate offset.
3476 if (LdSt.getNumExplicitOperands() == 3) {
3477 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3478 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3479 !LdSt.getOperand(2).isImm())
3480 return false;
3481 } else if (LdSt.getNumExplicitOperands() == 4) {
3482 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3483 if (!LdSt.getOperand(1).isReg() ||
3484 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3485 !LdSt.getOperand(3).isImm())
3486 return false;
3487 } else
3488 return false;
3489
3490 // Get the scaling factor for the instruction and set the width for the
3491 // instruction.
3492 TypeSize Scale(0U, false);
3493 int64_t Dummy1, Dummy2;
3494
3495 // If this returns false, then it's an instruction we don't want to handle.
3496 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3497 return false;
3498
3499 // Compute the offset. Offset is calculated as the immediate operand
3500 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3501 // set to 1.
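 // For example (illustrative): an LDRXui with immediate operand 3 has
 // Scale == 8, so Offset == 3 * 8 == 24 bytes, whereas the unscaled LDURXi
 // form keeps Scale == 1 and uses its immediate directly.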
3502 if (LdSt.getNumExplicitOperands() == 3) {
3503 BaseOp = &LdSt.getOperand(1);
3504 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3505 } else {
3506 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3507 BaseOp = &LdSt.getOperand(2);
3508 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3509 }
3510 OffsetIsScalable = Scale.isScalable();
3511
3512 if (!BaseOp->isReg() && !BaseOp->isFI())
3513 return false;
3514
3515 return true;
3516}
3517
3518MachineOperand &
3519AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3520 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3521 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3522 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3523 return OfsOp;
3524}
3525
3526bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3527 TypeSize &Width, int64_t &MinOffset,
3528 int64_t &MaxOffset) {
3529 switch (Opcode) {
3530 // Not a memory operation or something we want to handle.
3531 default:
3532 Scale = TypeSize::getFixed(0);
3533 Width = TypeSize::getFixed(0);
3534 MinOffset = MaxOffset = 0;
3535 return false;
3536 case AArch64::STRWpost:
3537 case AArch64::LDRWpost:
3538 Width = TypeSize::getFixed(32);
3539 Scale = TypeSize::getFixed(4);
3540 MinOffset = -256;
3541 MaxOffset = 255;
3542 break;
3543 case AArch64::LDURQi:
3544 case AArch64::STURQi:
3545 Width = TypeSize::getFixed(16);
3546 Scale = TypeSize::getFixed(1);
3547 MinOffset = -256;
3548 MaxOffset = 255;
3549 break;
3550 case AArch64::PRFUMi:
3551 case AArch64::LDURXi:
3552 case AArch64::LDURDi:
3553 case AArch64::LDAPURXi:
3554 case AArch64::STURXi:
3555 case AArch64::STURDi:
3556 case AArch64::STLURXi:
3557 Width = TypeSize::getFixed(8);
3558 Scale = TypeSize::getFixed(1);
3559 MinOffset = -256;
3560 MaxOffset = 255;
3561 break;
3562 case AArch64::LDURWi:
3563 case AArch64::LDURSi:
3564 case AArch64::LDURSWi:
3565 case AArch64::LDAPURi:
3566 case AArch64::LDAPURSWi:
3567 case AArch64::STURWi:
3568 case AArch64::STURSi:
3569 case AArch64::STLURWi:
3570 Width = TypeSize::getFixed(4);
3571 Scale = TypeSize::getFixed(1);
3572 MinOffset = -256;
3573 MaxOffset = 255;
3574 break;
3575 case AArch64::LDURHi:
3576 case AArch64::LDURHHi:
3577 case AArch64::LDURSHXi:
3578 case AArch64::LDURSHWi:
3579 case AArch64::LDAPURHi:
3580 case AArch64::LDAPURSHWi:
3581 case AArch64::LDAPURSHXi:
3582 case AArch64::STURHi:
3583 case AArch64::STURHHi:
3584 case AArch64::STLURHi:
3585 Width = TypeSize::getFixed(2);
3586 Scale = TypeSize::getFixed(1);
3587 MinOffset = -256;
3588 MaxOffset = 255;
3589 break;
3590 case AArch64::LDURBi:
3591 case AArch64::LDURBBi:
3592 case AArch64::LDURSBXi:
3593 case AArch64::LDURSBWi:
3594 case AArch64::LDAPURBi:
3595 case AArch64::LDAPURSBWi:
3596 case AArch64::LDAPURSBXi:
3597 case AArch64::STURBi:
3598 case AArch64::STURBBi:
3599 case AArch64::STLURBi:
3600 Width = TypeSize::getFixed(1);
3601 Scale = TypeSize::getFixed(1);
3602 MinOffset = -256;
3603 MaxOffset = 255;
3604 break;
3605 case AArch64::LDPQi:
3606 case AArch64::LDNPQi:
3607 case AArch64::STPQi:
3608 case AArch64::STNPQi:
3609 Scale = TypeSize::getFixed(16);
3610 Width = TypeSize::getFixed(32);
3611 MinOffset = -64;
3612 MaxOffset = 63;
3613 break;
3614 case AArch64::LDRQui:
3615 case AArch64::STRQui:
3616 Scale = TypeSize::getFixed(16);
3617 Width = TypeSize::getFixed(16);
3618 MinOffset = 0;
3619 MaxOffset = 4095;
3620 break;
3621 case AArch64::LDPXi:
3622 case AArch64::LDPDi:
3623 case AArch64::LDNPXi:
3624 case AArch64::LDNPDi:
3625 case AArch64::STPXi:
3626 case AArch64::STPDi:
3627 case AArch64::STNPXi:
3628 case AArch64::STNPDi:
3629 Scale = TypeSize::getFixed(8);
3630 Width = TypeSize::getFixed(16);
3631 MinOffset = -64;
3632 MaxOffset = 63;
3633 break;
3634 case AArch64::PRFMui:
3635 case AArch64::LDRXui:
3636 case AArch64::LDRDui:
3637 case AArch64::STRXui:
3638 case AArch64::STRDui:
3639 Scale = TypeSize::getFixed(8);
3640 Width = TypeSize::getFixed(8);
3641 MinOffset = 0;
3642 MaxOffset = 4095;
3643 break;
3644 case AArch64::StoreSwiftAsyncContext:
3645 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3646 Scale = TypeSize::getFixed(1);
3647 Width = TypeSize::getFixed(8);
3648 MinOffset = 0;
3649 MaxOffset = 4095;
3650 break;
3651 case AArch64::LDPWi:
3652 case AArch64::LDPSi:
3653 case AArch64::LDNPWi:
3654 case AArch64::LDNPSi:
3655 case AArch64::STPWi:
3656 case AArch64::STPSi:
3657 case AArch64::STNPWi:
3658 case AArch64::STNPSi:
3659 Scale = TypeSize::getFixed(4);
3660 Width = TypeSize::getFixed(8);
3661 MinOffset = -64;
3662 MaxOffset = 63;
3663 break;
3664 case AArch64::LDRWui:
3665 case AArch64::LDRSui:
3666 case AArch64::LDRSWui:
3667 case AArch64::STRWui:
3668 case AArch64::STRSui:
3669 Scale = TypeSize::getFixed(4);
3670 Width = TypeSize::getFixed(4);
3671 MinOffset = 0;
3672 MaxOffset = 4095;
3673 break;
3674 case AArch64::LDRHui:
3675 case AArch64::LDRHHui:
3676 case AArch64::LDRSHWui:
3677 case AArch64::LDRSHXui:
3678 case AArch64::STRHui:
3679 case AArch64::STRHHui:
3680 Scale = TypeSize::getFixed(2);
3681 Width = TypeSize::getFixed(2);
3682 MinOffset = 0;
3683 MaxOffset = 4095;
3684 break;
3685 case AArch64::LDRBui:
3686 case AArch64::LDRBBui:
3687 case AArch64::LDRSBWui:
3688 case AArch64::LDRSBXui:
3689 case AArch64::STRBui:
3690 case AArch64::STRBBui:
3691 Scale = TypeSize::getFixed(1);
3692 Width = TypeSize::getFixed(1);
3693 MinOffset = 0;
3694 MaxOffset = 4095;
3695 break;
3696 case AArch64::STPXpre:
3697 case AArch64::LDPXpost:
3698 case AArch64::STPDpre:
3699 case AArch64::LDPDpost:
3700 Scale = TypeSize::getFixed(8);
3701 Width = TypeSize::getFixed(8);
3702 MinOffset = -512;
3703 MaxOffset = 504;
3704 break;
3705 case AArch64::STPQpre:
3706 case AArch64::LDPQpost:
3707 Scale = TypeSize::getFixed(16);
3708 Width = TypeSize::getFixed(16);
3709 MinOffset = -1024;
3710 MaxOffset = 1008;
3711 break;
3712 case AArch64::STRXpre:
3713 case AArch64::STRDpre:
3714 case AArch64::LDRXpost:
3715 case AArch64::LDRDpost:
3716 Scale = TypeSize::getFixed(1);
3717 Width = TypeSize::getFixed(8);
3718 MinOffset = -256;
3719 MaxOffset = 255;
3720 break;
3721 case AArch64::STRQpre:
3722 case AArch64::LDRQpost:
3723 Scale = TypeSize::getFixed(1);
3724 Width = TypeSize::getFixed(16);
3725 MinOffset = -256;
3726 MaxOffset = 255;
3727 break;
3728 case AArch64::ADDG:
3729 Scale = TypeSize::getFixed(16);
3730 Width = TypeSize::getFixed(0);
3731 MinOffset = 0;
3732 MaxOffset = 63;
3733 break;
3734 case AArch64::TAGPstack:
3735 Scale = TypeSize::getFixed(16);
3736 Width = TypeSize::getFixed(0);
3737 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3738 // of 63 (not 64!).
3739 MinOffset = -63;
3740 MaxOffset = 63;
3741 break;
3742 case AArch64::LDG:
3743 case AArch64::STGi:
3744 case AArch64::STZGi:
3745 Scale = TypeSize::getFixed(16);
3746 Width = TypeSize::getFixed(16);
3747 MinOffset = -256;
3748 MaxOffset = 255;
3749 break;
3750 case AArch64::STR_ZZZZXI:
3751 case AArch64::LDR_ZZZZXI:
3752 Scale = TypeSize::getScalable(16);
3753 Width = TypeSize::getScalable(16 * 4);
3754 MinOffset = -256;
3755 MaxOffset = 252;
3756 break;
3757 case AArch64::STR_ZZZXI:
3758 case AArch64::LDR_ZZZXI:
3759 Scale = TypeSize::getScalable(16);
3760 Width = TypeSize::getScalable(16 * 3);
3761 MinOffset = -256;
3762 MaxOffset = 253;
3763 break;
3764 case AArch64::STR_ZZXI:
3765 case AArch64::LDR_ZZXI:
3766 Scale = TypeSize::getScalable(16);
3767 Width = TypeSize::getScalable(16 * 2);
3768 MinOffset = -256;
3769 MaxOffset = 254;
3770 break;
3771 case AArch64::LDR_PXI:
3772 case AArch64::STR_PXI:
3773 Scale = TypeSize::getScalable(2);
3774 Width = TypeSize::getScalable(2);
3775 MinOffset = -256;
3776 MaxOffset = 255;
3777 break;
3778 case AArch64::LDR_PPXI:
3779 case AArch64::STR_PPXI:
3780 Scale = TypeSize::getScalable(2);
3781 Width = TypeSize::getScalable(2 * 2);
3782 MinOffset = -256;
3783 MaxOffset = 254;
3784 break;
3785 case AArch64::LDR_ZXI:
3786 case AArch64::STR_ZXI:
3787 Scale = TypeSize::getScalable(16);
3788 Width = TypeSize::getScalable(16);
3789 MinOffset = -256;
3790 MaxOffset = 255;
3791 break;
3792 case AArch64::LD1B_IMM:
3793 case AArch64::LD1H_IMM:
3794 case AArch64::LD1W_IMM:
3795 case AArch64::LD1D_IMM:
3796 case AArch64::LDNT1B_ZRI:
3797 case AArch64::LDNT1H_ZRI:
3798 case AArch64::LDNT1W_ZRI:
3799 case AArch64::LDNT1D_ZRI:
3800 case AArch64::ST1B_IMM:
3801 case AArch64::ST1H_IMM:
3802 case AArch64::ST1W_IMM:
3803 case AArch64::ST1D_IMM:
3804 case AArch64::STNT1B_ZRI:
3805 case AArch64::STNT1H_ZRI:
3806 case AArch64::STNT1W_ZRI:
3807 case AArch64::STNT1D_ZRI:
3808 case AArch64::LDNF1B_IMM:
3809 case AArch64::LDNF1H_IMM:
3810 case AArch64::LDNF1W_IMM:
3811 case AArch64::LDNF1D_IMM:
3812 // A full vector's worth of data
3813 // Width = mbytes * elements
3814 Scale = TypeSize::getScalable(16);
3815 Width = TypeSize::getScalable(16);
3816 MinOffset = -8;
3817 MaxOffset = 7;
3818 break;
3819 case AArch64::LD2B_IMM:
3820 case AArch64::LD2H_IMM:
3821 case AArch64::LD2W_IMM:
3822 case AArch64::LD2D_IMM:
3823 case AArch64::ST2B_IMM:
3824 case AArch64::ST2H_IMM:
3825 case AArch64::ST2W_IMM:
3826 case AArch64::ST2D_IMM:
3827 Scale = TypeSize::getScalable(32);
3828 Width = TypeSize::getScalable(16 * 2);
3829 MinOffset = -8;
3830 MaxOffset = 7;
3831 break;
3832 case AArch64::LD3B_IMM:
3833 case AArch64::LD3H_IMM:
3834 case AArch64::LD3W_IMM:
3835 case AArch64::LD3D_IMM:
3836 case AArch64::ST3B_IMM:
3837 case AArch64::ST3H_IMM:
3838 case AArch64::ST3W_IMM:
3839 case AArch64::ST3D_IMM:
3840 Scale = TypeSize::getScalable(48);
3841 Width = TypeSize::getScalable(16 * 3);
3842 MinOffset = -8;
3843 MaxOffset = 7;
3844 break;
3845 case AArch64::LD4B_IMM:
3846 case AArch64::LD4H_IMM:
3847 case AArch64::LD4W_IMM:
3848 case AArch64::LD4D_IMM:
3849 case AArch64::ST4B_IMM:
3850 case AArch64::ST4H_IMM:
3851 case AArch64::ST4W_IMM:
3852 case AArch64::ST4D_IMM:
3853 Scale = TypeSize::getScalable(64);
3854 Width = TypeSize::getScalable(16 * 4);
3855 MinOffset = -8;
3856 MaxOffset = 7;
3857 break;
3858 case AArch64::LD1B_H_IMM:
3859 case AArch64::LD1SB_H_IMM:
3860 case AArch64::LD1H_S_IMM:
3861 case AArch64::LD1SH_S_IMM:
3862 case AArch64::LD1W_D_IMM:
3863 case AArch64::LD1SW_D_IMM:
3864 case AArch64::ST1B_H_IMM:
3865 case AArch64::ST1H_S_IMM:
3866 case AArch64::ST1W_D_IMM:
3867 case AArch64::LDNF1B_H_IMM:
3868 case AArch64::LDNF1SB_H_IMM:
3869 case AArch64::LDNF1H_S_IMM:
3870 case AArch64::LDNF1SH_S_IMM:
3871 case AArch64::LDNF1W_D_IMM:
3872 case AArch64::LDNF1SW_D_IMM:
3873 // A half vector's worth of data
3874 // Width = mbytes * elements
3875 Scale = TypeSize::getScalable(8);
3876 Width = TypeSize::getScalable(8);
3877 MinOffset = -8;
3878 MaxOffset = 7;
3879 break;
3880 case AArch64::LD1B_S_IMM:
3881 case AArch64::LD1SB_S_IMM:
3882 case AArch64::LD1H_D_IMM:
3883 case AArch64::LD1SH_D_IMM:
3884 case AArch64::ST1B_S_IMM:
3885 case AArch64::ST1H_D_IMM:
3886 case AArch64::LDNF1B_S_IMM:
3887 case AArch64::LDNF1SB_S_IMM:
3888 case AArch64::LDNF1H_D_IMM:
3889 case AArch64::LDNF1SH_D_IMM:
3890 // A quarter vector's worth of data
3891 // Width = mbytes * elements
3892 Scale = TypeSize::getScalable(4);
3893 Width = TypeSize::getScalable(4);
3894 MinOffset = -8;
3895 MaxOffset = 7;
3896 break;
3897 case AArch64::LD1B_D_IMM:
3898 case AArch64::LD1SB_D_IMM:
3899 case AArch64::ST1B_D_IMM:
3900 case AArch64::LDNF1B_D_IMM:
3901 case AArch64::LDNF1SB_D_IMM:
3902 // An eighth vector's worth of data
3903 // Width = mbytes * elements
3904 Scale = TypeSize::getScalable(2);
3905 Width = TypeSize::getScalable(2);
3906 MinOffset = -8;
3907 MaxOffset = 7;
3908 break;
3909 case AArch64::ST2Gi:
3910 case AArch64::STZ2Gi:
3911 Scale = TypeSize::getFixed(16);
3912 Width = TypeSize::getFixed(32);
3913 MinOffset = -256;
3914 MaxOffset = 255;
3915 break;
3916 case AArch64::STGPi:
3917 Scale = TypeSize::getFixed(16);
3918 Width = TypeSize::getFixed(16);
3919 MinOffset = -64;
3920 MaxOffset = 63;
3921 break;
3922 case AArch64::LD1RB_IMM:
3923 case AArch64::LD1RB_H_IMM:
3924 case AArch64::LD1RB_S_IMM:
3925 case AArch64::LD1RB_D_IMM:
3926 case AArch64::LD1RSB_H_IMM:
3927 case AArch64::LD1RSB_S_IMM:
3928 case AArch64::LD1RSB_D_IMM:
3929 Scale = TypeSize::getFixed(1);
3930 Width = TypeSize::getFixed(1);
3931 MinOffset = 0;
3932 MaxOffset = 63;
3933 break;
3934 case AArch64::LD1RH_IMM:
3935 case AArch64::LD1RH_S_IMM:
3936 case AArch64::LD1RH_D_IMM:
3937 case AArch64::LD1RSH_S_IMM:
3938 case AArch64::LD1RSH_D_IMM:
3939 Scale = TypeSize::getFixed(2);
3940 Width = TypeSize::getFixed(2);
3941 MinOffset = 0;
3942 MaxOffset = 63;
3943 break;
3944 case AArch64::LD1RW_IMM:
3945 case AArch64::LD1RW_D_IMM:
3946 case AArch64::LD1RSW_IMM:
3947 Scale = TypeSize::getFixed(4);
3948 Width = TypeSize::getFixed(4);
3949 MinOffset = 0;
3950 MaxOffset = 63;
3951 break;
3952 case AArch64::LD1RD_IMM:
3953 Scale = TypeSize::getFixed(8);
3954 Width = TypeSize::getFixed(8);
3955 MinOffset = 0;
3956 MaxOffset = 63;
3957 break;
3958 }
3959
3960 return true;
3961}
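// Illustrative sketch (not part of the original file; the helper name is
// hypothetical): a caller can combine the Scale, MinOffset and MaxOffset
// values produced above to test whether a byte offset is encodable in an
// instruction's immediate field. For example, LDRXui (Scale = 8, offsets
// [0, 4095]) accepts byte offsets 0, 8, ..., 32760, while LDPXi (Scale = 8,
// offsets [-64, 63]) accepts byte offsets -512, -504, ..., 504.
static bool isEncodableLdStByteOffset(int64_t Scale, int64_t MinOffset,
                                      int64_t MaxOffset, int64_t ByteOffset) {
  if (ByteOffset % Scale != 0)
    return false;
  int64_t Scaled = ByteOffset / Scale;
  return Scaled >= MinOffset && Scaled <= MaxOffset;
}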
3962
3963// Scaling factor for unscaled load or store.
3964int AArch64InstrInfo::getMemScale(unsigned Opc) {
3965 switch (Opc) {
3966 default:
3967 llvm_unreachable("Opcode has unknown scale!");
3968 case AArch64::LDRBBui:
3969 case AArch64::LDURBBi:
3970 case AArch64::LDRSBWui:
3971 case AArch64::LDURSBWi:
3972 case AArch64::STRBBui:
3973 case AArch64::STURBBi:
3974 return 1;
3975 case AArch64::LDRHHui:
3976 case AArch64::LDURHHi:
3977 case AArch64::LDRSHWui:
3978 case AArch64::LDURSHWi:
3979 case AArch64::STRHHui:
3980 case AArch64::STURHHi:
3981 return 2;
3982 case AArch64::LDRSui:
3983 case AArch64::LDURSi:
3984 case AArch64::LDRSpre:
3985 case AArch64::LDRSWui:
3986 case AArch64::LDURSWi:
3987 case AArch64::LDRSWpre:
3988 case AArch64::LDRWpre:
3989 case AArch64::LDRWui:
3990 case AArch64::LDURWi:
3991 case AArch64::STRSui:
3992 case AArch64::STURSi:
3993 case AArch64::STRSpre:
3994 case AArch64::STRWui:
3995 case AArch64::STURWi:
3996 case AArch64::STRWpre:
3997 case AArch64::LDPSi:
3998 case AArch64::LDPSWi:
3999 case AArch64::LDPWi:
4000 case AArch64::STPSi:
4001 case AArch64::STPWi:
4002 return 4;
4003 case AArch64::LDRDui:
4004 case AArch64::LDURDi:
4005 case AArch64::LDRDpre:
4006 case AArch64::LDRXui:
4007 case AArch64::LDURXi:
4008 case AArch64::LDRXpre:
4009 case AArch64::STRDui:
4010 case AArch64::STURDi:
4011 case AArch64::STRDpre:
4012 case AArch64::STRXui:
4013 case AArch64::STURXi:
4014 case AArch64::STRXpre:
4015 case AArch64::LDPDi:
4016 case AArch64::LDPXi:
4017 case AArch64::STPDi:
4018 case AArch64::STPXi:
4019 return 8;
4020 case AArch64::LDRQui:
4021 case AArch64::LDURQi:
4022 case AArch64::STRQui:
4023 case AArch64::STURQi:
4024 case AArch64::STRQpre:
4025 case AArch64::LDPQi:
4026 case AArch64::LDRQpre:
4027 case AArch64::STPQi:
4028 case AArch64::STGi:
4029 case AArch64::STZGi:
4030 case AArch64::ST2Gi:
4031 case AArch64::STZ2Gi:
4032 case AArch64::STGPi:
4033 return 16;
4034 }
4035}
4036
4037bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4038 switch (MI.getOpcode()) {
4039 default:
4040 return false;
4041 case AArch64::LDRWpre:
4042 case AArch64::LDRXpre:
4043 case AArch64::LDRSWpre:
4044 case AArch64::LDRSpre:
4045 case AArch64::LDRDpre:
4046 case AArch64::LDRQpre:
4047 return true;
4048 }
4049}
4050
4051bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4052 switch (MI.getOpcode()) {
4053 default:
4054 return false;
4055 case AArch64::STRWpre:
4056 case AArch64::STRXpre:
4057 case AArch64::STRSpre:
4058 case AArch64::STRDpre:
4059 case AArch64::STRQpre:
4060 return true;
4061 }
4062}
4063
4064bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4065 return isPreLd(MI) || isPreSt(MI);
4066}
4067
4068bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4069 switch (MI.getOpcode()) {
4070 default:
4071 return false;
4072 case AArch64::LDPSi:
4073 case AArch64::LDPSWi:
4074 case AArch64::LDPDi:
4075 case AArch64::LDPQi:
4076 case AArch64::LDPWi:
4077 case AArch64::LDPXi:
4078 case AArch64::STPSi:
4079 case AArch64::STPDi:
4080 case AArch64::STPQi:
4081 case AArch64::STPWi:
4082 case AArch64::STPXi:
4083 case AArch64::STGPi:
4084 return true;
4085 }
4086}
4087
4088const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4089 unsigned Idx =
4090 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4091 : 1;
4092 return MI.getOperand(Idx);
4093}
4094
4095const MachineOperand &
4096AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4097 unsigned Idx =
4098 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4099 : 2;
4100 return MI.getOperand(Idx);
4101}
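// Operand-layout examples for the index selection above (illustrative): a
// plain scaled load such as LDRXui is (Rt, Rn, imm), so the base is operand
// 1 and the offset operand 2; a paired access such as LDPXi is
// (Rt, Rt2, Rn, imm) and a pre/post-indexed access such as LDRXpre is
// (Rn_writeback, Rt, Rn, imm), so in both of those cases the base is
// operand 2 and the offset operand 3.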
4102
4103static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4104 Register Reg) {
4105 if (MI.getParent() == nullptr)
4106 return nullptr;
4107 const MachineFunction *MF = MI.getParent()->getParent();
4108 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4109}
4110
4111bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4112 auto IsHFPR = [&](const MachineOperand &Op) {
4113 if (!Op.isReg())
4114 return false;
4115 auto Reg = Op.getReg();
4116 if (Reg.isPhysical())
4117 return AArch64::FPR16RegClass.contains(Reg);
4118 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4119 return TRC == &AArch64::FPR16RegClass ||
4120 TRC == &AArch64::FPR16_loRegClass;
4121 };
4122 return llvm::any_of(MI.operands(), IsHFPR);
4123}
4124
4125bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4126 auto IsQFPR = [&](const MachineOperand &Op) {
4127 if (!Op.isReg())
4128 return false;
4129 auto Reg = Op.getReg();
4130 if (Reg.isPhysical())
4131 return AArch64::FPR128RegClass.contains(Reg);
4132 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4133 return TRC == &AArch64::FPR128RegClass ||
4134 TRC == &AArch64::FPR128_loRegClass;
4135 };
4136 return llvm::any_of(MI.operands(), IsQFPR);
4137}
4138
4139bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4140 switch (MI.getOpcode()) {
4141 case AArch64::BRK:
4142 case AArch64::HLT:
4143 case AArch64::PACIASP:
4144 case AArch64::PACIBSP:
4145 // Implicit BTI behavior.
4146 return true;
4147 case AArch64::PAUTH_PROLOGUE:
4148 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4149 return true;
4150 case AArch64::HINT: {
4151 unsigned Imm = MI.getOperand(0).getImm();
4152 // Explicit BTI instruction.
4153 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4154 return true;
4155 // PACI(A|B)SP instructions.
4156 if (Imm == 25 || Imm == 27)
4157 return true;
4158 return false;
4159 }
4160 default:
4161 return false;
4162 }
4163}
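// For reference, the HINT immediates checked above encode BTI and PAC
// operations: 32 = BTI, 34 = BTI c, 36 = BTI j, 38 = BTI jc, and 25/27 =
// PACIASP/PACIBSP, which also act as BTI-compatible landing pads.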
4164
4165bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4166 auto IsFPR = [&](const MachineOperand &Op) {
4167 if (!Op.isReg())
4168 return false;
4169 auto Reg = Op.getReg();
4170 if (Reg.isPhysical())
4171 return AArch64::FPR128RegClass.contains(Reg) ||
4172 AArch64::FPR64RegClass.contains(Reg) ||
4173 AArch64::FPR32RegClass.contains(Reg) ||
4174 AArch64::FPR16RegClass.contains(Reg) ||
4175 AArch64::FPR8RegClass.contains(Reg);
4176
4177 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4178 return TRC == &AArch64::FPR128RegClass ||
4179 TRC == &AArch64::FPR128_loRegClass ||
4180 TRC == &AArch64::FPR64RegClass ||
4181 TRC == &AArch64::FPR64_loRegClass ||
4182 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4183 TRC == &AArch64::FPR8RegClass;
4184 };
4185 return llvm::any_of(MI.operands(), IsFPR);
4186}
4187
4188// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4189// scaled.
4190static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4191 int Scale = AArch64InstrInfo::getMemScale(Opc);
4192
4193 // If the byte-offset isn't a multiple of the stride, we can't scale this
4194 // offset.
4195 if (Offset % Scale != 0)
4196 return false;
4197
4198 // Convert the byte offset used by the unscaled forms into the "element"
4199 // offset used by the scaled pair load/store instructions.
4200 Offset /= Scale;
4201 return true;
4202}
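// Worked example (illustrative): for STURXi, getMemScale returns 8, so a
// byte offset of 16 is converted to the element offset 2 expected by the
// paired forms, whereas a byte offset of 12 is rejected because it is not a
// multiple of the 8-byte stride.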
4203
4204static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4205 if (FirstOpc == SecondOpc)
4206 return true;
4207 // We can also pair sign-ext and zero-ext instructions.
4208 switch (FirstOpc) {
4209 default:
4210 return false;
4211 case AArch64::STRSui:
4212 case AArch64::STURSi:
4213 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4214 case AArch64::STRDui:
4215 case AArch64::STURDi:
4216 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4217 case AArch64::STRQui:
4218 case AArch64::STURQi:
4219 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4220 case AArch64::STRWui:
4221 case AArch64::STURWi:
4222 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4223 case AArch64::STRXui:
4224 case AArch64::STURXi:
4225 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4226 case AArch64::LDRSui:
4227 case AArch64::LDURSi:
4228 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4229 case AArch64::LDRDui:
4230 case AArch64::LDURDi:
4231 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4232 case AArch64::LDRQui:
4233 case AArch64::LDURQi:
4234 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4235 case AArch64::LDRWui:
4236 case AArch64::LDURWi:
4237 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4238 case AArch64::LDRSWui:
4239 case AArch64::LDURSWi:
4240 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4241 case AArch64::LDRXui:
4242 case AArch64::LDURXi:
4243 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4244 }
4245 // These instructions can't be paired based on their opcodes.
4246 return false;
4247}
4248
4249static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4250 int64_t Offset1, unsigned Opcode1, int FI2,
4251 int64_t Offset2, unsigned Opcode2) {
4252 // Accesses through fixed stack object frame indices may refer to distinct
4253 // fixed stack slots. Check that object offsets plus instruction offsets are adjacent.
4254 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4255 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4256 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4257 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4258 // Convert to scaled object offsets.
4259 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4260 if (ObjectOffset1 % Scale1 != 0)
4261 return false;
4262 ObjectOffset1 /= Scale1;
4263 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4264 if (ObjectOffset2 % Scale2 != 0)
4265 return false;
4266 ObjectOffset2 /= Scale2;
4267 ObjectOffset1 += Offset1;
4268 ObjectOffset2 += Offset2;
4269 return ObjectOffset1 + 1 == ObjectOffset2;
4270 }
4271
4272 return FI1 == FI2;
4273}
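// Worked example (illustrative): two LDRXui accesses to fixed stack objects
// at object offsets 0 and 8, each with an instruction offset of 0, scale to
// element offsets 0 and 1 and are therefore treated as adjacent; if either
// object offset were not a multiple of its 8-byte scale, the check would
// conservatively fail.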
4274
4275/// Detect opportunities for ldp/stp formation.
4276///
4277/// Only called for LdSt for which getMemOperandWithOffset returns true.
4278bool AArch64InstrInfo::shouldClusterMemOps(
4279 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4280 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4281 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4282 unsigned NumBytes) const {
4283 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4284 const MachineOperand &BaseOp1 = *BaseOps1.front();
4285 const MachineOperand &BaseOp2 = *BaseOps2.front();
4286 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4287 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4288 if (BaseOp1.getType() != BaseOp2.getType())
4289 return false;
4290
4291 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4292 "Only base registers and frame indices are supported.");
4293
4294 // Check for both base regs and base FI.
4295 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4296 return false;
4297
4298 // Only cluster up to a single pair.
4299 if (ClusterSize > 2)
4300 return false;
4301
4302 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4303 return false;
4304
4305 // Can we pair these instructions based on their opcodes?
4306 unsigned FirstOpc = FirstLdSt.getOpcode();
4307 unsigned SecondOpc = SecondLdSt.getOpcode();
4308 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4309 return false;
4310
4311 // Can't merge, for example, volatile accesses or load/stores that carry a
4312 // hint to avoid pair formation.
4313 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4314 !isCandidateToMergeOrPair(SecondLdSt))
4315 return false;
4316
4317 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4318 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4319 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4320 return false;
4321
4322 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4323 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4324 return false;
4325
4326 // Pairwise instructions have a 7-bit signed offset field.
4327 if (Offset1 > 63 || Offset1 < -64)
4328 return false;
4329
4330 // The caller should already have ordered FirstLdSt/SecondLdSt by offset.
4331 // Note: this need not hold when the frame-index bases differ.
4332 if (BaseOp1.isFI()) {
4333 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4334 "Caller should have ordered offsets.");
4335
4336 const MachineFrameInfo &MFI =
4337 FirstLdSt.getParent()->getParent()->getFrameInfo();
4338 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4339 BaseOp2.getIndex(), Offset2, SecondOpc);
4340 }
4341
4342 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4343
4344 return Offset1 + 1 == Offset2;
4345}
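// Worked example (illustrative): LDRXui %x0, %x1, 1 followed by
// LDRXui %x2, %x1, 2 shares a base register, stays within the signed 7-bit
// element range [-64, 63], and has adjacent element offsets (1 + 1 == 2),
// so the scheduler may cluster the two loads and the load/store optimizer
// can later rewrite them as a single LDPXi.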
4346
4347static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4348 unsigned Reg, unsigned SubIdx,
4349 unsigned State,
4350 const TargetRegisterInfo *TRI) {
4351 if (!SubIdx)
4352 return MIB.addReg(Reg, State);
4353
4354 if (Register::isPhysicalRegister(Reg))
4355 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4356 return MIB.addReg(Reg, State, SubIdx);
4357}
4358
4359static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4360 unsigned NumRegs) {
4361 // We really want the positive remainder mod 32 here, which happens to be
4362 // easily obtainable with a mask.
4363 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4364}
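// Worked example (illustrative): copying the tuple D0_D1 into D1_D2 gives
// (1 - 0) & 0x1f == 1, which is less than NumRegs == 2, so a forward
// (ascending) sub-register copy would overwrite D1 before it is read;
// copyPhysRegTuple therefore iterates backwards in that case. For
// D0_D1 -> D2_D3 the masked difference is 2, so a forward copy is safe.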
4365
4366void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4367 MachineBasicBlock::iterator I,
4368 const DebugLoc &DL, MCRegister DestReg,
4369 MCRegister SrcReg, bool KillSrc,
4370 unsigned Opcode,
4371 ArrayRef<unsigned> Indices) const {
4372 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4373 const TargetRegisterInfo *TRI = &getRegisterInfo();
4374 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4375 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4376 unsigned NumRegs = Indices.size();
4377
4378 int SubReg = 0, End = NumRegs, Incr = 1;
4379 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4380 SubReg = NumRegs - 1;
4381 End = -1;
4382 Incr = -1;
4383 }
4384
4385 for (; SubReg != End; SubReg += Incr) {
4386 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4387 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4388 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4389 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4390 }
4391}
4392
4393void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4394 MachineBasicBlock::iterator I,
4395 DebugLoc DL, unsigned DestReg,
4396 unsigned SrcReg, bool KillSrc,
4397 unsigned Opcode, unsigned ZeroReg,
4398 llvm::ArrayRef<unsigned> Indices) const {
4399 const TargetRegisterInfo *TRI = &getRegisterInfo();
4400 unsigned NumRegs = Indices.size();
4401
4402#ifndef NDEBUG
4403 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4404 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4405 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4406 "GPR reg sequences should not be able to overlap");
4407#endif
4408
4409 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4410 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4411 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4412 MIB.addReg(ZeroReg);
4413 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4414 MIB.addImm(0);
4415 }
4416}
4417
4418void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4419 MachineBasicBlock::iterator I,
4420 const DebugLoc &DL, MCRegister DestReg,
4421 MCRegister SrcReg, bool KillSrc) const {
4422 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4423 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4424 const TargetRegisterInfo *TRI = &getRegisterInfo();
4425
4426 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4427 // If either operand is WSP, expand to ADD #0.
4428 if (Subtarget.hasZeroCycleRegMove()) {
4429 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4430 MCRegister DestRegX = TRI->getMatchingSuperReg(
4431 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4432 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4433 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4434 // This instruction is reading and writing X registers. This may upset
4435 // the register scavenger and machine verifier, so we need to indicate
4436 // that we are reading an undefined value from SrcRegX, but a proper
4437 // value from SrcReg.
4438 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4439 .addReg(SrcRegX, RegState::Undef)
4440 .addImm(0)
4442 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4443 } else {
4444 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4445 .addReg(SrcReg, getKillRegState(KillSrc))
4446 .addImm(0)
4448 }
4449 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4450 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4451 .addImm(0)
4453 } else {
4454 if (Subtarget.hasZeroCycleRegMove()) {
4455 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4456 MCRegister DestRegX = TRI->getMatchingSuperReg(
4457 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4458 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4459 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4460 // This instruction is reading and writing X registers. This may upset
4461 // the register scavenger and machine verifier, so we need to indicate
4462 // that we are reading an undefined value from SrcRegX, but a proper
4463 // value from SrcReg.
4464 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4465 .addReg(AArch64::XZR)
4466 .addReg(SrcRegX, RegState::Undef)
4467 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4468 } else {
4469 // Otherwise, expand to ORR WZR.
4470 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4471 .addReg(AArch64::WZR)
4472 .addReg(SrcReg, getKillRegState(KillSrc));
4473 }
4474 }
4475 return;
4476 }
4477
4478 // Copy a Predicate register by ORRing with itself.
4479 if (AArch64::PPRRegClass.contains(DestReg) &&
4480 AArch64::PPRRegClass.contains(SrcReg)) {
4481 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4482 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4483 .addReg(SrcReg) // Pg
4484 .addReg(SrcReg)
4485 .addReg(SrcReg, getKillRegState(KillSrc));
4486 return;
4487 }
4488
4489 // Copy a predicate-as-counter register by ORRing with itself as if it
4490 // were a regular predicate (mask) register.
4491 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4492 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4493 if (DestIsPNR || SrcIsPNR) {
4494 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4495 "Unexpected predicate-as-counter register.");
4496 auto ToPPR = [](MCRegister R) -> MCRegister {
4497 return (R - AArch64::PN0) + AArch64::P0;
4498 };
4499 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4500 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4501
4502 if (PPRSrcReg != PPRDestReg) {
4503 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4504 .addReg(PPRSrcReg) // Pg
4505 .addReg(PPRSrcReg)
4506 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4507 if (DestIsPNR)
4508 NewMI.addDef(DestReg, RegState::Implicit);
4509 }
4510 return;
4511 }
4512
4513 // Copy a Z register by ORRing with itself.
4514 if (AArch64::ZPRRegClass.contains(DestReg) &&
4515 AArch64::ZPRRegClass.contains(SrcReg)) {
4516 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4517 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4518 .addReg(SrcReg)
4519 .addReg(SrcReg, getKillRegState(KillSrc));
4520 return;
4521 }
4522
4523 // Copy a Z register pair by copying the individual sub-registers.
4524 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4525 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4526 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4527 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4528 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4529 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4530 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4531 Indices);
4532 return;
4533 }
4534
4535 // Copy a Z register triple by copying the individual sub-registers.
4536 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4537 AArch64::ZPR3RegClass.contains(SrcReg)) {
4538 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4539 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4540 AArch64::zsub2};
4541 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4542 Indices);
4543 return;
4544 }
4545
4546 // Copy a Z register quad by copying the individual sub-registers.
4547 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4548 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4549 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4550 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4551 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4552 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4553 AArch64::zsub2, AArch64::zsub3};
4554 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4555 Indices);
4556 return;
4557 }
4558
4559 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4560 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4561 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4562 // If either operand is SP, expand to ADD #0.
4563 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4564 .addReg(SrcReg, getKillRegState(KillSrc))
4565 .addImm(0)
4567 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4568 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4569 .addImm(0)
4571 } else {
4572 // Otherwise, expand to ORR XZR.
4573 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4574 .addReg(AArch64::XZR)
4575 .addReg(SrcReg, getKillRegState(KillSrc));
4576 }
4577 return;
4578 }
4579
4580 // Copy a DDDD register quad by copying the individual sub-registers.
4581 if (AArch64::DDDDRegClass.contains(DestReg) &&
4582 AArch64::DDDDRegClass.contains(SrcReg)) {
4583 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4584 AArch64::dsub2, AArch64::dsub3};
4585 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4586 Indices);
4587 return;
4588 }
4589
4590 // Copy a DDD register triple by copying the individual sub-registers.
4591 if (AArch64::DDDRegClass.contains(DestReg) &&
4592 AArch64::DDDRegClass.contains(SrcReg)) {
4593 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4594 AArch64::dsub2};
4595 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4596 Indices);
4597 return;
4598 }
4599
4600 // Copy a DD register pair by copying the individual sub-registers.
4601 if (AArch64::DDRegClass.contains(DestReg) &&
4602 AArch64::DDRegClass.contains(SrcReg)) {
4603 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4604 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4605 Indices);
4606 return;
4607 }
4608
4609 // Copy a QQQQ register quad by copying the individual sub-registers.
4610 if (AArch64::QQQQRegClass.contains(DestReg) &&
4611 AArch64::QQQQRegClass.contains(SrcReg)) {
4612 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4613 AArch64::qsub2, AArch64::qsub3};
4614 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4615 Indices);
4616 return;
4617 }
4618
4619 // Copy a QQQ register triple by copying the individual sub-registers.
4620 if (AArch64::QQQRegClass.contains(DestReg) &&
4621 AArch64::QQQRegClass.contains(SrcReg)) {
4622 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4623 AArch64::qsub2};
4624 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4625 Indices);
4626 return;
4627 }
4628
4629 // Copy a QQ register pair by copying the individual sub-registers.
4630 if (AArch64::QQRegClass.contains(DestReg) &&
4631 AArch64::QQRegClass.contains(SrcReg)) {
4632 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4633 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4634 Indices);
4635 return;
4636 }
4637
4638 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4639 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4640 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4641 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4642 AArch64::XZR, Indices);
4643 return;
4644 }
4645
4646 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4647 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4648 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4649 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4650 AArch64::WZR, Indices);
4651 return;
4652 }
4653
4654 if (AArch64::FPR128RegClass.contains(DestReg) &&
4655 AArch64::FPR128RegClass.contains(SrcReg)) {
4656 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4657 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4658 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4659 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4660 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4661 else if (Subtarget.hasNEON())
4662 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4663 .addReg(SrcReg)
4664 .addReg(SrcReg, getKillRegState(KillSrc));
4665 else {
4666 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4667 .addReg(AArch64::SP, RegState::Define)
4668 .addReg(SrcReg, getKillRegState(KillSrc))
4669 .addReg(AArch64::SP)
4670 .addImm(-16);
4671 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4672 .addReg(AArch64::SP, RegState::Define)
4673 .addReg(DestReg, RegState::Define)
4674 .addReg(AArch64::SP)
4675 .addImm(16);
4676 }
4677 return;
4678 }
4679
4680 if (AArch64::FPR64RegClass.contains(DestReg) &&
4681 AArch64::FPR64RegClass.contains(SrcReg)) {
4682 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4683 .addReg(SrcReg, getKillRegState(KillSrc));
4684 return;
4685 }
4686
4687 if (AArch64::FPR32RegClass.contains(DestReg) &&
4688 AArch64::FPR32RegClass.contains(SrcReg)) {
4689 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4690 .addReg(SrcReg, getKillRegState(KillSrc));
4691 return;
4692 }
4693
4694 if (AArch64::FPR16RegClass.contains(DestReg) &&
4695 AArch64::FPR16RegClass.contains(SrcReg)) {
4696 DestReg =
4697 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4698 SrcReg =
4699 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4700 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4701 .addReg(SrcReg, getKillRegState(KillSrc));
4702 return;
4703 }
4704
4705 if (AArch64::FPR8RegClass.contains(DestReg) &&
4706 AArch64::FPR8RegClass.contains(SrcReg)) {
4707 DestReg =
4708 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4709 SrcReg =
4710 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4711 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4712 .addReg(SrcReg, getKillRegState(KillSrc));
4713 return;
4714 }
4715
4716 // Copies between GPR64 and FPR64.
4717 if (AArch64::FPR64RegClass.contains(DestReg) &&
4718 AArch64::GPR64RegClass.contains(SrcReg)) {
4719 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4720 .addReg(SrcReg, getKillRegState(KillSrc));
4721 return;
4722 }
4723 if (AArch64::GPR64RegClass.contains(DestReg) &&
4724 AArch64::FPR64RegClass.contains(SrcReg)) {
4725 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4726 .addReg(SrcReg, getKillRegState(KillSrc));
4727 return;
4728 }
4729 // Copies between GPR32 and FPR32.
4730 if (AArch64::FPR32RegClass.contains(DestReg) &&
4731 AArch64::GPR32RegClass.contains(SrcReg)) {
4732 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4733 .addReg(SrcReg, getKillRegState(KillSrc));
4734 return;
4735 }
4736 if (AArch64::GPR32RegClass.contains(DestReg) &&
4737 AArch64::FPR32RegClass.contains(SrcReg)) {
4738 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4739 .addReg(SrcReg, getKillRegState(KillSrc));
4740 return;
4741 }
4742
4743 if (DestReg == AArch64::NZCV) {
4744 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4745 BuildMI(MBB, I, DL, get(AArch64::MSR))
4746 .addImm(AArch64SysReg::NZCV)
4747 .addReg(SrcReg, getKillRegState(KillSrc))
4748 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4749 return;
4750 }
4751
4752 if (SrcReg == AArch64::NZCV) {
4753 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4754 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4755 .addImm(AArch64SysReg::NZCV)
4756 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4757 return;
4758 }
4759
4760#ifndef NDEBUG
4761 const TargetRegisterInfo &TRI = getRegisterInfo();
4762 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4763 << TRI.getRegAsmName(SrcReg) << "\n";
4764#endif
4765 llvm_unreachable("unimplemented reg-to-reg copy");
4766}
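// For reference (illustrative summary of the cases above): a plain GPR64
// copy expands to "ORR Xd, XZR, Xn", copies involving SP/WSP use
// "ADD Xd, Xn, #0" because ORR cannot address the stack pointer, and an
// FPR128 copy becomes "ORR Vd.16b, Vn.16b, Vn.16b" when NEON is available.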
4767
4768static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
4769 MachineBasicBlock &MBB,
4770 MachineBasicBlock::iterator InsertBefore,
4771 const MCInstrDesc &MCID,
4772 Register SrcReg, bool IsKill,
4773 unsigned SubIdx0, unsigned SubIdx1, int FI,
4774 MachineMemOperand *MMO) {
4775 Register SrcReg0 = SrcReg;
4776 Register SrcReg1 = SrcReg;
4777 if (SrcReg.isPhysical()) {
4778 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4779 SubIdx0 = 0;
4780 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4781 SubIdx1 = 0;
4782 }
4783 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4784 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4785 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4786 .addFrameIndex(FI)
4787 .addImm(0)
4788 .addMemOperand(MMO);
4789}
4790
4791void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4792 MachineBasicBlock::iterator MBBI,
4793 Register SrcReg, bool isKill, int FI,
4794 const TargetRegisterClass *RC,
4795 const TargetRegisterInfo *TRI,
4796 Register VReg) const {
4797 MachineFunction &MF = *MBB.getParent();
4798 MachineFrameInfo &MFI = MF.getFrameInfo();
4799
4801 MachineMemOperand *MMO =
4803 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4804 unsigned Opc = 0;
4805 bool Offset = true;
4807 unsigned StackID = TargetStackID::Default;
4808 switch (TRI->getSpillSize(*RC)) {
4809 case 1:
4810 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4811 Opc = AArch64::STRBui;
4812 break;
4813 case 2:
4814 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4815 Opc = AArch64::STRHui;
4816 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
4817 assert(Subtarget.hasSVEorSME() &&
4818 "Unexpected register store without SVE store instructions");
4819 Opc = AArch64::STR_PXI;
4821 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
4822 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4823 "Unexpected register store without SVE2p1 or SME2");
4824 if (SrcReg.isVirtual()) {
4825 auto NewSrcReg =
4826 MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
4827 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), NewSrcReg)
4828 .addReg(SrcReg);
4829 SrcReg = NewSrcReg;
4830 } else
4831 SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
4832 Opc = AArch64::STR_PXI;
4834 }
4835 break;
4836 case 4:
4837 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4838 Opc = AArch64::STRWui;
4839 if (SrcReg.isVirtual())
4840 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4841 else
4842 assert(SrcReg != AArch64::WSP);
4843 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4844 Opc = AArch64::STRSui;
4845 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4846 Opc = AArch64::STR_PPXI;
4848 }
4849 break;
4850 case 8:
4851 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4852 Opc = AArch64::STRXui;
4853 if (SrcReg.isVirtual())
4854 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4855 else
4856 assert(SrcReg != AArch64::SP);
4857 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4858 Opc = AArch64::STRDui;
4859 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4861 get(AArch64::STPWi), SrcReg, isKill,
4862 AArch64::sube32, AArch64::subo32, FI, MMO);
4863 return;
4864 }
4865 break;
4866 case 16:
4867 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4868 Opc = AArch64::STRQui;
4869 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4870 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4871 Opc = AArch64::ST1Twov1d;
4872 Offset = false;
4873 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4875 get(AArch64::STPXi), SrcReg, isKill,
4876 AArch64::sube64, AArch64::subo64, FI, MMO);
4877 return;
4878 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4879 assert(Subtarget.hasSVEorSME() &&
4880 "Unexpected register store without SVE store instructions");
4881 Opc = AArch64::STR_ZXI;
4883 }
4884 break;
4885 case 24:
4886 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4887 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4888 Opc = AArch64::ST1Threev1d;
4889 Offset = false;
4890 }
4891 break;
4892 case 32:
4893 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4894 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4895 Opc = AArch64::ST1Fourv1d;
4896 Offset = false;
4897 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4898 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4899 Opc = AArch64::ST1Twov2d;
4900 Offset = false;
4901 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4902 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasSVEorSME() &&
4904 "Unexpected register store without SVE store instructions");
4905 Opc = AArch64::STR_ZZXI;
4907 }
4908 break;
4909 case 48:
4910 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4911 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4912 Opc = AArch64::ST1Threev2d;
4913 Offset = false;
4914 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4915 assert(Subtarget.hasSVEorSME() &&
4916 "Unexpected register store without SVE store instructions");
4917 Opc = AArch64::STR_ZZZXI;
4919 }
4920 break;
4921 case 64:
4922 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4923 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4924 Opc = AArch64::ST1Fourv2d;
4925 Offset = false;
4926 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4927 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4928 assert(Subtarget.hasSVEorSME() &&
4929 "Unexpected register store without SVE store instructions");
4930 Opc = AArch64::STR_ZZZZXI;
4932 }
4933 break;
4934 }
4935 assert(Opc && "Unknown register class");
4936 MFI.setStackID(FI, StackID);
4937
4938 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4939 .addReg(SrcReg, getKillRegState(isKill))
4940 .addFrameIndex(FI);
4941
4942 if (Offset)
4943 MI.addImm(0);
4944 if (PNRReg.isValid())
4945 MI.addDef(PNRReg, RegState::Implicit);
4946 MI.addMemOperand(MMO);
4947}
4948
4949static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
4950 MachineBasicBlock &MBB,
4951 MachineBasicBlock::iterator InsertBefore,
4952 const MCInstrDesc &MCID,
4953 Register DestReg, unsigned SubIdx0,
4954 unsigned SubIdx1, int FI,
4955 MachineMemOperand *MMO) {
4956 Register DestReg0 = DestReg;
4957 Register DestReg1 = DestReg;
4958 bool IsUndef = true;
4959 if (DestReg.isPhysical()) {
4960 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4961 SubIdx0 = 0;
4962 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4963 SubIdx1 = 0;
4964 IsUndef = false;
4965 }
4966 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4967 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4968 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4969 .addFrameIndex(FI)
4970 .addImm(0)
4971 .addMemOperand(MMO);
4972}
4973
4974void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4975 MachineBasicBlock::iterator MBBI,
4976 Register DestReg, int FI,
4977 const TargetRegisterClass *RC,
4978 const TargetRegisterInfo *TRI,
4979 Register VReg) const {
4980 MachineFunction &MF = *MBB.getParent();
4981 MachineFrameInfo &MFI = MF.getFrameInfo();
4983 MachineMemOperand *MMO =
4985 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4986
4987 unsigned Opc = 0;
4988 bool Offset = true;
4989 unsigned StackID = TargetStackID::Default;
4991 switch (TRI->getSpillSize(*RC)) {
4992 case 1:
4993 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4994 Opc = AArch64::LDRBui;
4995 break;
4996 case 2:
4997 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4998 Opc = AArch64::LDRHui;
4999 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5000 assert(Subtarget.hasSVEorSME() &&
5001 "Unexpected register load without SVE load instructions");
5002 Opc = AArch64::LDR_PXI;
5004 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
5005 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
5006 "Unexpected register load without SVE2p1 or SME2");
5007 PNRReg = DestReg;
5008 if (DestReg.isVirtual())
5009 DestReg = MF.getRegInfo().createVirtualRegister(&AArch64::PPRRegClass);
5010 else
5011 DestReg = (DestReg - AArch64::PN0) + AArch64::P0;
5012 Opc = AArch64::LDR_PXI;
5014 }
5015 break;
5016 case 4:
5017 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5018 Opc = AArch64::LDRWui;
5019 if (DestReg.isVirtual())
5020 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5021 else
5022 assert(DestReg != AArch64::WSP);
5023 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5024 Opc = AArch64::LDRSui;
5025 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5026 Opc = AArch64::LDR_PPXI;
5028 }
5029 break;
5030 case 8:
5031 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5032 Opc = AArch64::LDRXui;
5033 if (DestReg.isVirtual())
5034 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5035 else
5036 assert(DestReg != AArch64::SP);
5037 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5038 Opc = AArch64::LDRDui;
5039 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5041 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5042 AArch64::subo32, FI, MMO);
5043 return;
5044 }
5045 break;
5046 case 16:
5047 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5048 Opc = AArch64::LDRQui;
5049 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5050 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5051 Opc = AArch64::LD1Twov1d;
5052 Offset = false;
5053 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5055 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5056 AArch64::subo64, FI, MMO);
5057 return;
5058 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5059 assert(Subtarget.hasSVEorSME() &&
5060 "Unexpected register load without SVE load instructions");
5061 Opc = AArch64::LDR_ZXI;
5063 }
5064 break;
5065 case 24:
5066 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5067 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5068 Opc = AArch64::LD1Threev1d;
5069 Offset = false;
5070 }
5071 break;
5072 case 32:
5073 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5074 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5075 Opc = AArch64::LD1Fourv1d;
5076 Offset = false;
5077 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5078 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5079 Opc = AArch64::LD1Twov2d;
5080 Offset = false;
5081 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5082 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5083 assert(Subtarget.hasSVEorSME() &&
5084 "Unexpected register load without SVE load instructions");
5085 Opc = AArch64::LDR_ZZXI;
5087 }
5088 break;
5089 case 48:
5090 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5091 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5092 Opc = AArch64::LD1Threev2d;
5093 Offset = false;
5094 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5095 assert(Subtarget.hasSVEorSME() &&
5096 "Unexpected register load without SVE load instructions");
5097 Opc = AArch64::LDR_ZZZXI;
5099 }
5100 break;
5101 case 64:
5102 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5103 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5104 Opc = AArch64::LD1Fourv2d;
5105 Offset = false;
5106 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5107 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5108 assert(Subtarget.