1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/MC/MCAsmInfo.h"
44#include "llvm/MC/MCInst.h"
46#include "llvm/MC/MCInstrDesc.h"
51#include "llvm/Support/LEB128.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58#include <utility>
59
60using namespace llvm;
61
62#define GET_INSTRINFO_CTOR_DTOR
63#include "AArch64GenInstrInfo.inc"
64
66 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
67 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
68
70 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
71 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
72
73static cl::opt<unsigned>
74 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
75 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
76
77static cl::opt<unsigned>
78 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
79 cl::desc("Restrict range of B instructions (DEBUG)"));
80
81AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
82 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
83 AArch64::CATCHRET),
84 RI(STI.getTargetTriple()), Subtarget(STI) {}
85
86/// GetInstSizeInBytes - Return the number of bytes of code the specified
87/// instruction may be. This returns the maximum number of bytes.
88unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
89 const MachineBasicBlock &MBB = *MI.getParent();
90 const MachineFunction *MF = MBB.getParent();
91 const Function &F = MF->getFunction();
92 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
93
94 {
95 auto Op = MI.getOpcode();
96 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
97 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
98 }
99
100 // Meta-instructions emit no code.
101 if (MI.isMetaInstruction())
102 return 0;
103
104 // FIXME: We currently only handle pseudoinstructions that don't get expanded
105 // before the assembly printer.
106 unsigned NumBytes = 0;
107 const MCInstrDesc &Desc = MI.getDesc();
108
109 // The size should preferably be set in
110 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
111 // Specific cases handle instructions of variable sizes
112 switch (Desc.getOpcode()) {
113 default:
114 if (Desc.getSize())
115 return Desc.getSize();
116
117 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
118 // with fixed constant size but not specified in .td file) is a normal
119 // 4-byte insn.
120 NumBytes = 4;
121 break;
122 case TargetOpcode::STACKMAP:
123 // The upper bound for a stackmap intrinsic is the full length of its shadow
124 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
125 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
126 break;
127 case TargetOpcode::PATCHPOINT:
128 // The size of the patchpoint intrinsic is the number of bytes requested
129 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
130 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
131 break;
132 case TargetOpcode::STATEPOINT:
133 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
134 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
135 // No patch bytes means a normal call inst is emitted
136 if (NumBytes == 0)
137 NumBytes = 4;
138 break;
139 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
140 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
141 // instructions are expanded to the specified number of NOPs. Otherwise,
142 // they are expanded to 36-byte XRay sleds.
143 NumBytes =
144 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
145 break;
146 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
147 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
148 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
149 NumBytes = 36;
150 break;
151 case TargetOpcode::PATCHABLE_EVENT_CALL:
152 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
153 NumBytes = 24;
154 break;
155
156 case AArch64::SPACE:
157 NumBytes = MI.getOperand(1).getImm();
158 break;
159 case TargetOpcode::BUNDLE:
160 NumBytes = getInstBundleLength(MI);
161 break;
162 }
163
164 return NumBytes;
165}
166
167unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
168 unsigned Size = 0;
169 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
170 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
171 while (++I != E && I->isInsideBundle()) {
172 assert(!I->isBundle() && "No nested bundle!");
173 Size += getInstSizeInBytes(*I);
174 }
175 return Size;
176}
177
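// The helper below decodes a conditional-branch terminator into the Cond
// operand list used by analyzeBranch. As a rough sketch of the encoding
// (register and bit values are illustrative):
//   Bcc        -> Cond = { <cc> }
//   CB(N)Z[WX] -> Cond = { -1, <opcode>, <reg> }
//   TB(N)Z[WX] -> Cond = { -1, <opcode>, <reg>, <bit> }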
178static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
179 SmallVectorImpl<MachineOperand> &Cond) {
180 // Block ends with fall-through condbranch.
181 switch (LastInst->getOpcode()) {
182 default:
183 llvm_unreachable("Unknown branch instruction?");
184 case AArch64::Bcc:
185 Target = LastInst->getOperand(1).getMBB();
186 Cond.push_back(LastInst->getOperand(0));
187 break;
188 case AArch64::CBZW:
189 case AArch64::CBZX:
190 case AArch64::CBNZW:
191 case AArch64::CBNZX:
192 Target = LastInst->getOperand(1).getMBB();
193 Cond.push_back(MachineOperand::CreateImm(-1));
194 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
195 Cond.push_back(LastInst->getOperand(0));
196 break;
197 case AArch64::TBZW:
198 case AArch64::TBZX:
199 case AArch64::TBNZW:
200 case AArch64::TBNZX:
201 Target = LastInst->getOperand(2).getMBB();
202 Cond.push_back(MachineOperand::CreateImm(-1));
203 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
204 Cond.push_back(LastInst->getOperand(0));
205 Cond.push_back(LastInst->getOperand(1));
206 }
207}
208
209static unsigned getBranchDisplacementBits(unsigned Opc) {
210 switch (Opc) {
211 default:
212 llvm_unreachable("unexpected opcode!");
213 case AArch64::B:
214 return BDisplacementBits;
215 case AArch64::TBNZW:
216 case AArch64::TBZW:
217 case AArch64::TBNZX:
218 case AArch64::TBZX:
219 return TBZDisplacementBits;
220 case AArch64::CBNZW:
221 case AArch64::CBZW:
222 case AArch64::CBNZX:
223 case AArch64::CBZX:
224 return CBZDisplacementBits;
225 case AArch64::Bcc:
226 return BCCDisplacementBits;
227 }
228}
229
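// Branch displacements are in units of 4-byte instructions, so with the
// default widths above TB[N]Z (14 bits) reaches roughly +/-32 KiB, CB[N]Z and
// Bcc (19 bits) roughly +/-1 MiB, and B (26 bits) roughly +/-128 MiB.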
230bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
231 int64_t BrOffset) const {
232 unsigned Bits = getBranchDisplacementBits(BranchOp);
233 assert(Bits >= 3 && "max branch displacement must be enough to jump "
234 "over conditional branch expansion");
235 return isIntN(Bits, BrOffset / 4);
236}
237
238MachineBasicBlock *
239AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
240 switch (MI.getOpcode()) {
241 default:
242 llvm_unreachable("unexpected opcode!");
243 case AArch64::B:
244 return MI.getOperand(0).getMBB();
245 case AArch64::TBZW:
246 case AArch64::TBNZW:
247 case AArch64::TBZX:
248 case AArch64::TBNZX:
249 return MI.getOperand(2).getMBB();
250 case AArch64::CBZW:
251 case AArch64::CBNZW:
252 case AArch64::CBZX:
253 case AArch64::CBNZX:
254 case AArch64::Bcc:
255 return MI.getOperand(1).getMBB();
256 }
257}
258
259void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
260 MachineBasicBlock &NewDestBB,
261 MachineBasicBlock &RestoreBB,
262 const DebugLoc &DL,
263 int64_t BrOffset,
264 RegScavenger *RS) const {
265 assert(RS && "RegScavenger required for long branching");
266 assert(MBB.empty() &&
267 "new block should be inserted for expanding unconditional branch");
268 assert(MBB.pred_size() == 1);
269 assert(RestoreBB.empty() &&
270 "restore block should be inserted for restoring clobbered registers");
271
272 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
273 // Offsets outside of the signed 33-bit range are not supported for ADRP +
274 // ADD.
275 if (!isInt<33>(BrOffset))
277 "Branch offsets outside of the signed 33-bit range not supported");
278
279 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
280 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
281 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
282 .addReg(Reg)
283 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
284 .addImm(0);
285 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
286 };
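  // For reference, the lambda above materializes the destination address and
  // branches through it, emitting roughly (xN being the chosen register):
  //   adrp xN, DestBB
  //   add  xN, xN, :lo12:DestBB
  //   br   xN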
287
288 RS->enterBasicBlockEnd(MBB);
289 // If X16 is unused, we can rely on the linker to insert a range extension
290 // thunk if NewDestBB is out of range of a single B instruction.
291 constexpr Register Reg = AArch64::X16;
292 if (!RS->isRegUsed(Reg)) {
293 insertUnconditionalBranch(MBB, &NewDestBB, DL);
294 RS->setRegUsed(Reg);
295 return;
296 }
297
298 // If there's a free register and it's worth inflating the code size,
299 // manually insert the indirect branch.
300 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
301 if (Scavenged != AArch64::NoRegister &&
302 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
303 buildIndirectBranch(Scavenged, NewDestBB);
304 RS->setRegUsed(Scavenged);
305 return;
306 }
307
308 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
309 // with red zones.
310 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
311 if (!AFI || AFI->hasRedZone().value_or(true))
312 report_fatal_error(
313 "Unable to insert indirect branch inside function that has red zone");
314
315 // Otherwise, spill X16 and defer range extension to the linker.
316 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
317 .addReg(AArch64::SP, RegState::Define)
318 .addReg(Reg)
319 .addReg(AArch64::SP)
320 .addImm(-16);
321
322 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
323
324 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
325 .addReg(AArch64::SP, RegState::Define)
326 .addReg(Reg, RegState::Define)
327 .addReg(AArch64::SP)
328 .addImm(16);
329}
330
331// Branch analysis.
332bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
333 MachineBasicBlock *&TBB,
334 MachineBasicBlock *&FBB,
335 SmallVectorImpl<MachineOperand> &Cond,
336 bool AllowModify) const {
337 // If the block has no terminators, it just falls into the block after it.
338 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
339 if (I == MBB.end())
340 return false;
341
342 // Skip over SpeculationBarrierEndBB terminators
343 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
344 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
345 --I;
346 }
347
348 if (!isUnpredicatedTerminator(*I))
349 return false;
350
351 // Get the last instruction in the block.
352 MachineInstr *LastInst = &*I;
353
354 // If there is only one terminator instruction, process it.
355 unsigned LastOpc = LastInst->getOpcode();
356 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
357 if (isUncondBranchOpcode(LastOpc)) {
358 TBB = LastInst->getOperand(0).getMBB();
359 return false;
360 }
361 if (isCondBranchOpcode(LastOpc)) {
362 // Block ends with fall-through condbranch.
363 parseCondBranch(LastInst, TBB, Cond);
364 return false;
365 }
366 return true; // Can't handle indirect branch.
367 }
368
369 // Get the instruction before it if it is a terminator.
370 MachineInstr *SecondLastInst = &*I;
371 unsigned SecondLastOpc = SecondLastInst->getOpcode();
372
373 // If AllowModify is true and the block ends with two or more unconditional
374 // branches, delete all but the first unconditional branch.
375 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
376 while (isUncondBranchOpcode(SecondLastOpc)) {
377 LastInst->eraseFromParent();
378 LastInst = SecondLastInst;
379 LastOpc = LastInst->getOpcode();
380 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
381 // Return now; the only terminator is an unconditional branch.
382 TBB = LastInst->getOperand(0).getMBB();
383 return false;
384 }
385 SecondLastInst = &*I;
386 SecondLastOpc = SecondLastInst->getOpcode();
387 }
388 }
389
390 // If we're allowed to modify and the block ends in an unconditional branch
391 // which could simply fallthrough, remove the branch. (Note: This case only
392 // matters when we can't understand the whole sequence, otherwise it's also
393 // handled by BranchFolding.cpp.)
394 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
395 MBB.isLayoutSuccessor(LastInst->getOperand(0).getMBB())) {
396 LastInst->eraseFromParent();
397 LastInst = SecondLastInst;
398 LastOpc = LastInst->getOpcode();
399 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
400 assert(!isUncondBranchOpcode(LastOpc) &&
401 "unreachable unconditional branches removed above");
402
403 if (isCondBranchOpcode(LastOpc)) {
404 // Block ends with fall-through condbranch.
405 parseCondBranch(LastInst, TBB, Cond);
406 return false;
407 }
408 return true; // Can't handle indirect branch.
409 }
410 SecondLastInst = &*I;
411 SecondLastOpc = SecondLastInst->getOpcode();
412 }
413
414 // If there are three terminators, we don't know what sort of block this is.
415 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
416 return true;
417
418 // If the block ends with a B and a Bcc, handle it.
419 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
420 parseCondBranch(SecondLastInst, TBB, Cond);
421 FBB = LastInst->getOperand(0).getMBB();
422 return false;
423 }
424
425 // If the block ends with two unconditional branches, handle it. The second
426 // one is not executed, so remove it.
427 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
428 TBB = SecondLastInst->getOperand(0).getMBB();
429 I = LastInst;
430 if (AllowModify)
431 I->eraseFromParent();
432 return false;
433 }
434
435 // ...likewise if it ends with an indirect branch followed by an unconditional
436 // branch.
437 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
438 I = LastInst;
439 if (AllowModify)
440 I->eraseFromParent();
441 return true;
442 }
443
444 // Otherwise, can't handle this.
445 return true;
446}
447
448bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
449 MachineBranchPredicate &MBP,
450 bool AllowModify) const {
451 // For the moment, handle only a block which ends with a cb(n)zx followed by
452 // a fallthrough. Why this? Because it is a common form.
453 // TODO: Should we handle b.cc?
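  // For example (illustrative block numbers), a block ending in
  //   cbnz x0, %bb.1
  // that falls through to %bb.2 is reported as LHS = x0, RHS = #0,
  // Predicate = PRED_NE, TrueDest = %bb.1, FalseDest = %bb.2.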
454
455 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
456 if (I == MBB.end())
457 return true;
458
459 // Skip over SpeculationBarrierEndBB terminators
460 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
461 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
462 --I;
463 }
464
465 if (!isUnpredicatedTerminator(*I))
466 return true;
467
468 // Get the last instruction in the block.
469 MachineInstr *LastInst = &*I;
470 unsigned LastOpc = LastInst->getOpcode();
471 if (!isCondBranchOpcode(LastOpc))
472 return true;
473
474 switch (LastOpc) {
475 default:
476 return true;
477 case AArch64::CBZW:
478 case AArch64::CBZX:
479 case AArch64::CBNZW:
480 case AArch64::CBNZX:
481 break;
482 };
483
484 MBP.TrueDest = LastInst->getOperand(1).getMBB();
485 assert(MBP.TrueDest && "expected!");
486 MBP.FalseDest = MBB.getNextNode();
487
488 MBP.ConditionDef = nullptr;
489 MBP.SingleUseCondition = false;
490
491 MBP.LHS = LastInst->getOperand(0);
492 MBP.RHS = MachineOperand::CreateImm(0);
493 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
494 : MachineBranchPredicate::PRED_EQ;
495 return false;
496}
497
498bool AArch64InstrInfo::reverseBranchCondition(
499 SmallVectorImpl<MachineOperand> &Cond) const {
500 if (Cond[0].getImm() != -1) {
501 // Regular Bcc
502 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
503 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
504 } else {
505 // Folded compare-and-branch
506 switch (Cond[1].getImm()) {
507 default:
508 llvm_unreachable("Unknown conditional branch!");
509 case AArch64::CBZW:
510 Cond[1].setImm(AArch64::CBNZW);
511 break;
512 case AArch64::CBNZW:
513 Cond[1].setImm(AArch64::CBZW);
514 break;
515 case AArch64::CBZX:
516 Cond[1].setImm(AArch64::CBNZX);
517 break;
518 case AArch64::CBNZX:
519 Cond[1].setImm(AArch64::CBZX);
520 break;
521 case AArch64::TBZW:
522 Cond[1].setImm(AArch64::TBNZW);
523 break;
524 case AArch64::TBNZW:
525 Cond[1].setImm(AArch64::TBZW);
526 break;
527 case AArch64::TBZX:
528 Cond[1].setImm(AArch64::TBNZX);
529 break;
530 case AArch64::TBNZX:
531 Cond[1].setImm(AArch64::TBZX);
532 break;
533 }
534 }
535
536 return false;
537}
538
539unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
540 int *BytesRemoved) const {
541 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
542 if (I == MBB.end())
543 return 0;
544
545 if (!isUncondBranchOpcode(I->getOpcode()) &&
546 !isCondBranchOpcode(I->getOpcode()))
547 return 0;
548
549 // Remove the branch.
550 I->eraseFromParent();
551
552 I = MBB.end();
553
554 if (I == MBB.begin()) {
555 if (BytesRemoved)
556 *BytesRemoved = 4;
557 return 1;
558 }
559 --I;
560 if (!isCondBranchOpcode(I->getOpcode())) {
561 if (BytesRemoved)
562 *BytesRemoved = 4;
563 return 1;
564 }
565
566 // Remove the branch.
567 I->eraseFromParent();
568 if (BytesRemoved)
569 *BytesRemoved = 8;
570
571 return 2;
572}
573
574void AArch64InstrInfo::instantiateCondBranch(
575 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
576 ArrayRef<MachineOperand> Cond) const {
577 if (Cond[0].getImm() != -1) {
578 // Regular Bcc
579 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
580 } else {
581 // Folded compare-and-branch
582 // Note that we use addOperand instead of addReg to keep the flags.
583 const MachineInstrBuilder MIB =
584 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
585 if (Cond.size() > 3)
586 MIB.addImm(Cond[3].getImm());
587 MIB.addMBB(TBB);
588 }
589}
590
591unsigned AArch64InstrInfo::insertBranch(
592 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
593 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
594 // Shouldn't be a fall through.
595 assert(TBB && "insertBranch must not be told to insert a fallthrough");
596
597 if (!FBB) {
598 if (Cond.empty()) // Unconditional branch?
599 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
600 else
601 instantiateCondBranch(MBB, DL, TBB, Cond);
602
603 if (BytesAdded)
604 *BytesAdded = 4;
605
606 return 1;
607 }
608
609 // Two-way conditional branch.
610 instantiateCondBranch(MBB, DL, TBB, Cond);
611 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
612
613 if (BytesAdded)
614 *BytesAdded = 8;
615
616 return 2;
617}
618
619// Find the original register that VReg is copied from.
620static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
621 while (Register::isVirtualRegister(VReg)) {
622 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
623 if (!DefMI->isFullCopy())
624 return VReg;
625 VReg = DefMI->getOperand(1).getReg();
626 }
627 return VReg;
628}
629
630// Determine if VReg is defined by an instruction that can be folded into a
631// csel instruction. If so, return the folded opcode, and the replacement
632// register.
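// For example (virtual register numbers are illustrative), if
//   %1 = ADDWri %0, 1, 0
// feeds a select, this returns CSINCWr and reports %0 as the replacement
// register, so the "+1" is folded into the conditional select.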
633static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
634 unsigned *NewVReg = nullptr) {
635 VReg = removeCopies(MRI, VReg);
636 if (!Register::isVirtualRegister(VReg))
637 return 0;
638
639 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
640 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
641 unsigned Opc = 0;
642 unsigned SrcOpNum = 0;
643 switch (DefMI->getOpcode()) {
644 case AArch64::ADDSXri:
645 case AArch64::ADDSWri:
646 // if NZCV is used, do not fold.
647 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
648 true) == -1)
649 return 0;
650 // fall-through to ADDXri and ADDWri.
651 [[fallthrough]];
652 case AArch64::ADDXri:
653 case AArch64::ADDWri:
654 // add x, 1 -> csinc.
655 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
656 DefMI->getOperand(3).getImm() != 0)
657 return 0;
658 SrcOpNum = 1;
659 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
660 break;
661
662 case AArch64::ORNXrr:
663 case AArch64::ORNWrr: {
664 // not x -> csinv, represented as orn dst, xzr, src.
665 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
666 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
667 return 0;
668 SrcOpNum = 2;
669 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
670 break;
671 }
672
673 case AArch64::SUBSXrr:
674 case AArch64::SUBSWrr:
675 // if NZCV is used, do not fold.
676 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
677 true) == -1)
678 return 0;
679 // fall-through to SUBXrr and SUBWrr.
680 [[fallthrough]];
681 case AArch64::SUBXrr:
682 case AArch64::SUBWrr: {
683 // neg x -> csneg, represented as sub dst, xzr, src.
684 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
685 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
686 return 0;
687 SrcOpNum = 2;
688 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
689 break;
690 }
691 default:
692 return 0;
693 }
694 assert(Opc && SrcOpNum && "Missing parameters");
695
696 if (NewVReg)
697 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
698 return Opc;
699}
700
701bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
702 ArrayRef<MachineOperand> Cond,
703 Register DstReg, Register TrueReg,
704 Register FalseReg, int &CondCycles,
705 int &TrueCycles,
706 int &FalseCycles) const {
707 // Check register classes.
708 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
709 const TargetRegisterClass *RC =
710 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
711 if (!RC)
712 return false;
713
714 // Also need to check the dest regclass, in case we're trying to optimize
715 // something like:
716 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
717 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
718 return false;
719
720 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
721 unsigned ExtraCondLat = Cond.size() != 1;
722
723 // GPRs are handled by csel.
724 // FIXME: Fold in x+1, -x, and ~x when applicable.
725 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
726 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
727 // Single-cycle csel, csinc, csinv, and csneg.
728 CondCycles = 1 + ExtraCondLat;
729 TrueCycles = FalseCycles = 1;
730 if (canFoldIntoCSel(MRI, TrueReg))
731 TrueCycles = 0;
732 else if (canFoldIntoCSel(MRI, FalseReg))
733 FalseCycles = 0;
734 return true;
735 }
736
737 // Scalar floating point is handled by fcsel.
738 // FIXME: Form fabs, fmin, and fmax when applicable.
739 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
740 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
741 CondCycles = 5 + ExtraCondLat;
742 TrueCycles = FalseCycles = 2;
743 return true;
744 }
745
746 // Can't do vectors.
747 return false;
748}
749
750void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
751 MachineBasicBlock::iterator I,
752 const DebugLoc &DL, Register DstReg,
753 ArrayRef<MachineOperand> Cond,
754 Register TrueReg, Register FalseReg) const {
755 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
756
757 // Parse the condition code, see parseCondBranch() above.
758 AArch64CC::CondCode CC;
759 switch (Cond.size()) {
760 default:
761 llvm_unreachable("Unknown condition opcode in Cond");
762 case 1: // b.cc
763 CC = AArch64CC::CondCode(Cond[0].getImm());
764 break;
765 case 3: { // cbz/cbnz
766 // We must insert a compare against 0.
767 bool Is64Bit;
768 switch (Cond[1].getImm()) {
769 default:
770 llvm_unreachable("Unknown branch opcode in Cond");
771 case AArch64::CBZW:
772 Is64Bit = false;
773 CC = AArch64CC::EQ;
774 break;
775 case AArch64::CBZX:
776 Is64Bit = true;
777 CC = AArch64CC::EQ;
778 break;
779 case AArch64::CBNZW:
780 Is64Bit = false;
781 CC = AArch64CC::NE;
782 break;
783 case AArch64::CBNZX:
784 Is64Bit = true;
785 CC = AArch64CC::NE;
786 break;
787 }
788 Register SrcReg = Cond[2].getReg();
789 if (Is64Bit) {
790 // cmp reg, #0 is actually subs xzr, reg, #0.
791 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
792 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
793 .addReg(SrcReg)
794 .addImm(0)
795 .addImm(0);
796 } else {
797 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
798 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
799 .addReg(SrcReg)
800 .addImm(0)
801 .addImm(0);
802 }
803 break;
804 }
805 case 4: { // tbz/tbnz
806 // We must insert a tst instruction.
807 switch (Cond[1].getImm()) {
808 default:
809 llvm_unreachable("Unknown branch opcode in Cond");
810 case AArch64::TBZW:
811 case AArch64::TBZX:
812 CC = AArch64CC::EQ;
813 break;
814 case AArch64::TBNZW:
815 case AArch64::TBNZX:
816 CC = AArch64CC::NE;
817 break;
818 }
819 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
820 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
821 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
822 .addReg(Cond[2].getReg())
823 .addImm(
824 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
825 else
826 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
827 .addReg(Cond[2].getReg())
828 .addImm(
829 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
830 break;
831 }
832 }
833
834 unsigned Opc = 0;
835 const TargetRegisterClass *RC = nullptr;
836 bool TryFold = false;
837 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
838 RC = &AArch64::GPR64RegClass;
839 Opc = AArch64::CSELXr;
840 TryFold = true;
841 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
842 RC = &AArch64::GPR32RegClass;
843 Opc = AArch64::CSELWr;
844 TryFold = true;
845 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
846 RC = &AArch64::FPR64RegClass;
847 Opc = AArch64::FCSELDrrr;
848 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
849 RC = &AArch64::FPR32RegClass;
850 Opc = AArch64::FCSELSrrr;
851 }
852 assert(RC && "Unsupported regclass");
853
854 // Try folding simple instructions into the csel.
855 if (TryFold) {
856 unsigned NewVReg = 0;
857 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
858 if (FoldedOpc) {
859 // The folded opcodes csinc, csinv and csneg apply the operation to
860 // FalseReg, so we need to invert the condition.
861 CC = AArch64CC::getInvertedCondCode(CC);
862 TrueReg = FalseReg;
863 } else
864 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
865
866 // Fold the operation. Leave any dead instructions for DCE to clean up.
867 if (FoldedOpc) {
868 FalseReg = NewVReg;
869 Opc = FoldedOpc;
870 // This extends the live range of NewVReg.
871 MRI.clearKillFlags(NewVReg);
872 }
873 }
874
875 // Pull all virtual registers into the appropriate class.
876 MRI.constrainRegClass(TrueReg, RC);
877 MRI.constrainRegClass(FalseReg, RC);
878
879 // Insert the csel.
880 BuildMI(MBB, I, DL, get(Opc), DstReg)
881 .addReg(TrueReg)
882 .addReg(FalseReg)
883 .addImm(CC);
884}
885
886// Return true if Imm can be loaded into a register by a "cheap" sequence of
887// instructions. For now, "cheap" means at most two instructions.
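// For example (a sketch), a 64-bit immediate such as 0x12345678 expands to a
// MOVZ plus a single MOVK, i.e. two instructions, and is considered cheap,
// whereas a value needing three or four MOVZ/MOVK steps is not.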
888static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
889 if (BitSize == 32)
890 return true;
891
892 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
893 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
894 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
895 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
896
897 return Is.size() <= 2;
898}
899
900// FIXME: this implementation should be micro-architecture dependent, so a
901// micro-architecture target hook should be introduced here in future.
902bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
903 if (Subtarget.hasExynosCheapAsMoveHandling()) {
904 if (isExynosCheapAsMove(MI))
905 return true;
906 return MI.isAsCheapAsAMove();
907 }
908
909 switch (MI.getOpcode()) {
910 default:
911 return MI.isAsCheapAsAMove();
912
913 case AArch64::ADDWrs:
914 case AArch64::ADDXrs:
915 case AArch64::SUBWrs:
916 case AArch64::SUBXrs:
917 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
918
919 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
920 // ORRXri, it is as cheap as MOV.
921 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
922 case AArch64::MOVi32imm:
923 return isCheapImmediate(MI, 32);
924 case AArch64::MOVi64imm:
925 return isCheapImmediate(MI, 64);
926 }
927}
928
929bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
930 switch (MI.getOpcode()) {
931 default:
932 return false;
933
934 case AArch64::ADDWrs:
935 case AArch64::ADDXrs:
936 case AArch64::ADDSWrs:
937 case AArch64::ADDSXrs: {
938 unsigned Imm = MI.getOperand(3).getImm();
939 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
940 if (ShiftVal == 0)
941 return true;
942 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
943 }
944
945 case AArch64::ADDWrx:
946 case AArch64::ADDXrx:
947 case AArch64::ADDXrx64:
948 case AArch64::ADDSWrx:
949 case AArch64::ADDSXrx:
950 case AArch64::ADDSXrx64: {
951 unsigned Imm = MI.getOperand(3).getImm();
952 switch (AArch64_AM::getArithExtendType(Imm)) {
953 default:
954 return false;
955 case AArch64_AM::UXTB:
956 case AArch64_AM::UXTH:
957 case AArch64_AM::UXTW:
958 case AArch64_AM::UXTX:
959 return AArch64_AM::getArithShiftValue(Imm) <= 4;
960 }
961 }
962
963 case AArch64::SUBWrs:
964 case AArch64::SUBSWrs: {
965 unsigned Imm = MI.getOperand(3).getImm();
966 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
967 return ShiftVal == 0 ||
968 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
969 }
970
971 case AArch64::SUBXrs:
972 case AArch64::SUBSXrs: {
973 unsigned Imm = MI.getOperand(3).getImm();
974 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
975 return ShiftVal == 0 ||
976 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
977 }
978
979 case AArch64::SUBWrx:
980 case AArch64::SUBXrx:
981 case AArch64::SUBXrx64:
982 case AArch64::SUBSWrx:
983 case AArch64::SUBSXrx:
984 case AArch64::SUBSXrx64: {
985 unsigned Imm = MI.getOperand(3).getImm();
986 switch (AArch64_AM::getArithExtendType(Imm)) {
987 default:
988 return false;
989 case AArch64_AM::UXTB:
990 case AArch64_AM::UXTH:
991 case AArch64_AM::UXTW:
992 case AArch64_AM::UXTX:
993 return AArch64_AM::getArithShiftValue(Imm) == 0;
994 }
995 }
996
997 case AArch64::LDRBBroW:
998 case AArch64::LDRBBroX:
999 case AArch64::LDRBroW:
1000 case AArch64::LDRBroX:
1001 case AArch64::LDRDroW:
1002 case AArch64::LDRDroX:
1003 case AArch64::LDRHHroW:
1004 case AArch64::LDRHHroX:
1005 case AArch64::LDRHroW:
1006 case AArch64::LDRHroX:
1007 case AArch64::LDRQroW:
1008 case AArch64::LDRQroX:
1009 case AArch64::LDRSBWroW:
1010 case AArch64::LDRSBWroX:
1011 case AArch64::LDRSBXroW:
1012 case AArch64::LDRSBXroX:
1013 case AArch64::LDRSHWroW:
1014 case AArch64::LDRSHWroX:
1015 case AArch64::LDRSHXroW:
1016 case AArch64::LDRSHXroX:
1017 case AArch64::LDRSWroW:
1018 case AArch64::LDRSWroX:
1019 case AArch64::LDRSroW:
1020 case AArch64::LDRSroX:
1021 case AArch64::LDRWroW:
1022 case AArch64::LDRWroX:
1023 case AArch64::LDRXroW:
1024 case AArch64::LDRXroX:
1025 case AArch64::PRFMroW:
1026 case AArch64::PRFMroX:
1027 case AArch64::STRBBroW:
1028 case AArch64::STRBBroX:
1029 case AArch64::STRBroW:
1030 case AArch64::STRBroX:
1031 case AArch64::STRDroW:
1032 case AArch64::STRDroX:
1033 case AArch64::STRHHroW:
1034 case AArch64::STRHHroX:
1035 case AArch64::STRHroW:
1036 case AArch64::STRHroX:
1037 case AArch64::STRQroW:
1038 case AArch64::STRQroX:
1039 case AArch64::STRSroW:
1040 case AArch64::STRSroX:
1041 case AArch64::STRWroW:
1042 case AArch64::STRWroX:
1043 case AArch64::STRXroW:
1044 case AArch64::STRXroX: {
1045 unsigned IsSigned = MI.getOperand(3).getImm();
1046 return !IsSigned;
1047 }
1048 }
1049}
1050
1051bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1052 unsigned Opc = MI.getOpcode();
1053 switch (Opc) {
1054 default:
1055 return false;
1056 case AArch64::SEH_StackAlloc:
1057 case AArch64::SEH_SaveFPLR:
1058 case AArch64::SEH_SaveFPLR_X:
1059 case AArch64::SEH_SaveReg:
1060 case AArch64::SEH_SaveReg_X:
1061 case AArch64::SEH_SaveRegP:
1062 case AArch64::SEH_SaveRegP_X:
1063 case AArch64::SEH_SaveFReg:
1064 case AArch64::SEH_SaveFReg_X:
1065 case AArch64::SEH_SaveFRegP:
1066 case AArch64::SEH_SaveFRegP_X:
1067 case AArch64::SEH_SetFP:
1068 case AArch64::SEH_AddFP:
1069 case AArch64::SEH_Nop:
1070 case AArch64::SEH_PrologEnd:
1071 case AArch64::SEH_EpilogStart:
1072 case AArch64::SEH_EpilogEnd:
1073 case AArch64::SEH_PACSignLR:
1074 case AArch64::SEH_SaveAnyRegQP:
1075 case AArch64::SEH_SaveAnyRegQPX:
1076 return true;
1077 }
1078}
1079
1080bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1081 Register &SrcReg, Register &DstReg,
1082 unsigned &SubIdx) const {
1083 switch (MI.getOpcode()) {
1084 default:
1085 return false;
1086 case AArch64::SBFMXri: // aka sxtw
1087 case AArch64::UBFMXri: // aka uxtw
1088 // Check for the 32 -> 64 bit extension case, these instructions can do
1089 // much more.
1090 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1091 return false;
1092 // This is a signed or unsigned 32 -> 64 bit extension.
1093 SrcReg = MI.getOperand(1).getReg();
1094 DstReg = MI.getOperand(0).getReg();
1095 SubIdx = AArch64::sub_32;
1096 return true;
1097 }
1098}
1099
1100bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1101 const MachineInstr &MIa, const MachineInstr &MIb) const {
1102 const TargetRegisterInfo *TRI = &getRegisterInfo();
1103 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1104 int64_t OffsetA = 0, OffsetB = 0;
1105 TypeSize WidthA(0, false), WidthB(0, false);
1106 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1107
1108 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1109 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1110
1111 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1112 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1113 return false;
1114
1115 // Retrieve the base, offset from the base and width. Width
1116 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1117 // bases are identical, and the offset of a lower memory access +
1118 // the width doesn't overlap the offset of a higher memory access,
1119 // then the memory accesses are different.
1120 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1121 // are assumed to have the same scale (vscale).
1122 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1123 WidthA, TRI) &&
1124 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1125 WidthB, TRI)) {
1126 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1127 OffsetAIsScalable == OffsetBIsScalable) {
1128 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1129 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1130 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1131 if (LowWidth.isScalable() == OffsetAIsScalable &&
1132 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1133 return true;
1134 }
1135 }
1136 return false;
1137}
1138
1139bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1140 const MachineBasicBlock *MBB,
1141 const MachineFunction &MF) const {
1142 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1143 return true;
1144
1145 // Do not move an instruction that can be recognized as a branch target.
1146 if (hasBTISemantics(MI))
1147 return true;
1148
1149 switch (MI.getOpcode()) {
1150 case AArch64::HINT:
1151 // CSDB hints are scheduling barriers.
1152 if (MI.getOperand(0).getImm() == 0x14)
1153 return true;
1154 break;
1155 case AArch64::DSB:
1156 case AArch64::ISB:
1157 // DSB and ISB also are scheduling barriers.
1158 return true;
1159 case AArch64::MSRpstatesvcrImm1:
1160 // SMSTART and SMSTOP are also scheduling barriers.
1161 return true;
1162 default:;
1163 }
1164 if (isSEHInstruction(MI))
1165 return true;
1166 auto Next = std::next(MI.getIterator());
1167 return Next != MBB->end() && Next->isCFIInstruction();
1168}
1169
1170/// analyzeCompare - For a comparison instruction, return the source registers
1171/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1172/// Return true if the comparison instruction can be analyzed.
1173bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1174 Register &SrcReg2, int64_t &CmpMask,
1175 int64_t &CmpValue) const {
1176 // The first operand can be a frame index where we'd normally expect a
1177 // register.
1178 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1179 if (!MI.getOperand(1).isReg())
1180 return false;
1181
1182 switch (MI.getOpcode()) {
1183 default:
1184 break;
1185 case AArch64::PTEST_PP:
1186 case AArch64::PTEST_PP_ANY:
1187 SrcReg = MI.getOperand(0).getReg();
1188 SrcReg2 = MI.getOperand(1).getReg();
1189 // Not sure about the mask and value for now...
1190 CmpMask = ~0;
1191 CmpValue = 0;
1192 return true;
1193 case AArch64::SUBSWrr:
1194 case AArch64::SUBSWrs:
1195 case AArch64::SUBSWrx:
1196 case AArch64::SUBSXrr:
1197 case AArch64::SUBSXrs:
1198 case AArch64::SUBSXrx:
1199 case AArch64::ADDSWrr:
1200 case AArch64::ADDSWrs:
1201 case AArch64::ADDSWrx:
1202 case AArch64::ADDSXrr:
1203 case AArch64::ADDSXrs:
1204 case AArch64::ADDSXrx:
1205 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1206 SrcReg = MI.getOperand(1).getReg();
1207 SrcReg2 = MI.getOperand(2).getReg();
1208 CmpMask = ~0;
1209 CmpValue = 0;
1210 return true;
1211 case AArch64::SUBSWri:
1212 case AArch64::ADDSWri:
1213 case AArch64::SUBSXri:
1214 case AArch64::ADDSXri:
1215 SrcReg = MI.getOperand(1).getReg();
1216 SrcReg2 = 0;
1217 CmpMask = ~0;
1218 CmpValue = MI.getOperand(2).getImm();
1219 return true;
1220 case AArch64::ANDSWri:
1221 case AArch64::ANDSXri:
1222 // ANDS does not use the same encoding scheme as the other xxxS
1223 // instructions.
1224 SrcReg = MI.getOperand(1).getReg();
1225 SrcReg2 = 0;
1226 CmpMask = ~0;
1227 CmpValue = AArch64_AM::decodeLogicalImmediate(
1228 MI.getOperand(2).getImm(),
1229 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1230 return true;
1231 }
1232
1233 return false;
1234}
1235
1236static bool UpdateOperandRegClass(MachineInstr &Instr) {
1237 MachineBasicBlock *MBB = Instr.getParent();
1238 assert(MBB && "Can't get MachineBasicBlock here");
1239 MachineFunction *MF = MBB->getParent();
1240 assert(MF && "Can't get MachineFunction here");
1241 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1242 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1243 MachineRegisterInfo *MRI = &MF->getRegInfo();
1244
1245 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1246 ++OpIdx) {
1247 MachineOperand &MO = Instr.getOperand(OpIdx);
1248 const TargetRegisterClass *OpRegCstraints =
1249 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1250
1251 // If there's no constraint, there's nothing to do.
1252 if (!OpRegCstraints)
1253 continue;
1254 // If the operand is a frame index, there's nothing to do here.
1255 // A frame index operand will resolve correctly during PEI.
1256 if (MO.isFI())
1257 continue;
1258
1259 assert(MO.isReg() &&
1260 "Operand has register constraints without being a register!");
1261
1262 Register Reg = MO.getReg();
1263 if (Reg.isPhysical()) {
1264 if (!OpRegCstraints->contains(Reg))
1265 return false;
1266 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1267 !MRI->constrainRegClass(Reg, OpRegCstraints))
1268 return false;
1269 }
1270
1271 return true;
1272}
1273
1274/// Return the opcode that does not set flags when possible - otherwise
1275/// return the original opcode. The caller is responsible to do the actual
1276/// substitution and legality checking.
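/// For example (a sketch): SUBSWrr maps to SUBWrr, while SUBSWri is kept as-is
/// when it defines WZR/XZR, because register 31 in the non-flag-setting
/// encoding would mean SP rather than the zero register.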
1277unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1278 // Don't convert all compare instructions, because for some the zero register
1279 // encoding becomes the sp register.
1280 bool MIDefinesZeroReg = false;
1281 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1282 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1283 MIDefinesZeroReg = true;
1284
1285 switch (MI.getOpcode()) {
1286 default:
1287 return MI.getOpcode();
1288 case AArch64::ADDSWrr:
1289 return AArch64::ADDWrr;
1290 case AArch64::ADDSWri:
1291 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1292 case AArch64::ADDSWrs:
1293 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1294 case AArch64::ADDSWrx:
1295 return AArch64::ADDWrx;
1296 case AArch64::ADDSXrr:
1297 return AArch64::ADDXrr;
1298 case AArch64::ADDSXri:
1299 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1300 case AArch64::ADDSXrs:
1301 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1302 case AArch64::ADDSXrx:
1303 return AArch64::ADDXrx;
1304 case AArch64::SUBSWrr:
1305 return AArch64::SUBWrr;
1306 case AArch64::SUBSWri:
1307 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1308 case AArch64::SUBSWrs:
1309 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1310 case AArch64::SUBSWrx:
1311 return AArch64::SUBWrx;
1312 case AArch64::SUBSXrr:
1313 return AArch64::SUBXrr;
1314 case AArch64::SUBSXri:
1315 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1316 case AArch64::SUBSXrs:
1317 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1318 case AArch64::SUBSXrx:
1319 return AArch64::SUBXrx;
1320 }
1321}
1322
1323enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1324
1325/// True when condition flags are accessed (either by writing or reading)
1326/// on the instruction trace starting at From and ending at To.
1327///
1328/// Note: If From and To are from different blocks it's assumed CC are accessed
1329/// on the path.
1330static bool areCFlagsAccessedBetweenInstrs(
1331 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1332 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1333 // Early exit if To is at the beginning of the BB.
1334 if (To == To->getParent()->begin())
1335 return true;
1336
1337 // Check whether the instructions are in the same basic block
1338 // If not, assume the condition flags might get modified somewhere.
1339 if (To->getParent() != From->getParent())
1340 return true;
1341
1342 // From must be above To.
1343 assert(std::any_of(
1344 ++To.getReverse(), To->getParent()->rend(),
1345 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1346
1347 // We iterate backward starting at \p To until we hit \p From.
1348 for (const MachineInstr &Instr :
1349 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1350 if (((AccessToCheck & AK_Write) &&
1351 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1352 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1353 return true;
1354 }
1355 return false;
1356}
1357
1358std::optional<unsigned>
1359AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1360 MachineInstr *Pred,
1361 const MachineRegisterInfo *MRI) const {
1362 unsigned MaskOpcode = Mask->getOpcode();
1363 unsigned PredOpcode = Pred->getOpcode();
1364 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1365 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1366
1367 if (PredIsWhileLike) {
1368 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1369 // instruction and the condition is "any" since WHILEcc does an implicit
1370 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1371 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1372 return PredOpcode;
1373
1374 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1375 // redundant since WHILE performs an implicit PTEST with an all active
1376 // mask.
1377 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1378 getElementSizeForOpcode(MaskOpcode) ==
1379 getElementSizeForOpcode(PredOpcode))
1380 return PredOpcode;
1381
1382 return {};
1383 }
1384
1385 if (PredIsPTestLike) {
1386 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1387 // instruction that sets the flags as PTEST would and the condition is
1388 // "any" since PG is always a subset of the governing predicate of the
1389 // ptest-like instruction.
1390 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1391 return PredOpcode;
1392
1393 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1394 // element size matches and either the PTEST_LIKE instruction uses
1395 // the same all active mask or the condition is "any".
1396 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1397 getElementSizeForOpcode(MaskOpcode) ==
1398 getElementSizeForOpcode(PredOpcode)) {
1399 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1400 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1401 return PredOpcode;
1402 }
1403
1404 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1405 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1406 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1407 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1408 // performed by the compare could consider fewer lanes for these element
1409 // sizes.
1410 //
1411 // For example, consider
1412 //
1413 // ptrue p0.b ; P0=1111-1111-1111-1111
1414 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1415 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1416 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1417 // ; ^ last active
1418 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1419 // ; ^ last active
1420 //
1421 // where the compare generates a canonical all active 32-bit predicate
1422 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1423 // active flag, whereas the PTEST instruction with the same mask doesn't.
1424 // For PTEST_ANY this doesn't apply as the flags in this case would be
1425 // identical regardless of element size.
1426 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1427 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1428 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1429 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1430 return PredOpcode;
1431
1432 return {};
1433 }
1434
1435 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1436 // opcode so the PTEST becomes redundant.
1437 switch (PredOpcode) {
1438 case AArch64::AND_PPzPP:
1439 case AArch64::BIC_PPzPP:
1440 case AArch64::EOR_PPzPP:
1441 case AArch64::NAND_PPzPP:
1442 case AArch64::NOR_PPzPP:
1443 case AArch64::ORN_PPzPP:
1444 case AArch64::ORR_PPzPP:
1445 case AArch64::BRKA_PPzP:
1446 case AArch64::BRKPA_PPzPP:
1447 case AArch64::BRKB_PPzP:
1448 case AArch64::BRKPB_PPzPP:
1449 case AArch64::RDFFR_PPz: {
1450 // Check to see if our mask is the same. If not, the resulting flag bits
1451 // may be different and we can't remove the ptest.
1452 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1453 if (Mask != PredMask)
1454 return {};
1455 break;
1456 }
1457 case AArch64::BRKN_PPzP: {
1458 // BRKN uses an all active implicit mask to set flags unlike the other
1459 // flag-setting instructions.
1460 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1461 if ((MaskOpcode != AArch64::PTRUE_B) ||
1462 (Mask->getOperand(1).getImm() != 31))
1463 return {};
1464 break;
1465 }
1466 case AArch64::PTRUE_B:
1467 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1468 break;
1469 default:
1470 // Bail out if we don't recognize the input
1471 return {};
1472 }
1473
1474 return convertToFlagSettingOpc(PredOpcode);
1475}
1476
1477/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1478/// operation which could set the flags in an identical manner
1479bool AArch64InstrInfo::optimizePTestInstr(
1480 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1481 const MachineRegisterInfo *MRI) const {
1482 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1483 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1484 unsigned PredOpcode = Pred->getOpcode();
1485 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1486 if (!NewOp)
1487 return false;
1488
1490
1491 // If another instruction between Pred and PTest accesses flags, don't remove
1492 // the ptest or update the earlier instruction to modify them.
1493 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1494 return false;
1495
1496 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1497 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1498 // operand to be replaced with an equivalent instruction that also sets the
1499 // flags.
1500 PTest->eraseFromParent();
1501 if (*NewOp != PredOpcode) {
1502 Pred->setDesc(get(*NewOp));
1503 bool succeeded = UpdateOperandRegClass(*Pred);
1504 (void)succeeded;
1505 assert(succeeded && "Operands have incompatible register classes!");
1506 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1507 }
1508
1509 // Ensure that the flags def is live.
1510 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1511 unsigned i = 0, e = Pred->getNumOperands();
1512 for (; i != e; ++i) {
1513 MachineOperand &MO = Pred->getOperand(i);
1514 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1515 MO.setIsDead(false);
1516 break;
1517 }
1518 }
1519 }
1520 return true;
1521}
1522
1523/// Try to optimize a compare instruction. A compare instruction is an
1524/// instruction which produces AArch64::NZCV. It is a true compare
1525/// instruction
1526/// when there are no uses of its destination register.
1527///
1528/// The following steps are tried in order:
1529/// 1. Convert CmpInstr into an unconditional version.
1530/// 2. Remove CmpInstr if above there is an instruction producing a needed
1531/// condition code or an instruction which can be converted into such an
1532/// instruction.
1533/// Only comparison with zero is supported.
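/// For example (a sketch in MIR-like notation), when the NZCV def of the
/// compare is dead,
///   %2:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
/// is rewritten to
///   %2:gpr32 = SUBWrr %0, %1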
1534bool AArch64InstrInfo::optimizeCompareInstr(
1535 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1536 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1537 assert(CmpInstr.getParent());
1538 assert(MRI);
1539
1540 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1541 int DeadNZCVIdx =
1542 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1543 if (DeadNZCVIdx != -1) {
1544 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1545 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1546 CmpInstr.eraseFromParent();
1547 return true;
1548 }
1549 unsigned Opc = CmpInstr.getOpcode();
1550 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1551 if (NewOpc == Opc)
1552 return false;
1553 const MCInstrDesc &MCID = get(NewOpc);
1554 CmpInstr.setDesc(MCID);
1555 CmpInstr.removeOperand(DeadNZCVIdx);
1556 bool succeeded = UpdateOperandRegClass(CmpInstr);
1557 (void)succeeded;
1558 assert(succeeded && "Some operands reg class are incompatible!");
1559 return true;
1560 }
1561
1562 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1563 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1564 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1565
1566 if (SrcReg2 != 0)
1567 return false;
1568
1569 // CmpInstr is a Compare instruction if destination register is not used.
1570 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1571 return false;
1572
1573 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1574 return true;
1575 return (CmpValue == 0 || CmpValue == 1) &&
1576 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1577}
1578
1579/// Get the opcode of the S (flag-setting) version of Instr.
1580/// If Instr is already the S version, its opcode is returned.
1581/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1582/// or we are not interested in it.
1583static unsigned sForm(MachineInstr &Instr) {
1584 switch (Instr.getOpcode()) {
1585 default:
1586 return AArch64::INSTRUCTION_LIST_END;
1587
1588 case AArch64::ADDSWrr:
1589 case AArch64::ADDSWri:
1590 case AArch64::ADDSXrr:
1591 case AArch64::ADDSXri:
1592 case AArch64::SUBSWrr:
1593 case AArch64::SUBSWri:
1594 case AArch64::SUBSXrr:
1595 case AArch64::SUBSXri:
1596 return Instr.getOpcode();
1597
1598 case AArch64::ADDWrr:
1599 return AArch64::ADDSWrr;
1600 case AArch64::ADDWri:
1601 return AArch64::ADDSWri;
1602 case AArch64::ADDXrr:
1603 return AArch64::ADDSXrr;
1604 case AArch64::ADDXri:
1605 return AArch64::ADDSXri;
1606 case AArch64::ADCWr:
1607 return AArch64::ADCSWr;
1608 case AArch64::ADCXr:
1609 return AArch64::ADCSXr;
1610 case AArch64::SUBWrr:
1611 return AArch64::SUBSWrr;
1612 case AArch64::SUBWri:
1613 return AArch64::SUBSWri;
1614 case AArch64::SUBXrr:
1615 return AArch64::SUBSXrr;
1616 case AArch64::SUBXri:
1617 return AArch64::SUBSXri;
1618 case AArch64::SBCWr:
1619 return AArch64::SBCSWr;
1620 case AArch64::SBCXr:
1621 return AArch64::SBCSXr;
1622 case AArch64::ANDWri:
1623 return AArch64::ANDSWri;
1624 case AArch64::ANDXri:
1625 return AArch64::ANDSXri;
1626 }
1627}
1628
1629/// Check if AArch64::NZCV should be alive in successors of MBB.
1630static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1631 for (auto *BB : MBB->successors())
1632 if (BB->isLiveIn(AArch64::NZCV))
1633 return true;
1634 return false;
1635}
1636
1637/// \returns The condition code operand index for \p Instr if it is a branch
1638/// or select and -1 otherwise.
1639static int
1640findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1641 switch (Instr.getOpcode()) {
1642 default:
1643 return -1;
1644
1645 case AArch64::Bcc: {
1646 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1647 assert(Idx >= 2);
1648 return Idx - 2;
1649 }
1650
1651 case AArch64::CSINVWr:
1652 case AArch64::CSINVXr:
1653 case AArch64::CSINCWr:
1654 case AArch64::CSINCXr:
1655 case AArch64::CSELWr:
1656 case AArch64::CSELXr:
1657 case AArch64::CSNEGWr:
1658 case AArch64::CSNEGXr:
1659 case AArch64::FCSELSrrr:
1660 case AArch64::FCSELDrrr: {
1661 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1662 assert(Idx >= 1);
1663 return Idx - 1;
1664 }
1665 }
1666}
1667
1668/// Find a condition code used by the instruction.
1669/// Returns AArch64CC::Invalid if either the instruction does not use condition
1670/// codes or we don't optimize CmpInstr in the presence of such instructions.
1671static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1672 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1673 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1674 Instr.getOperand(CCIdx).getImm())
1675 : AArch64CC::Invalid;
1676}
1677
1680 UsedNZCV UsedFlags;
1681 switch (CC) {
1682 default:
1683 break;
1684
1685 case AArch64CC::EQ: // Z set
1686 case AArch64CC::NE: // Z clear
1687 UsedFlags.Z = true;
1688 break;
1689
1690 case AArch64CC::HI: // Z clear and C set
1691 case AArch64CC::LS: // Z set or C clear
1692 UsedFlags.Z = true;
1693 [[fallthrough]];
1694 case AArch64CC::HS: // C set
1695 case AArch64CC::LO: // C clear
1696 UsedFlags.C = true;
1697 break;
1698
1699 case AArch64CC::MI: // N set
1700 case AArch64CC::PL: // N clear
1701 UsedFlags.N = true;
1702 break;
1703
1704 case AArch64CC::VS: // V set
1705 case AArch64CC::VC: // V clear
1706 UsedFlags.V = true;
1707 break;
1708
1709 case AArch64CC::GT: // Z clear, N and V the same
1710 case AArch64CC::LE: // Z set, N and V differ
1711 UsedFlags.Z = true;
1712 [[fallthrough]];
1713 case AArch64CC::GE: // N and V the same
1714 case AArch64CC::LT: // N and V differ
1715 UsedFlags.N = true;
1716 UsedFlags.V = true;
1717 break;
1718 }
1719 return UsedFlags;
1720}
1721
1722/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1723/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1724/// \returns std::nullopt otherwise.
1725///
1726/// Collect instructions using that flags in \p CCUseInstrs if provided.
1727std::optional<UsedNZCV>
1728llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1729 const TargetRegisterInfo &TRI,
1730 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1731 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1732 if (MI.getParent() != CmpParent)
1733 return std::nullopt;
1734
1735 if (areCFlagsAliveInSuccessors(CmpParent))
1736 return std::nullopt;
1737
1738 UsedNZCV NZCVUsedAfterCmp;
1739 for (MachineInstr &Instr : instructionsWithoutDebug(
1740 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1741 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1742 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1743 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1744 return std::nullopt;
1745 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1746 if (CCUseInstrs)
1747 CCUseInstrs->push_back(&Instr);
1748 }
1749 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1750 break;
1751 }
1752 return NZCVUsedAfterCmp;
1753}
1754
1755static bool isADDSRegImm(unsigned Opcode) {
1756 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1757}
1758
1759static bool isSUBSRegImm(unsigned Opcode) {
1760 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1761}
1762
1763/// Check if CmpInstr can be substituted by MI.
1764///
1765/// CmpInstr can be substituted:
1766/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1767/// - and, MI and CmpInstr are from the same MachineBB
1768/// - and, condition flags are not alive in successors of the CmpInstr parent
1769/// - and, if MI opcode is the S form there must be no defs of flags between
1770/// MI and CmpInstr
1771/// or if MI opcode is not the S form there must be neither defs of flags
1772/// nor uses of flags between MI and CmpInstr.
1773/// - and, if C/V flags are not used after CmpInstr
1774/// or if N flag is used but MI produces poison value if signed overflow
1775/// occurs.
1776static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1777 const TargetRegisterInfo &TRI) {
1778 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1779 // that may or may not set flags.
1780 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1781
1782 const unsigned CmpOpcode = CmpInstr.getOpcode();
1783 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1784 return false;
1785
1786 assert((CmpInstr.getOperand(2).isImm() &&
1787 CmpInstr.getOperand(2).getImm() == 0) &&
1788 "Caller guarantees that CmpInstr compares with constant 0");
1789
1790 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1791 if (!NZVCUsed || NZVCUsed->C)
1792 return false;
1793
1794 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1795 // '%vreg = add ...' or '%vreg = sub ...'.
1796 // Condition flag V is used to indicate signed overflow.
1797 // 1) MI and CmpInstr set N and V to the same value.
1798 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1799 // signed overflow occurs, so CmpInstr could still be simplified away.
1800 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1801 return false;
1802
1803 AccessKind AccessToCheck = AK_Write;
1804 if (sForm(MI) != MI.getOpcode())
1805 AccessToCheck = AK_All;
1806 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1807}
1808
1809/// Substitute an instruction comparing to zero with another instruction
1810/// which produces needed condition flags.
1811///
1812/// Return true on success.
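/// For example (a sketch in MIR-like notation, register numbers illustrative):
///   %3:gpr32 = ADDWrr %1, %2
///   %4:gpr32 = SUBSWri %3, 0, 0, implicit-def $nzcv   (%4 unused)
///   Bcc 0, %bb.1, implicit $nzcv
/// becomes
///   %3:gpr32 = ADDSWrr %1, %2, implicit-def $nzcv
///   Bcc 0, %bb.1, implicit $nzcv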
1813bool AArch64InstrInfo::substituteCmpToZero(
1814 MachineInstr &CmpInstr, unsigned SrcReg,
1815 const MachineRegisterInfo &MRI) const {
1816 // Get the unique definition of SrcReg.
1817 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1818 if (!MI)
1819 return false;
1820
1821 const TargetRegisterInfo &TRI = getRegisterInfo();
1822
1823 unsigned NewOpc = sForm(*MI);
1824 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1825 return false;
1826
1827 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1828 return false;
1829
1830 // Update the instruction to set NZCV.
1831 MI->setDesc(get(NewOpc));
1832 CmpInstr.eraseFromParent();
1833 bool succeeded = UpdateOperandRegClass(*MI);
1834 (void)succeeded;
1835 assert(succeeded && "Some operands reg class are incompatible!");
1836 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1837 return true;
1838}
1839
1840/// \returns True if \p CmpInstr can be removed.
1841///
1842/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1843/// codes used in \p CCUseInstrs must be inverted.
1844static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1845                                 int CmpValue, const TargetRegisterInfo &TRI,
1846                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1847                                 bool &IsInvertCC) {
1848 assert((CmpValue == 0 || CmpValue == 1) &&
1849 "Only comparisons to 0 or 1 considered for removal!");
1850
1851 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1852 unsigned MIOpc = MI.getOpcode();
1853 if (MIOpc == AArch64::CSINCWr) {
1854 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1855 MI.getOperand(2).getReg() != AArch64::WZR)
1856 return false;
1857 } else if (MIOpc == AArch64::CSINCXr) {
1858 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1859 MI.getOperand(2).getReg() != AArch64::XZR)
1860 return false;
1861 } else {
1862 return false;
1863 }
1864  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1865  if (MICC == AArch64CC::Invalid)
1866 return false;
1867
1868 // NZCV needs to be defined
1869 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1870 return false;
1871
1872 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1873 const unsigned CmpOpcode = CmpInstr.getOpcode();
1874 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1875 if (CmpValue && !IsSubsRegImm)
1876 return false;
1877 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1878 return false;
1879
1880 // MI conditions allowed: eq, ne, mi, pl
1881 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1882 if (MIUsedNZCV.C || MIUsedNZCV.V)
1883 return false;
1884
1885 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1886 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1887  // Condition flags are not used in CmpInstr basic block successors, and only
1888  // the Z or N flags are allowed to be used after CmpInstr within its basic block.
1889 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1890 return false;
1891 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1892 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1893 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1894 return false;
1895  // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne.
1896 if (MIUsedNZCV.N && !CmpValue)
1897 return false;
1898
1899 // There must be no defs of flags between MI and CmpInstr
1900 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1901 return false;
1902
1903 // Condition code is inverted in the following cases:
1904 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1905 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1906 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1907 (!CmpValue && MICC == AArch64CC::NE);
1908 return true;
1909}
1910
1911/// Remove comparison in csinc-cmp sequence
1912///
1913/// Examples:
1914/// 1. \code
1915/// csinc w9, wzr, wzr, ne
1916/// cmp w9, #0
1917/// b.eq
1918/// \endcode
1919/// to
1920/// \code
1921/// csinc w9, wzr, wzr, ne
1922/// b.ne
1923/// \endcode
1924///
1925/// 2. \code
1926/// csinc x2, xzr, xzr, mi
1927/// cmp x2, #1
1928/// b.pl
1929/// \endcode
1930/// to
1931/// \code
1932/// csinc x2, xzr, xzr, mi
1933/// b.pl
1934/// \endcode
1935///
1936/// \param CmpInstr comparison instruction
1937/// \return True when comparison removed
1938bool AArch64InstrInfo::removeCmpToZeroOrOne(
1939 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1940 const MachineRegisterInfo &MRI) const {
1941 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1942 if (!MI)
1943 return false;
1944  const TargetRegisterInfo &TRI = getRegisterInfo();
1945  SmallVector<MachineInstr *, 4> CCUseInstrs;
1946  bool IsInvertCC = false;
1947 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1948 IsInvertCC))
1949 return false;
1950 // Make transformation
1951 CmpInstr.eraseFromParent();
1952 if (IsInvertCC) {
1953 // Invert condition codes in CmpInstr CC users
1954 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1955      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1956      assert(Idx >= 0 && "Unexpected instruction using CC.");
1957 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1958      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1959          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1960 CCOperand.setImm(CCUse);
1961 }
1962 }
1963 return true;
1964}
1965
1966bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1967  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1968 MI.getOpcode() != AArch64::CATCHRET)
1969 return false;
1970
1971 MachineBasicBlock &MBB = *MI.getParent();
1972 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1973 auto TRI = Subtarget.getRegisterInfo();
1974 DebugLoc DL = MI.getDebugLoc();
1975
1976 if (MI.getOpcode() == AArch64::CATCHRET) {
1977 // Skip to the first instruction before the epilog.
1978 const TargetInstrInfo *TII =
1979        MBB.getParent()->getSubtarget().getInstrInfo();
1980    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1981    MachineBasicBlock::iterator MBBI = MI.getIterator();
1982 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1983 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1984 FirstEpilogSEH != MBB.begin())
1985 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1986 if (FirstEpilogSEH != MBB.begin())
1987 FirstEpilogSEH = std::next(FirstEpilogSEH);
1988 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1989 .addReg(AArch64::X0, RegState::Define)
1990 .addMBB(TargetMBB);
1991 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1992 .addReg(AArch64::X0, RegState::Define)
1993 .addReg(AArch64::X0)
1994 .addMBB(TargetMBB)
1995 .addImm(0);
1996 return true;
1997 }
1998
1999 Register Reg = MI.getOperand(0).getReg();
2000  Module &M = *MBB.getParent()->getFunction().getParent();
2001  if (M.getStackProtectorGuard() == "sysreg") {
2002 const AArch64SysReg::SysReg *SrcReg =
2003 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2004 if (!SrcReg)
2005 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2006
2007 // mrs xN, sysreg
2008 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2009        .addDef(Reg)
2010        .addImm(SrcReg->Encoding);
2011 int Offset = M.getStackProtectorGuardOffset();
2012 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2013 // ldr xN, [xN, #offset]
2014 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2015 .addDef(Reg)
2016 .addUse(Reg, RegState::Kill)
2017 .addImm(Offset / 8);
2018 } else if (Offset >= -256 && Offset <= 255) {
2019 // ldur xN, [xN, #offset]
2020 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2021 .addDef(Reg)
2022 .addUse(Reg, RegState::Kill)
2023 .addImm(Offset);
2024 } else if (Offset >= -4095 && Offset <= 4095) {
2025 if (Offset > 0) {
2026 // add xN, xN, #offset
2027 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2028 .addDef(Reg)
2029 .addUse(Reg, RegState::Kill)
2030 .addImm(Offset)
2031 .addImm(0);
2032 } else {
2033 // sub xN, xN, #offset
2034 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2035 .addDef(Reg)
2036 .addUse(Reg, RegState::Kill)
2037 .addImm(-Offset)
2038 .addImm(0);
2039 }
2040 // ldr xN, [xN]
2041 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2042 .addDef(Reg)
2043 .addUse(Reg, RegState::Kill)
2044 .addImm(0);
2045 } else {
2046 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2047      // than 32760.
2048 // It might be nice to use AArch64::MOVi32imm here, which would get
2049 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2050 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2051 // AArch64FrameLowering might help us find such a scratch register
2052 // though. If we failed to find a scratch register, we could emit a
2053 // stream of add instructions to build up the immediate. Or, we could try
2054 // to insert a AArch64::MOVi32imm before register allocation so that we
2055 // didn't need to scavenge for a scratch register.
2056 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2057 }
2058 MBB.erase(MI);
2059 return true;
2060 }
2061
2062 const GlobalValue *GV =
2063 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2064 const TargetMachine &TM = MBB.getParent()->getTarget();
2065 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2066 const unsigned char MO_NC = AArch64II::MO_NC;
2067
2068 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2069 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2070 .addGlobalAddress(GV, 0, OpFlags);
2071 if (Subtarget.isTargetILP32()) {
2072 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2073 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2074 .addDef(Reg32, RegState::Dead)
2075 .addUse(Reg, RegState::Kill)
2076 .addImm(0)
2077 .addMemOperand(*MI.memoperands_begin())
2078          .addDef(Reg, RegState::Implicit);
2079    } else {
2080 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2081 .addReg(Reg, RegState::Kill)
2082 .addImm(0)
2083 .addMemOperand(*MI.memoperands_begin());
2084 }
2085 } else if (TM.getCodeModel() == CodeModel::Large) {
2086 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2087 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2088 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2089 .addImm(0);
2090 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2091 .addReg(Reg, RegState::Kill)
2092 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2093 .addImm(16);
2094 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2095 .addReg(Reg, RegState::Kill)
2096 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2097 .addImm(32);
2098 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2099 .addReg(Reg, RegState::Kill)
2100        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2101        .addImm(48);
2102 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2103 .addReg(Reg, RegState::Kill)
2104 .addImm(0)
2105 .addMemOperand(*MI.memoperands_begin());
2106 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2107 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2108 .addGlobalAddress(GV, 0, OpFlags);
2109 } else {
2110 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2111 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2112 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2113 if (Subtarget.isTargetILP32()) {
2114 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2115 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2116 .addDef(Reg32, RegState::Dead)
2117 .addUse(Reg, RegState::Kill)
2118 .addGlobalAddress(GV, 0, LoFlags)
2119 .addMemOperand(*MI.memoperands_begin())
2120          .addDef(Reg, RegState::Implicit);
2121    } else {
2122 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2123 .addReg(Reg, RegState::Kill)
2124 .addGlobalAddress(GV, 0, LoFlags)
2125 .addMemOperand(*MI.memoperands_begin());
2126 }
2127 }
2128
2129 MBB.erase(MI);
2130
2131 return true;
2132}
2133
2134// Return true if this instruction simply sets its single destination register
2135// to zero. This is equivalent to a register rename of the zero-register.
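// For example, "movz w0, #0", "and w0, wzr, #0x7", and a COPY from WZR all
// qualify: each one only materializes zero into its destination register.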
2136bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2137  switch (MI.getOpcode()) {
2138 default:
2139 break;
2140 case AArch64::MOVZWi:
2141 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2142 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2143 assert(MI.getDesc().getNumOperands() == 3 &&
2144 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2145 return true;
2146 }
2147 break;
2148 case AArch64::ANDWri: // and Rd, Rzr, #imm
2149 return MI.getOperand(1).getReg() == AArch64::WZR;
2150 case AArch64::ANDXri:
2151 return MI.getOperand(1).getReg() == AArch64::XZR;
2152 case TargetOpcode::COPY:
2153 return MI.getOperand(1).getReg() == AArch64::WZR;
2154 }
2155 return false;
2156}
2157
2158// Return true if this instruction simply renames a general register without
2159// modifying bits.
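// For example, "mov x0, x1" (i.e. "orr x0, xzr, x1"), "add x0, x1, #0", and a
// plain COPY between general-purpose registers all qualify.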
2160bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2161  switch (MI.getOpcode()) {
2162 default:
2163 break;
2164 case TargetOpcode::COPY: {
2165    // GPR32 copies will be lowered to ORRXrs
2166 Register DstReg = MI.getOperand(0).getReg();
2167 return (AArch64::GPR32RegClass.contains(DstReg) ||
2168 AArch64::GPR64RegClass.contains(DstReg));
2169 }
2170 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2171 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2172 assert(MI.getDesc().getNumOperands() == 4 &&
2173 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2174 return true;
2175 }
2176 break;
2177 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2178 if (MI.getOperand(2).getImm() == 0) {
2179 assert(MI.getDesc().getNumOperands() == 4 &&
2180 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2181 return true;
2182 }
2183 break;
2184 }
2185 return false;
2186}
2187
2188// Return true if this instruction simply renames a floating-point/SIMD
2189// register without modifying bits.
2190bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2191  switch (MI.getOpcode()) {
2192 default:
2193 break;
2194 case TargetOpcode::COPY: {
2195 Register DstReg = MI.getOperand(0).getReg();
2196 return AArch64::FPR128RegClass.contains(DstReg);
2197 }
2198 case AArch64::ORRv16i8:
2199 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2200 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2201 "invalid ORRv16i8 operands");
2202 return true;
2203 }
2204 break;
2205 }
2206 return false;
2207}
2208
2209Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2210                                               int &FrameIndex) const {
2211 switch (MI.getOpcode()) {
2212 default:
2213 break;
2214 case AArch64::LDRWui:
2215 case AArch64::LDRXui:
2216 case AArch64::LDRBui:
2217 case AArch64::LDRHui:
2218 case AArch64::LDRSui:
2219 case AArch64::LDRDui:
2220 case AArch64::LDRQui:
2221 case AArch64::LDR_PXI:
2222 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2223 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2224 FrameIndex = MI.getOperand(1).getIndex();
2225 return MI.getOperand(0).getReg();
2226 }
2227 break;
2228 }
2229
2230 return 0;
2231}
2232
2233Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2234                                              int &FrameIndex) const {
2235 switch (MI.getOpcode()) {
2236 default:
2237 break;
2238 case AArch64::STRWui:
2239 case AArch64::STRXui:
2240 case AArch64::STRBui:
2241 case AArch64::STRHui:
2242 case AArch64::STRSui:
2243 case AArch64::STRDui:
2244 case AArch64::STRQui:
2245 case AArch64::STR_PXI:
2246 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2247 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2248 FrameIndex = MI.getOperand(1).getIndex();
2249 return MI.getOperand(0).getReg();
2250 }
2251 break;
2252 }
2253 return 0;
2254}
2255
2256/// Check all MachineMemOperands for a hint to suppress pairing.
2257bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2258  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2259 return MMO->getFlags() & MOSuppressPair;
2260 });
2261}
2262
2263/// Set a flag on the first MachineMemOperand to suppress pairing.
2264void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2265  if (MI.memoperands_empty())
2266 return;
2267 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2268}
2269
2270/// Check all MachineMemOperands for a hint that the load/store is strided.
2271bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2272  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2273 return MMO->getFlags() & MOStridedAccess;
2274 });
2275}
2276
2277bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2278  switch (Opc) {
2279 default:
2280 return false;
2281 case AArch64::STURSi:
2282 case AArch64::STRSpre:
2283 case AArch64::STURDi:
2284 case AArch64::STRDpre:
2285 case AArch64::STURQi:
2286 case AArch64::STRQpre:
2287 case AArch64::STURBBi:
2288 case AArch64::STURHHi:
2289 case AArch64::STURWi:
2290 case AArch64::STRWpre:
2291 case AArch64::STURXi:
2292 case AArch64::STRXpre:
2293 case AArch64::LDURSi:
2294 case AArch64::LDRSpre:
2295 case AArch64::LDURDi:
2296 case AArch64::LDRDpre:
2297 case AArch64::LDURQi:
2298 case AArch64::LDRQpre:
2299 case AArch64::LDURWi:
2300 case AArch64::LDRWpre:
2301 case AArch64::LDURXi:
2302 case AArch64::LDRXpre:
2303 case AArch64::LDRSWpre:
2304 case AArch64::LDURSWi:
2305 case AArch64::LDURHHi:
2306 case AArch64::LDURBBi:
2307 case AArch64::LDURSBWi:
2308 case AArch64::LDURSHWi:
2309 return true;
2310 }
2311}
2312
2313std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2314 switch (Opc) {
2315 default: return {};
2316 case AArch64::PRFMui: return AArch64::PRFUMi;
2317 case AArch64::LDRXui: return AArch64::LDURXi;
2318 case AArch64::LDRWui: return AArch64::LDURWi;
2319 case AArch64::LDRBui: return AArch64::LDURBi;
2320 case AArch64::LDRHui: return AArch64::LDURHi;
2321 case AArch64::LDRSui: return AArch64::LDURSi;
2322 case AArch64::LDRDui: return AArch64::LDURDi;
2323 case AArch64::LDRQui: return AArch64::LDURQi;
2324 case AArch64::LDRBBui: return AArch64::LDURBBi;
2325 case AArch64::LDRHHui: return AArch64::LDURHHi;
2326 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2327 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2328 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2329 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2330 case AArch64::LDRSWui: return AArch64::LDURSWi;
2331 case AArch64::STRXui: return AArch64::STURXi;
2332 case AArch64::STRWui: return AArch64::STURWi;
2333 case AArch64::STRBui: return AArch64::STURBi;
2334 case AArch64::STRHui: return AArch64::STURHi;
2335 case AArch64::STRSui: return AArch64::STURSi;
2336 case AArch64::STRDui: return AArch64::STURDi;
2337 case AArch64::STRQui: return AArch64::STURQi;
2338 case AArch64::STRBBui: return AArch64::STURBBi;
2339 case AArch64::STRHHui: return AArch64::STURHHi;
2340 }
2341}
2342
2343unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2344  switch (Opc) {
2345 default:
2346 return 2;
2347 case AArch64::LDPXi:
2348 case AArch64::LDPDi:
2349 case AArch64::STPXi:
2350 case AArch64::STPDi:
2351 case AArch64::LDNPXi:
2352 case AArch64::LDNPDi:
2353 case AArch64::STNPXi:
2354 case AArch64::STNPDi:
2355 case AArch64::LDPQi:
2356 case AArch64::STPQi:
2357 case AArch64::LDNPQi:
2358 case AArch64::STNPQi:
2359 case AArch64::LDPWi:
2360 case AArch64::LDPSi:
2361 case AArch64::STPWi:
2362 case AArch64::STPSi:
2363 case AArch64::LDNPWi:
2364 case AArch64::LDNPSi:
2365 case AArch64::STNPWi:
2366 case AArch64::STNPSi:
2367 case AArch64::LDG:
2368 case AArch64::STGPi:
2369
2370 case AArch64::LD1B_IMM:
2371 case AArch64::LD1B_H_IMM:
2372 case AArch64::LD1B_S_IMM:
2373 case AArch64::LD1B_D_IMM:
2374 case AArch64::LD1SB_H_IMM:
2375 case AArch64::LD1SB_S_IMM:
2376 case AArch64::LD1SB_D_IMM:
2377 case AArch64::LD1H_IMM:
2378 case AArch64::LD1H_S_IMM:
2379 case AArch64::LD1H_D_IMM:
2380 case AArch64::LD1SH_S_IMM:
2381 case AArch64::LD1SH_D_IMM:
2382 case AArch64::LD1W_IMM:
2383 case AArch64::LD1W_D_IMM:
2384 case AArch64::LD1SW_D_IMM:
2385 case AArch64::LD1D_IMM:
2386
2387 case AArch64::LD2B_IMM:
2388 case AArch64::LD2H_IMM:
2389 case AArch64::LD2W_IMM:
2390 case AArch64::LD2D_IMM:
2391 case AArch64::LD3B_IMM:
2392 case AArch64::LD3H_IMM:
2393 case AArch64::LD3W_IMM:
2394 case AArch64::LD3D_IMM:
2395 case AArch64::LD4B_IMM:
2396 case AArch64::LD4H_IMM:
2397 case AArch64::LD4W_IMM:
2398 case AArch64::LD4D_IMM:
2399
2400 case AArch64::ST1B_IMM:
2401 case AArch64::ST1B_H_IMM:
2402 case AArch64::ST1B_S_IMM:
2403 case AArch64::ST1B_D_IMM:
2404 case AArch64::ST1H_IMM:
2405 case AArch64::ST1H_S_IMM:
2406 case AArch64::ST1H_D_IMM:
2407 case AArch64::ST1W_IMM:
2408 case AArch64::ST1W_D_IMM:
2409 case AArch64::ST1D_IMM:
2410
2411 case AArch64::ST2B_IMM:
2412 case AArch64::ST2H_IMM:
2413 case AArch64::ST2W_IMM:
2414 case AArch64::ST2D_IMM:
2415 case AArch64::ST3B_IMM:
2416 case AArch64::ST3H_IMM:
2417 case AArch64::ST3W_IMM:
2418 case AArch64::ST3D_IMM:
2419 case AArch64::ST4B_IMM:
2420 case AArch64::ST4H_IMM:
2421 case AArch64::ST4W_IMM:
2422 case AArch64::ST4D_IMM:
2423
2424 case AArch64::LD1RB_IMM:
2425 case AArch64::LD1RB_H_IMM:
2426 case AArch64::LD1RB_S_IMM:
2427 case AArch64::LD1RB_D_IMM:
2428 case AArch64::LD1RSB_H_IMM:
2429 case AArch64::LD1RSB_S_IMM:
2430 case AArch64::LD1RSB_D_IMM:
2431 case AArch64::LD1RH_IMM:
2432 case AArch64::LD1RH_S_IMM:
2433 case AArch64::LD1RH_D_IMM:
2434 case AArch64::LD1RSH_S_IMM:
2435 case AArch64::LD1RSH_D_IMM:
2436 case AArch64::LD1RW_IMM:
2437 case AArch64::LD1RW_D_IMM:
2438 case AArch64::LD1RSW_IMM:
2439 case AArch64::LD1RD_IMM:
2440
2441 case AArch64::LDNT1B_ZRI:
2442 case AArch64::LDNT1H_ZRI:
2443 case AArch64::LDNT1W_ZRI:
2444 case AArch64::LDNT1D_ZRI:
2445 case AArch64::STNT1B_ZRI:
2446 case AArch64::STNT1H_ZRI:
2447 case AArch64::STNT1W_ZRI:
2448 case AArch64::STNT1D_ZRI:
2449
2450 case AArch64::LDNF1B_IMM:
2451 case AArch64::LDNF1B_H_IMM:
2452 case AArch64::LDNF1B_S_IMM:
2453 case AArch64::LDNF1B_D_IMM:
2454 case AArch64::LDNF1SB_H_IMM:
2455 case AArch64::LDNF1SB_S_IMM:
2456 case AArch64::LDNF1SB_D_IMM:
2457 case AArch64::LDNF1H_IMM:
2458 case AArch64::LDNF1H_S_IMM:
2459 case AArch64::LDNF1H_D_IMM:
2460 case AArch64::LDNF1SH_S_IMM:
2461 case AArch64::LDNF1SH_D_IMM:
2462 case AArch64::LDNF1W_IMM:
2463 case AArch64::LDNF1W_D_IMM:
2464 case AArch64::LDNF1SW_D_IMM:
2465 case AArch64::LDNF1D_IMM:
2466 return 3;
2467 case AArch64::ADDG:
2468 case AArch64::STGi:
2469 case AArch64::LDR_PXI:
2470 case AArch64::STR_PXI:
2471 return 2;
2472 }
2473}
2474
2475bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2476  switch (MI.getOpcode()) {
2477 default:
2478 return false;
2479 // Scaled instructions.
2480 case AArch64::STRSui:
2481 case AArch64::STRDui:
2482 case AArch64::STRQui:
2483 case AArch64::STRXui:
2484 case AArch64::STRWui:
2485 case AArch64::LDRSui:
2486 case AArch64::LDRDui:
2487 case AArch64::LDRQui:
2488 case AArch64::LDRXui:
2489 case AArch64::LDRWui:
2490 case AArch64::LDRSWui:
2491 // Unscaled instructions.
2492 case AArch64::STURSi:
2493 case AArch64::STRSpre:
2494 case AArch64::STURDi:
2495 case AArch64::STRDpre:
2496 case AArch64::STURQi:
2497 case AArch64::STRQpre:
2498 case AArch64::STURWi:
2499 case AArch64::STRWpre:
2500 case AArch64::STURXi:
2501 case AArch64::STRXpre:
2502 case AArch64::LDURSi:
2503 case AArch64::LDRSpre:
2504 case AArch64::LDURDi:
2505 case AArch64::LDRDpre:
2506 case AArch64::LDURQi:
2507 case AArch64::LDRQpre:
2508 case AArch64::LDURWi:
2509 case AArch64::LDRWpre:
2510 case AArch64::LDURXi:
2511 case AArch64::LDRXpre:
2512 case AArch64::LDURSWi:
2513 case AArch64::LDRSWpre:
2514 return true;
2515 }
2516}
2517
2518bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2519  switch (MI.getOpcode()) {
2520 default:
2521 assert((!MI.isCall() || !MI.isReturn()) &&
2522 "Unexpected instruction - was a new tail call opcode introduced?");
2523 return false;
2524 case AArch64::TCRETURNdi:
2525 case AArch64::TCRETURNri:
2526 case AArch64::TCRETURNrix16x17:
2527 case AArch64::TCRETURNrix17:
2528 case AArch64::TCRETURNrinotx16:
2529 case AArch64::TCRETURNriALL:
2530 case AArch64::AUTH_TCRETURN:
2531 case AArch64::AUTH_TCRETURN_BTI:
2532 return true;
2533 }
2534}
2535
2536unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2537  switch (Opc) {
2538 default:
2539 llvm_unreachable("Opcode has no flag setting equivalent!");
2540 // 32-bit cases:
2541 case AArch64::ADDWri:
2542 return AArch64::ADDSWri;
2543 case AArch64::ADDWrr:
2544 return AArch64::ADDSWrr;
2545 case AArch64::ADDWrs:
2546 return AArch64::ADDSWrs;
2547 case AArch64::ADDWrx:
2548 return AArch64::ADDSWrx;
2549 case AArch64::ANDWri:
2550 return AArch64::ANDSWri;
2551 case AArch64::ANDWrr:
2552 return AArch64::ANDSWrr;
2553 case AArch64::ANDWrs:
2554 return AArch64::ANDSWrs;
2555 case AArch64::BICWrr:
2556 return AArch64::BICSWrr;
2557 case AArch64::BICWrs:
2558 return AArch64::BICSWrs;
2559 case AArch64::SUBWri:
2560 return AArch64::SUBSWri;
2561 case AArch64::SUBWrr:
2562 return AArch64::SUBSWrr;
2563 case AArch64::SUBWrs:
2564 return AArch64::SUBSWrs;
2565 case AArch64::SUBWrx:
2566 return AArch64::SUBSWrx;
2567 // 64-bit cases:
2568 case AArch64::ADDXri:
2569 return AArch64::ADDSXri;
2570 case AArch64::ADDXrr:
2571 return AArch64::ADDSXrr;
2572 case AArch64::ADDXrs:
2573 return AArch64::ADDSXrs;
2574 case AArch64::ADDXrx:
2575 return AArch64::ADDSXrx;
2576 case AArch64::ANDXri:
2577 return AArch64::ANDSXri;
2578 case AArch64::ANDXrr:
2579 return AArch64::ANDSXrr;
2580 case AArch64::ANDXrs:
2581 return AArch64::ANDSXrs;
2582 case AArch64::BICXrr:
2583 return AArch64::BICSXrr;
2584 case AArch64::BICXrs:
2585 return AArch64::BICSXrs;
2586 case AArch64::SUBXri:
2587 return AArch64::SUBSXri;
2588 case AArch64::SUBXrr:
2589 return AArch64::SUBSXrr;
2590 case AArch64::SUBXrs:
2591 return AArch64::SUBSXrs;
2592 case AArch64::SUBXrx:
2593 return AArch64::SUBSXrx;
2594 // SVE instructions:
2595 case AArch64::AND_PPzPP:
2596 return AArch64::ANDS_PPzPP;
2597 case AArch64::BIC_PPzPP:
2598 return AArch64::BICS_PPzPP;
2599 case AArch64::EOR_PPzPP:
2600 return AArch64::EORS_PPzPP;
2601 case AArch64::NAND_PPzPP:
2602 return AArch64::NANDS_PPzPP;
2603 case AArch64::NOR_PPzPP:
2604 return AArch64::NORS_PPzPP;
2605 case AArch64::ORN_PPzPP:
2606 return AArch64::ORNS_PPzPP;
2607 case AArch64::ORR_PPzPP:
2608 return AArch64::ORRS_PPzPP;
2609 case AArch64::BRKA_PPzP:
2610 return AArch64::BRKAS_PPzP;
2611 case AArch64::BRKPA_PPzPP:
2612 return AArch64::BRKPAS_PPzPP;
2613 case AArch64::BRKB_PPzP:
2614 return AArch64::BRKBS_PPzP;
2615 case AArch64::BRKPB_PPzPP:
2616 return AArch64::BRKPBS_PPzPP;
2617 case AArch64::BRKN_PPzP:
2618 return AArch64::BRKNS_PPzP;
2619 case AArch64::RDFFR_PPz:
2620 return AArch64::RDFFRS_PPz;
2621 case AArch64::PTRUE_B:
2622 return AArch64::PTRUES_B;
2623 }
2624}
2625
2626// Is this a candidate for ld/st merging or pairing? For example, we don't
2627// touch volatiles or load/stores that have a hint to avoid pair formation.
2628bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2629
2630 bool IsPreLdSt = isPreLdSt(MI);
2631
2632 // If this is a volatile load/store, don't mess with it.
2633 if (MI.hasOrderedMemoryRef())
2634 return false;
2635
2636 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2637 // For Pre-inc LD/ST, the operand is shifted by one.
2638 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2639 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2640 "Expected a reg or frame index operand.");
2641
2642 // For Pre-indexed addressing quadword instructions, the third operand is the
2643 // immediate value.
2644 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2645
2646 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2647 return false;
2648
2649 // Can't merge/pair if the instruction modifies the base register.
2650 // e.g., ldr x0, [x0]
2651 // This case will never occur with an FI base.
2652 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2653 // STR<S,D,Q,W,X>pre, it can be merged.
2654 // For example:
2655 // ldr q0, [x11, #32]!
2656 // ldr q1, [x11, #16]
2657 // to
2658 // ldp q0, q1, [x11, #32]!
2659 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2660 Register BaseReg = MI.getOperand(1).getReg();
2661    const TargetRegisterInfo *TRI = &getRegisterInfo();
2662    if (MI.modifiesRegister(BaseReg, TRI))
2663 return false;
2664 }
2665
2666 // Check if this load/store has a hint to avoid pair formation.
2667 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2668  if (isLdStPairSuppressed(MI))
2669    return false;
2670
2671 // Do not pair any callee-save store/reload instructions in the
2672 // prologue/epilogue if the CFI information encoded the operations as separate
2673 // instructions, as that will cause the size of the actual prologue to mismatch
2674 // with the prologue size recorded in the Windows CFI.
2675 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2676 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2677 MI.getMF()->getFunction().needsUnwindTableEntry();
2678 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2679                      MI.getFlag(MachineInstr::FrameDestroy)))
2680    return false;
2681
2682 // On some CPUs quad load/store pairs are slower than two single load/stores.
2683 if (Subtarget.isPaired128Slow()) {
2684 switch (MI.getOpcode()) {
2685 default:
2686 break;
2687 case AArch64::LDURQi:
2688 case AArch64::STURQi:
2689 case AArch64::LDRQui:
2690 case AArch64::STRQui:
2691 return false;
2692 }
2693 }
2694
2695 return true;
2696}
2697
2698bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2699    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2700    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2701 const TargetRegisterInfo *TRI) const {
2702 if (!LdSt.mayLoadOrStore())
2703 return false;
2704
2705 const MachineOperand *BaseOp;
2706 TypeSize WidthN(0, false);
2707 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2708 WidthN, TRI))
2709 return false;
2710  // The maximum vscale is 16 under AArch64; return the maximal extent for the
2711  // vector.
2712 Width = LocationSize::precise(WidthN);
2713 BaseOps.push_back(BaseOp);
2714 return true;
2715}
2716
2717std::optional<ExtAddrMode>
2718AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2719                                          const TargetRegisterInfo *TRI) const {
2720 const MachineOperand *Base; // Filled with the base operand of MI.
2721 int64_t Offset; // Filled with the offset of MI.
2722 bool OffsetIsScalable;
2723 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2724 return std::nullopt;
2725
2726 if (!Base->isReg())
2727 return std::nullopt;
2728 ExtAddrMode AM;
2729 AM.BaseReg = Base->getReg();
2730 AM.Displacement = Offset;
2731 AM.ScaledReg = 0;
2732 AM.Scale = 0;
2733 return AM;
2734}
2735
2736bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2737                                           Register Reg,
2738 const MachineInstr &AddrI,
2739 ExtAddrMode &AM) const {
2740 // Filter out instructions into which we cannot fold.
2741 unsigned NumBytes;
2742 int64_t OffsetScale = 1;
2743 switch (MemI.getOpcode()) {
2744 default:
2745 return false;
2746
2747 case AArch64::LDURQi:
2748 case AArch64::STURQi:
2749 NumBytes = 16;
2750 break;
2751
2752 case AArch64::LDURDi:
2753 case AArch64::STURDi:
2754 case AArch64::LDURXi:
2755 case AArch64::STURXi:
2756 NumBytes = 8;
2757 break;
2758
2759 case AArch64::LDURWi:
2760 case AArch64::LDURSWi:
2761 case AArch64::STURWi:
2762 NumBytes = 4;
2763 break;
2764
2765 case AArch64::LDURHi:
2766 case AArch64::STURHi:
2767 case AArch64::LDURHHi:
2768 case AArch64::STURHHi:
2769 case AArch64::LDURSHXi:
2770 case AArch64::LDURSHWi:
2771 NumBytes = 2;
2772 break;
2773
2774 case AArch64::LDRBroX:
2775 case AArch64::LDRBBroX:
2776 case AArch64::LDRSBXroX:
2777 case AArch64::LDRSBWroX:
2778 case AArch64::STRBroX:
2779 case AArch64::STRBBroX:
2780 case AArch64::LDURBi:
2781 case AArch64::LDURBBi:
2782 case AArch64::LDURSBXi:
2783 case AArch64::LDURSBWi:
2784 case AArch64::STURBi:
2785 case AArch64::STURBBi:
2786 case AArch64::LDRBui:
2787 case AArch64::LDRBBui:
2788 case AArch64::LDRSBXui:
2789 case AArch64::LDRSBWui:
2790 case AArch64::STRBui:
2791 case AArch64::STRBBui:
2792 NumBytes = 1;
2793 break;
2794
2795 case AArch64::LDRQroX:
2796 case AArch64::STRQroX:
2797 case AArch64::LDRQui:
2798 case AArch64::STRQui:
2799 NumBytes = 16;
2800 OffsetScale = 16;
2801 break;
2802
2803 case AArch64::LDRDroX:
2804 case AArch64::STRDroX:
2805 case AArch64::LDRXroX:
2806 case AArch64::STRXroX:
2807 case AArch64::LDRDui:
2808 case AArch64::STRDui:
2809 case AArch64::LDRXui:
2810 case AArch64::STRXui:
2811 NumBytes = 8;
2812 OffsetScale = 8;
2813 break;
2814
2815 case AArch64::LDRWroX:
2816 case AArch64::LDRSWroX:
2817 case AArch64::STRWroX:
2818 case AArch64::LDRWui:
2819 case AArch64::LDRSWui:
2820 case AArch64::STRWui:
2821 NumBytes = 4;
2822 OffsetScale = 4;
2823 break;
2824
2825 case AArch64::LDRHroX:
2826 case AArch64::STRHroX:
2827 case AArch64::LDRHHroX:
2828 case AArch64::STRHHroX:
2829 case AArch64::LDRSHXroX:
2830 case AArch64::LDRSHWroX:
2831 case AArch64::LDRHui:
2832 case AArch64::STRHui:
2833 case AArch64::LDRHHui:
2834 case AArch64::STRHHui:
2835 case AArch64::LDRSHXui:
2836 case AArch64::LDRSHWui:
2837 NumBytes = 2;
2838 OffsetScale = 2;
2839 break;
2840 }
2841
2842 // Check the fold operand is not the loaded/stored value.
2843 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2844 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2845 return false;
2846
2847 // Handle memory instructions with a [Reg, Reg] addressing mode.
2848 if (MemI.getOperand(2).isReg()) {
2849 // Bail if the addressing mode already includes extension of the offset
2850 // register.
2851 if (MemI.getOperand(3).getImm())
2852 return false;
2853
2854 // Check if we actually have a scaled offset.
2855 if (MemI.getOperand(4).getImm() == 0)
2856 OffsetScale = 1;
2857
2858    // If the address instruction is folded into the base register, then the
2859    // addressing mode must not have a scale. Then we can swap the base and the
2860    // scaled registers.
2861 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2862 return false;
2863
2864 switch (AddrI.getOpcode()) {
2865 default:
2866 return false;
2867
2868 case AArch64::SBFMXri:
2869 // sxtw Xa, Wm
2870 // ldr Xd, [Xn, Xa, lsl #N]
2871 // ->
2872 // ldr Xd, [Xn, Wm, sxtw #N]
2873 if (AddrI.getOperand(2).getImm() != 0 ||
2874 AddrI.getOperand(3).getImm() != 31)
2875 return false;
2876
2877 AM.BaseReg = MemI.getOperand(1).getReg();
2878 if (AM.BaseReg == Reg)
2879 AM.BaseReg = MemI.getOperand(2).getReg();
2880 AM.ScaledReg = AddrI.getOperand(1).getReg();
2881 AM.Scale = OffsetScale;
2882 AM.Displacement = 0;
2883      AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2884      return true;
2885
2886 case TargetOpcode::SUBREG_TO_REG: {
2887 // mov Wa, Wm
2888 // ldr Xd, [Xn, Xa, lsl #N]
2889 // ->
2890 // ldr Xd, [Xn, Wm, uxtw #N]
2891
2892 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2893 if (AddrI.getOperand(1).getImm() != 0 ||
2894 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2895 return false;
2896
2897 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2898 Register OffsetReg = AddrI.getOperand(2).getReg();
2899 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2900 return false;
2901
2902 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2903 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2904 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2905 DefMI.getOperand(3).getImm() != 0)
2906 return false;
2907
2908 AM.BaseReg = MemI.getOperand(1).getReg();
2909 if (AM.BaseReg == Reg)
2910 AM.BaseReg = MemI.getOperand(2).getReg();
2911 AM.ScaledReg = DefMI.getOperand(2).getReg();
2912 AM.Scale = OffsetScale;
2913 AM.Displacement = 0;
2914      AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2915      return true;
2916 }
2917 }
2918 }
2919
2920 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2921
2922 // Check we are not breaking a potential conversion to an LDP.
2923 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2924 int64_t NewOffset) -> bool {
2925 int64_t MinOffset, MaxOffset;
2926 switch (NumBytes) {
2927 default:
2928 return true;
2929 case 4:
2930 MinOffset = -256;
2931 MaxOffset = 252;
2932 break;
2933 case 8:
2934 MinOffset = -512;
2935 MaxOffset = 504;
2936 break;
2937 case 16:
2938 MinOffset = -1024;
2939 MaxOffset = 1008;
2940 break;
2941 }
2942 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2943 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2944 };
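  // For example, with 8-byte accesses the LDP immediate range is [-512, 504]:
  // an old offset of 480 is LDP-encodable, so a fold that would push it to 520
  // is rejected, while an old offset of 1024 was never LDP-encodable and may
  // be changed freely as far as this check is concerned.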
2945 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2946 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2947 int64_t NewOffset = OldOffset + Disp;
2948 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2949 return false;
2950 // If the old offset would fit into an LDP, but the new offset wouldn't,
2951 // bail out.
2952 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2953 return false;
2954 AM.BaseReg = AddrI.getOperand(1).getReg();
2955 AM.ScaledReg = 0;
2956 AM.Scale = 0;
2957 AM.Displacement = NewOffset;
2958    AM.Form = ExtAddrMode::Formula::Basic;
2959    return true;
2960 };
2961
2962 auto canFoldAddRegIntoAddrMode =
2963 [&](int64_t Scale,
2964          ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2965        if (MemI.getOperand(2).getImm() != 0)
2966 return false;
2967 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2968 return false;
2969 AM.BaseReg = AddrI.getOperand(1).getReg();
2970 AM.ScaledReg = AddrI.getOperand(2).getReg();
2971 AM.Scale = Scale;
2972 AM.Displacement = 0;
2973 AM.Form = Form;
2974 return true;
2975 };
2976
2977 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2978 unsigned Opcode = MemI.getOpcode();
2979 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2980 Subtarget.isSTRQroSlow();
2981 };
2982
2983 int64_t Disp = 0;
2984 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2985 switch (AddrI.getOpcode()) {
2986 default:
2987 return false;
2988
2989 case AArch64::ADDXri:
2990 // add Xa, Xn, #N
2991 // ldr Xd, [Xa, #M]
2992 // ->
2993 // ldr Xd, [Xn, #N'+M]
2994 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2995 return canFoldAddSubImmIntoAddrMode(Disp);
2996
2997 case AArch64::SUBXri:
2998 // sub Xa, Xn, #N
2999 // ldr Xd, [Xa, #M]
3000 // ->
3001 // ldr Xd, [Xn, #N'+M]
3002 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3003 return canFoldAddSubImmIntoAddrMode(-Disp);
3004
3005 case AArch64::ADDXrs: {
3006 // add Xa, Xn, Xm, lsl #N
3007 // ldr Xd, [Xa]
3008 // ->
3009 // ldr Xd, [Xn, Xm, lsl #N]
3010
3011 // Don't fold the add if the result would be slower, unless optimising for
3012 // size.
3013 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3014    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3015      return false;
3016 Shift = AArch64_AM::getShiftValue(Shift);
3017 if (!OptSize) {
3018 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3019 return false;
3020 if (avoidSlowSTRQ(MemI))
3021 return false;
3022 }
3023 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3024 }
3025
3026 case AArch64::ADDXrr:
3027 // add Xa, Xn, Xm
3028 // ldr Xd, [Xa]
3029 // ->
3030 // ldr Xd, [Xn, Xm, lsl #0]
3031
3032 // Don't fold the add if the result would be slower, unless optimising for
3033 // size.
3034 if (!OptSize && avoidSlowSTRQ(MemI))
3035 return false;
3036 return canFoldAddRegIntoAddrMode(1);
3037
3038 case AArch64::ADDXrx:
3039 // add Xa, Xn, Wm, {s,u}xtw #N
3040 // ldr Xd, [Xa]
3041 // ->
3042 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3043
3044 // Don't fold the add if the result would be slower, unless optimising for
3045 // size.
3046 if (!OptSize && avoidSlowSTRQ(MemI))
3047 return false;
3048
3049 // Can fold only sign-/zero-extend of a word.
3050 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3051    AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3052    if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3053 return false;
3054
3055 return canFoldAddRegIntoAddrMode(
3056 1ULL << AArch64_AM::getArithShiftValue(Imm),
3057        (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3058                                     : ExtAddrMode::Formula::ZExtScaledReg);
3059  }
3060}
3061
3062// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3063// return the opcode of an instruction performing the same operation, but using
3064// the [Reg, Reg] addressing mode.
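// For example, LDRXui ("ldr x0, [x1, #8]") maps to LDRXroX
// ("ldr x0, [x1, x2]").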
3065static unsigned regOffsetOpcode(unsigned Opcode) {
3066 switch (Opcode) {
3067 default:
3068 llvm_unreachable("Address folding not implemented for instruction");
3069
3070 case AArch64::LDURQi:
3071 case AArch64::LDRQui:
3072 return AArch64::LDRQroX;
3073 case AArch64::STURQi:
3074 case AArch64::STRQui:
3075 return AArch64::STRQroX;
3076 case AArch64::LDURDi:
3077 case AArch64::LDRDui:
3078 return AArch64::LDRDroX;
3079 case AArch64::STURDi:
3080 case AArch64::STRDui:
3081 return AArch64::STRDroX;
3082 case AArch64::LDURXi:
3083 case AArch64::LDRXui:
3084 return AArch64::LDRXroX;
3085 case AArch64::STURXi:
3086 case AArch64::STRXui:
3087 return AArch64::STRXroX;
3088 case AArch64::LDURWi:
3089 case AArch64::LDRWui:
3090 return AArch64::LDRWroX;
3091 case AArch64::LDURSWi:
3092 case AArch64::LDRSWui:
3093 return AArch64::LDRSWroX;
3094 case AArch64::STURWi:
3095 case AArch64::STRWui:
3096 return AArch64::STRWroX;
3097 case AArch64::LDURHi:
3098 case AArch64::LDRHui:
3099 return AArch64::LDRHroX;
3100 case AArch64::STURHi:
3101 case AArch64::STRHui:
3102 return AArch64::STRHroX;
3103 case AArch64::LDURHHi:
3104 case AArch64::LDRHHui:
3105 return AArch64::LDRHHroX;
3106 case AArch64::STURHHi:
3107 case AArch64::STRHHui:
3108 return AArch64::STRHHroX;
3109 case AArch64::LDURSHXi:
3110 case AArch64::LDRSHXui:
3111 return AArch64::LDRSHXroX;
3112 case AArch64::LDURSHWi:
3113 case AArch64::LDRSHWui:
3114 return AArch64::LDRSHWroX;
3115 case AArch64::LDURBi:
3116 case AArch64::LDRBui:
3117 return AArch64::LDRBroX;
3118 case AArch64::LDURBBi:
3119 case AArch64::LDRBBui:
3120 return AArch64::LDRBBroX;
3121 case AArch64::LDURSBXi:
3122 case AArch64::LDRSBXui:
3123 return AArch64::LDRSBXroX;
3124 case AArch64::LDURSBWi:
3125 case AArch64::LDRSBWui:
3126 return AArch64::LDRSBWroX;
3127 case AArch64::STURBi:
3128 case AArch64::STRBui:
3129 return AArch64::STRBroX;
3130 case AArch64::STURBBi:
3131 case AArch64::STRBBui:
3132 return AArch64::STRBBroX;
3133 }
3134}
3135
3136// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3137// the opcode of an instruction performing the same operation, but using the
3138// [Reg, #Imm] addressing mode with scaled offset.
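// For example, LDURXi ("ldur x0, [x1, #16]") maps to LDRXui with Scale set to
// 8, so the byte offset must be a non-negative multiple of 8 to be encodable
// in the scaled form.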
3139unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3140 switch (Opcode) {
3141 default:
3142 llvm_unreachable("Address folding not implemented for instruction");
3143
3144 case AArch64::LDURQi:
3145 Scale = 16;
3146 return AArch64::LDRQui;
3147 case AArch64::STURQi:
3148 Scale = 16;
3149 return AArch64::STRQui;
3150 case AArch64::LDURDi:
3151 Scale = 8;
3152 return AArch64::LDRDui;
3153 case AArch64::STURDi:
3154 Scale = 8;
3155 return AArch64::STRDui;
3156 case AArch64::LDURXi:
3157 Scale = 8;
3158 return AArch64::LDRXui;
3159 case AArch64::STURXi:
3160 Scale = 8;
3161 return AArch64::STRXui;
3162 case AArch64::LDURWi:
3163 Scale = 4;
3164 return AArch64::LDRWui;
3165 case AArch64::LDURSWi:
3166 Scale = 4;
3167 return AArch64::LDRSWui;
3168 case AArch64::STURWi:
3169 Scale = 4;
3170 return AArch64::STRWui;
3171 case AArch64::LDURHi:
3172 Scale = 2;
3173 return AArch64::LDRHui;
3174 case AArch64::STURHi:
3175 Scale = 2;
3176 return AArch64::STRHui;
3177 case AArch64::LDURHHi:
3178 Scale = 2;
3179 return AArch64::LDRHHui;
3180 case AArch64::STURHHi:
3181 Scale = 2;
3182 return AArch64::STRHHui;
3183 case AArch64::LDURSHXi:
3184 Scale = 2;
3185 return AArch64::LDRSHXui;
3186 case AArch64::LDURSHWi:
3187 Scale = 2;
3188 return AArch64::LDRSHWui;
3189 case AArch64::LDURBi:
3190 Scale = 1;
3191 return AArch64::LDRBui;
3192 case AArch64::LDURBBi:
3193 Scale = 1;
3194 return AArch64::LDRBBui;
3195 case AArch64::LDURSBXi:
3196 Scale = 1;
3197 return AArch64::LDRSBXui;
3198 case AArch64::LDURSBWi:
3199 Scale = 1;
3200 return AArch64::LDRSBWui;
3201 case AArch64::STURBi:
3202 Scale = 1;
3203 return AArch64::STRBui;
3204 case AArch64::STURBBi:
3205 Scale = 1;
3206 return AArch64::STRBBui;
3207 case AArch64::LDRQui:
3208 case AArch64::STRQui:
3209 Scale = 16;
3210 return Opcode;
3211 case AArch64::LDRDui:
3212 case AArch64::STRDui:
3213 case AArch64::LDRXui:
3214 case AArch64::STRXui:
3215 Scale = 8;
3216 return Opcode;
3217 case AArch64::LDRWui:
3218 case AArch64::LDRSWui:
3219 case AArch64::STRWui:
3220 Scale = 4;
3221 return Opcode;
3222 case AArch64::LDRHui:
3223 case AArch64::STRHui:
3224 case AArch64::LDRHHui:
3225 case AArch64::STRHHui:
3226 case AArch64::LDRSHXui:
3227 case AArch64::LDRSHWui:
3228 Scale = 2;
3229 return Opcode;
3230 case AArch64::LDRBui:
3231 case AArch64::LDRBBui:
3232 case AArch64::LDRSBXui:
3233 case AArch64::LDRSBWui:
3234 case AArch64::STRBui:
3235 case AArch64::STRBBui:
3236 Scale = 1;
3237 return Opcode;
3238 }
3239}
3240
3241// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3242// the opcode of an instruction performing the same operation, but using the
3243// [Reg, #Imm] addressing mode with unscaled offset.
3244unsigned unscaledOffsetOpcode(unsigned Opcode) {
3245 switch (Opcode) {
3246 default:
3247 llvm_unreachable("Address folding not implemented for instruction");
3248
3249 case AArch64::LDURQi:
3250 case AArch64::STURQi:
3251 case AArch64::LDURDi:
3252 case AArch64::STURDi:
3253 case AArch64::LDURXi:
3254 case AArch64::STURXi:
3255 case AArch64::LDURWi:
3256 case AArch64::LDURSWi:
3257 case AArch64::STURWi:
3258 case AArch64::LDURHi:
3259 case AArch64::STURHi:
3260 case AArch64::LDURHHi:
3261 case AArch64::STURHHi:
3262 case AArch64::LDURSHXi:
3263 case AArch64::LDURSHWi:
3264 case AArch64::LDURBi:
3265 case AArch64::STURBi:
3266 case AArch64::LDURBBi:
3267 case AArch64::STURBBi:
3268 case AArch64::LDURSBWi:
3269 case AArch64::LDURSBXi:
3270 return Opcode;
3271 case AArch64::LDRQui:
3272 return AArch64::LDURQi;
3273 case AArch64::STRQui:
3274 return AArch64::STURQi;
3275 case AArch64::LDRDui:
3276 return AArch64::LDURDi;
3277 case AArch64::STRDui:
3278 return AArch64::STURDi;
3279 case AArch64::LDRXui:
3280 return AArch64::LDURXi;
3281 case AArch64::STRXui:
3282 return AArch64::STURXi;
3283 case AArch64::LDRWui:
3284 return AArch64::LDURWi;
3285 case AArch64::LDRSWui:
3286 return AArch64::LDURSWi;
3287 case AArch64::STRWui:
3288 return AArch64::STURWi;
3289 case AArch64::LDRHui:
3290 return AArch64::LDURHi;
3291 case AArch64::STRHui:
3292 return AArch64::STURHi;
3293 case AArch64::LDRHHui:
3294 return AArch64::LDURHHi;
3295 case AArch64::STRHHui:
3296 return AArch64::STURHHi;
3297 case AArch64::LDRSHXui:
3298 return AArch64::LDURSHXi;
3299 case AArch64::LDRSHWui:
3300 return AArch64::LDURSHWi;
3301 case AArch64::LDRBBui:
3302 return AArch64::LDURBBi;
3303 case AArch64::LDRBui:
3304 return AArch64::LDURBi;
3305 case AArch64::STRBBui:
3306 return AArch64::STURBBi;
3307 case AArch64::STRBui:
3308 return AArch64::STURBi;
3309 case AArch64::LDRSBWui:
3310 return AArch64::LDURSBWi;
3311 case AArch64::LDRSBXui:
3312 return AArch64::LDURSBXi;
3313 }
3314}
3315
3316// Given the opcode of a memory load/store instruction, return the opcode of an
3317// instruction performing the same operation, but using
3318// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3319// offset register.
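// For example, LDRXroX ("ldr x0, [x1, x2, lsl #3]") maps to LDRXroW
// ("ldr x0, [x1, w2, sxtw #3]" or "ldr x0, [x1, w2, uxtw #3]").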
3320static unsigned offsetExtendOpcode(unsigned Opcode) {
3321 switch (Opcode) {
3322 default:
3323 llvm_unreachable("Address folding not implemented for instruction");
3324
3325 case AArch64::LDRQroX:
3326 case AArch64::LDURQi:
3327 case AArch64::LDRQui:
3328 return AArch64::LDRQroW;
3329 case AArch64::STRQroX:
3330 case AArch64::STURQi:
3331 case AArch64::STRQui:
3332 return AArch64::STRQroW;
3333 case AArch64::LDRDroX:
3334 case AArch64::LDURDi:
3335 case AArch64::LDRDui:
3336 return AArch64::LDRDroW;
3337 case AArch64::STRDroX:
3338 case AArch64::STURDi:
3339 case AArch64::STRDui:
3340 return AArch64::STRDroW;
3341 case AArch64::LDRXroX:
3342 case AArch64::LDURXi:
3343 case AArch64::LDRXui:
3344 return AArch64::LDRXroW;
3345 case AArch64::STRXroX:
3346 case AArch64::STURXi:
3347 case AArch64::STRXui:
3348 return AArch64::STRXroW;
3349 case AArch64::LDRWroX:
3350 case AArch64::LDURWi:
3351 case AArch64::LDRWui:
3352 return AArch64::LDRWroW;
3353 case AArch64::LDRSWroX:
3354 case AArch64::LDURSWi:
3355 case AArch64::LDRSWui:
3356 return AArch64::LDRSWroW;
3357 case AArch64::STRWroX:
3358 case AArch64::STURWi:
3359 case AArch64::STRWui:
3360 return AArch64::STRWroW;
3361 case AArch64::LDRHroX:
3362 case AArch64::LDURHi:
3363 case AArch64::LDRHui:
3364 return AArch64::LDRHroW;
3365 case AArch64::STRHroX:
3366 case AArch64::STURHi:
3367 case AArch64::STRHui:
3368 return AArch64::STRHroW;
3369 case AArch64::LDRHHroX:
3370 case AArch64::LDURHHi:
3371 case AArch64::LDRHHui:
3372 return AArch64::LDRHHroW;
3373 case AArch64::STRHHroX:
3374 case AArch64::STURHHi:
3375 case AArch64::STRHHui:
3376 return AArch64::STRHHroW;
3377 case AArch64::LDRSHXroX:
3378 case AArch64::LDURSHXi:
3379 case AArch64::LDRSHXui:
3380 return AArch64::LDRSHXroW;
3381 case AArch64::LDRSHWroX:
3382 case AArch64::LDURSHWi:
3383 case AArch64::LDRSHWui:
3384 return AArch64::LDRSHWroW;
3385 case AArch64::LDRBroX:
3386 case AArch64::LDURBi:
3387 case AArch64::LDRBui:
3388 return AArch64::LDRBroW;
3389 case AArch64::LDRBBroX:
3390 case AArch64::LDURBBi:
3391 case AArch64::LDRBBui:
3392 return AArch64::LDRBBroW;
3393 case AArch64::LDRSBXroX:
3394 case AArch64::LDURSBXi:
3395 case AArch64::LDRSBXui:
3396 return AArch64::LDRSBXroW;
3397 case AArch64::LDRSBWroX:
3398 case AArch64::LDURSBWi:
3399 case AArch64::LDRSBWui:
3400 return AArch64::LDRSBWroW;
3401 case AArch64::STRBroX:
3402 case AArch64::STURBi:
3403 case AArch64::STRBui:
3404 return AArch64::STRBroW;
3405 case AArch64::STRBBroX:
3406 case AArch64::STURBBi:
3407 case AArch64::STRBBui:
3408 return AArch64::STRBBroW;
3409 }
3410}
3411
3412MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3413                                                 const ExtAddrMode &AM) const {
3414
3415 const DebugLoc &DL = MemI.getDebugLoc();
3416 MachineBasicBlock &MBB = *MemI.getParent();
3417  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3418
3419  if (AM.Form == ExtAddrMode::Formula::Basic) {
3420    if (AM.ScaledReg) {
3421 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3422 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3423 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3424 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3425 .addReg(MemI.getOperand(0).getReg(),
3426 MemI.mayLoad() ? RegState::Define : 0)
3427 .addReg(AM.BaseReg)
3428 .addReg(AM.ScaledReg)
3429 .addImm(0)
3430 .addImm(AM.Scale > 1)
3431 .setMemRefs(MemI.memoperands())
3432 .setMIFlags(MemI.getFlags());
3433 return B.getInstr();
3434 }
3435
3436 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3437 "Addressing mode not supported for folding");
3438
3439 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3440 unsigned Scale = 1;
3441 unsigned Opcode = MemI.getOpcode();
3442 if (isInt<9>(AM.Displacement))
3443 Opcode = unscaledOffsetOpcode(Opcode);
3444 else
3445 Opcode = scaledOffsetOpcode(Opcode, Scale);
3446
3447 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3448 .addReg(MemI.getOperand(0).getReg(),
3449 MemI.mayLoad() ? RegState::Define : 0)
3450 .addReg(AM.BaseReg)
3451 .addImm(AM.Displacement / Scale)
3452 .setMemRefs(MemI.memoperands())
3453 .setMIFlags(MemI.getFlags());
3454 return B.getInstr();
3455 }
3456
3457  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3458      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3459    // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3460 assert(AM.ScaledReg && !AM.Displacement &&
3461 "Address offset can be a register or an immediate, but not both");
3462 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3463 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3464 // Make sure the offset register is in the correct register class.
3465 Register OffsetReg = AM.ScaledReg;
3466 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3467 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3468 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3469 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3470 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3471 }
3472 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3473 .addReg(MemI.getOperand(0).getReg(),
3474 MemI.mayLoad() ? RegState::Define : 0)
3475 .addReg(AM.BaseReg)
3476 .addReg(OffsetReg)
3477                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3478                 .addImm(AM.Scale != 1)
3479 .setMemRefs(MemI.memoperands())
3480 .setMIFlags(MemI.getFlags());
3481
3482 return B.getInstr();
3483 }
3484
3485  llvm_unreachable(
3486      "Function must not be called with an addressing mode it can't handle");
3487}
3488
3489bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3490    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3491 bool &OffsetIsScalable, TypeSize &Width,
3492 const TargetRegisterInfo *TRI) const {
3493 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3494 // Handle only loads/stores with base register followed by immediate offset.
3495 if (LdSt.getNumExplicitOperands() == 3) {
3496 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3497 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3498 !LdSt.getOperand(2).isImm())
3499 return false;
3500 } else if (LdSt.getNumExplicitOperands() == 4) {
3501 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3502 if (!LdSt.getOperand(1).isReg() ||
3503 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3504 !LdSt.getOperand(3).isImm())
3505 return false;
3506 } else
3507 return false;
3508
3509 // Get the scaling factor for the instruction and set the width for the
3510 // instruction.
3511 TypeSize Scale(0U, false);
3512 int64_t Dummy1, Dummy2;
3513
3514 // If this returns false, then it's an instruction we don't want to handle.
3515 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3516 return false;
3517
3518 // Compute the offset. Offset is calculated as the immediate operand
3519 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3520 // set to 1.
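  // For example, "ldr x1, [x0, #8]" is LDRXui with immediate operand 1 and
  // Scale 8, giving Offset = 8; the unscaled "ldur x1, [x0, #8]" has Scale 1
  // and immediate operand 8, giving the same Offset.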
3521 if (LdSt.getNumExplicitOperands() == 3) {
3522 BaseOp = &LdSt.getOperand(1);
3523 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3524 } else {
3525 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3526 BaseOp = &LdSt.getOperand(2);
3527 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3528 }
3529 OffsetIsScalable = Scale.isScalable();
3530
3531 if (!BaseOp->isReg() && !BaseOp->isFI())
3532 return false;
3533
3534 return true;
3535}
3536
3537MachineOperand &
3538AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3539  assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3540 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3541 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3542 return OfsOp;
3543}
3544
3545bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3546 TypeSize &Width, int64_t &MinOffset,
3547 int64_t &MaxOffset) {
3548 switch (Opcode) {
3549 // Not a memory operation or something we want to handle.
3550 default:
3551 Scale = TypeSize::getFixed(0);
3552 Width = TypeSize::getFixed(0);
3553 MinOffset = MaxOffset = 0;
3554 return false;
3555 case AArch64::STRWpost:
3556 case AArch64::LDRWpost:
3557 Width = TypeSize::getFixed(32);
3558 Scale = TypeSize::getFixed(4);
3559 MinOffset = -256;
3560 MaxOffset = 255;
3561 break;
3562 case AArch64::LDURQi:
3563 case AArch64::STURQi:
3564 Width = TypeSize::getFixed(16);
3565 Scale = TypeSize::getFixed(1);
3566 MinOffset = -256;
3567 MaxOffset = 255;
3568 break;
3569 case AArch64::PRFUMi:
3570 case AArch64::LDURXi:
3571 case AArch64::LDURDi:
3572 case AArch64::LDAPURXi:
3573 case AArch64::STURXi:
3574 case AArch64::STURDi:
3575 case AArch64::STLURXi:
3576 Width = TypeSize::getFixed(8);
3577 Scale = TypeSize::getFixed(1);
3578 MinOffset = -256;
3579 MaxOffset = 255;
3580 break;
3581 case AArch64::LDURWi:
3582 case AArch64::LDURSi:
3583 case AArch64::LDURSWi:
3584 case AArch64::LDAPURi:
3585 case AArch64::LDAPURSWi:
3586 case AArch64::STURWi:
3587 case AArch64::STURSi:
3588 case AArch64::STLURWi:
3589 Width = TypeSize::getFixed(4);
3590 Scale = TypeSize::getFixed(1);
3591 MinOffset = -256;
3592 MaxOffset = 255;
3593 break;
3594 case AArch64::LDURHi:
3595 case AArch64::LDURHHi:
3596 case AArch64::LDURSHXi:
3597 case AArch64::LDURSHWi:
3598 case AArch64::LDAPURHi:
3599 case AArch64::LDAPURSHWi:
3600 case AArch64::LDAPURSHXi:
3601 case AArch64::STURHi:
3602 case AArch64::STURHHi:
3603 case AArch64::STLURHi:
3604 Width = TypeSize::getFixed(2);
3605 Scale = TypeSize::getFixed(1);
3606 MinOffset = -256;
3607 MaxOffset = 255;
3608 break;
3609 case AArch64::LDURBi:
3610 case AArch64::LDURBBi:
3611 case AArch64::LDURSBXi:
3612 case AArch64::LDURSBWi:
3613 case AArch64::LDAPURBi:
3614 case AArch64::LDAPURSBWi:
3615 case AArch64::LDAPURSBXi:
3616 case AArch64::STURBi:
3617 case AArch64::STURBBi:
3618 case AArch64::STLURBi:
3619 Width = TypeSize::getFixed(1);
3620 Scale = TypeSize::getFixed(1);
3621 MinOffset = -256;
3622 MaxOffset = 255;
3623 break;
3624 case AArch64::LDPQi:
3625 case AArch64::LDNPQi:
3626 case AArch64::STPQi:
3627 case AArch64::STNPQi:
3628 Scale = TypeSize::getFixed(16);
3629 Width = TypeSize::getFixed(32);
3630 MinOffset = -64;
3631 MaxOffset = 63;
3632 break;
3633 case AArch64::LDRQui:
3634 case AArch64::STRQui:
3635 Scale = TypeSize::getFixed(16);
3636 Width = TypeSize::getFixed(16);
3637 MinOffset = 0;
3638 MaxOffset = 4095;
3639 break;
3640 case AArch64::LDPXi:
3641 case AArch64::LDPDi:
3642 case AArch64::LDNPXi:
3643 case AArch64::LDNPDi:
3644 case AArch64::STPXi:
3645 case AArch64::STPDi:
3646 case AArch64::STNPXi:
3647 case AArch64::STNPDi:
3648 Scale = TypeSize::getFixed(8);
3649 Width = TypeSize::getFixed(16);
3650 MinOffset = -64;
3651 MaxOffset = 63;
3652 break;
3653 case AArch64::PRFMui:
3654 case AArch64::LDRXui:
3655 case AArch64::LDRDui:
3656 case AArch64::STRXui:
3657 case AArch64::STRDui:
3658 Scale = TypeSize::getFixed(8);
3659 Width = TypeSize::getFixed(8);
3660 MinOffset = 0;
3661 MaxOffset = 4095;
3662 break;
3663 case AArch64::StoreSwiftAsyncContext:
3664 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3665 Scale = TypeSize::getFixed(1);
3666 Width = TypeSize::getFixed(8);
3667 MinOffset = 0;
3668 MaxOffset = 4095;
3669 break;
3670 case AArch64::LDPWi:
3671 case AArch64::LDPSi:
3672 case AArch64::LDNPWi:
3673 case AArch64::LDNPSi:
3674 case AArch64::STPWi:
3675 case AArch64::STPSi:
3676 case AArch64::STNPWi:
3677 case AArch64::STNPSi:
3678 Scale = TypeSize::getFixed(4);
3679 Width = TypeSize::getFixed(8);
3680 MinOffset = -64;
3681 MaxOffset = 63;
3682 break;
3683 case AArch64::LDRWui:
3684 case AArch64::LDRSui:
3685 case AArch64::LDRSWui:
3686 case AArch64::STRWui:
3687 case AArch64::STRSui:
3688 Scale = TypeSize::getFixed(4);
3689 Width = TypeSize::getFixed(4);
3690 MinOffset = 0;
3691 MaxOffset = 4095;
3692 break;
3693 case AArch64::LDRHui:
3694 case AArch64::LDRHHui:
3695 case AArch64::LDRSHWui:
3696 case AArch64::LDRSHXui:
3697 case AArch64::STRHui:
3698 case AArch64::STRHHui:
3699 Scale = TypeSize::getFixed(2);
3700 Width = TypeSize::getFixed(2);
3701 MinOffset = 0;
3702 MaxOffset = 4095;
3703 break;
3704 case AArch64::LDRBui:
3705 case AArch64::LDRBBui:
3706 case AArch64::LDRSBWui:
3707 case AArch64::LDRSBXui:
3708 case AArch64::STRBui:
3709 case AArch64::STRBBui:
3710 Scale = TypeSize::getFixed(1);
3711 Width = TypeSize::getFixed(1);
3712 MinOffset = 0;
3713 MaxOffset = 4095;
3714 break;
3715 case AArch64::STPXpre:
3716 case AArch64::LDPXpost:
3717 case AArch64::STPDpre:
3718 case AArch64::LDPDpost:
3719 Scale = TypeSize::getFixed(8);
3720 Width = TypeSize::getFixed(8);
3721 MinOffset = -512;
3722 MaxOffset = 504;
3723 break;
3724 case AArch64::STPQpre:
3725 case AArch64::LDPQpost:
3726 Scale = TypeSize::getFixed(16);
3727 Width = TypeSize::getFixed(16);
3728 MinOffset = -1024;
3729 MaxOffset = 1008;
3730 break;
3731 case AArch64::STRXpre:
3732 case AArch64::STRDpre:
3733 case AArch64::LDRXpost:
3734 case AArch64::LDRDpost:
3735 Scale = TypeSize::getFixed(1);
3736 Width = TypeSize::getFixed(8);
3737 MinOffset = -256;
3738 MaxOffset = 255;
3739 break;
3740 case AArch64::STRQpre:
3741 case AArch64::LDRQpost:
3742 Scale = TypeSize::getFixed(1);
3743 Width = TypeSize::getFixed(16);
3744 MinOffset = -256;
3745 MaxOffset = 255;
3746 break;
3747 case AArch64::ADDG:
3748 Scale = TypeSize::getFixed(16);
3749 Width = TypeSize::getFixed(0);
3750 MinOffset = 0;
3751 MaxOffset = 63;
3752 break;
3753 case AArch64::TAGPstack:
3754 Scale = TypeSize::getFixed(16);
3755 Width = TypeSize::getFixed(0);
3756 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3757 // of 63 (not 64!).
3758 MinOffset = -63;
3759 MaxOffset = 63;
3760 break;
3761 case AArch64::LDG:
3762 case AArch64::STGi:
3763 case AArch64::STZGi:
3764 Scale = TypeSize::getFixed(16);
3765 Width = TypeSize::getFixed(16);
3766 MinOffset = -256;
3767 MaxOffset = 255;
3768 break;
3769 case AArch64::STR_ZZZZXI:
3770 case AArch64::LDR_ZZZZXI:
3771 Scale = TypeSize::getScalable(16);
3772 Width = TypeSize::getScalable(16 * 4);
3773 MinOffset = -256;
3774 MaxOffset = 252;
3775 break;
3776 case AArch64::STR_ZZZXI:
3777 case AArch64::LDR_ZZZXI:
3778 Scale = TypeSize::getScalable(16);
3779 Width = TypeSize::getScalable(16 * 3);
3780 MinOffset = -256;
3781 MaxOffset = 253;
3782 break;
3783 case AArch64::STR_ZZXI:
3784 case AArch64::LDR_ZZXI:
3785 Scale = TypeSize::getScalable(16);
3786 Width = TypeSize::getScalable(16 * 2);
3787 MinOffset = -256;
3788 MaxOffset = 254;
3789 break;
3790 case AArch64::LDR_PXI:
3791 case AArch64::STR_PXI:
3792 Scale = TypeSize::getScalable(2);
3793 Width = TypeSize::getScalable(2);
3794 MinOffset = -256;
3795 MaxOffset = 255;
3796 break;
3797 case AArch64::LDR_PPXI:
3798 case AArch64::STR_PPXI:
3799 Scale = TypeSize::getScalable(2);
3800 Width = TypeSize::getScalable(2 * 2);
3801 MinOffset = -256;
3802 MaxOffset = 254;
3803 break;
3804 case AArch64::LDR_ZXI:
3805 case AArch64::STR_ZXI:
3806 Scale = TypeSize::getScalable(16);
3807 Width = TypeSize::getScalable(16);
3808 MinOffset = -256;
3809 MaxOffset = 255;
3810 break;
3811 case AArch64::LD1B_IMM:
3812 case AArch64::LD1H_IMM:
3813 case AArch64::LD1W_IMM:
3814 case AArch64::LD1D_IMM:
3815 case AArch64::LDNT1B_ZRI:
3816 case AArch64::LDNT1H_ZRI:
3817 case AArch64::LDNT1W_ZRI:
3818 case AArch64::LDNT1D_ZRI:
3819 case AArch64::ST1B_IMM:
3820 case AArch64::ST1H_IMM:
3821 case AArch64::ST1W_IMM:
3822 case AArch64::ST1D_IMM:
3823 case AArch64::STNT1B_ZRI:
3824 case AArch64::STNT1H_ZRI:
3825 case AArch64::STNT1W_ZRI:
3826 case AArch64::STNT1D_ZRI:
3827 case AArch64::LDNF1B_IMM:
3828 case AArch64::LDNF1H_IMM:
3829 case AArch64::LDNF1W_IMM:
3830 case AArch64::LDNF1D_IMM:
3831 // A full vector's worth of data
3832 // Width = mbytes * elements
3833 Scale = TypeSize::getScalable(16);
3834 Width = TypeSize::getScalable(16);
3835 MinOffset = -8;
3836 MaxOffset = 7;
3837 break;
3838 case AArch64::LD2B_IMM:
3839 case AArch64::LD2H_IMM:
3840 case AArch64::LD2W_IMM:
3841 case AArch64::LD2D_IMM:
3842 case AArch64::ST2B_IMM:
3843 case AArch64::ST2H_IMM:
3844 case AArch64::ST2W_IMM:
3845 case AArch64::ST2D_IMM:
3846 Scale = TypeSize::getScalable(32);
3847 Width = TypeSize::getScalable(16 * 2);
3848 MinOffset = -8;
3849 MaxOffset = 7;
3850 break;
3851 case AArch64::LD3B_IMM:
3852 case AArch64::LD3H_IMM:
3853 case AArch64::LD3W_IMM:
3854 case AArch64::LD3D_IMM:
3855 case AArch64::ST3B_IMM:
3856 case AArch64::ST3H_IMM:
3857 case AArch64::ST3W_IMM:
3858 case AArch64::ST3D_IMM:
3859 Scale = TypeSize::getScalable(48);
3860 Width = TypeSize::getScalable(16 * 3);
3861 MinOffset = -8;
3862 MaxOffset = 7;
3863 break;
3864 case AArch64::LD4B_IMM:
3865 case AArch64::LD4H_IMM:
3866 case AArch64::LD4W_IMM:
3867 case AArch64::LD4D_IMM:
3868 case AArch64::ST4B_IMM:
3869 case AArch64::ST4H_IMM:
3870 case AArch64::ST4W_IMM:
3871 case AArch64::ST4D_IMM:
3872 Scale = TypeSize::getScalable(64);
3873 Width = TypeSize::getScalable(16 * 4);
3874 MinOffset = -8;
3875 MaxOffset = 7;
3876 break;
3877 case AArch64::LD1B_H_IMM:
3878 case AArch64::LD1SB_H_IMM:
3879 case AArch64::LD1H_S_IMM:
3880 case AArch64::LD1SH_S_IMM:
3881 case AArch64::LD1W_D_IMM:
3882 case AArch64::LD1SW_D_IMM:
3883 case AArch64::ST1B_H_IMM:
3884 case AArch64::ST1H_S_IMM:
3885 case AArch64::ST1W_D_IMM:
3886 case AArch64::LDNF1B_H_IMM:
3887 case AArch64::LDNF1SB_H_IMM:
3888 case AArch64::LDNF1H_S_IMM:
3889 case AArch64::LDNF1SH_S_IMM:
3890 case AArch64::LDNF1W_D_IMM:
3891 case AArch64::LDNF1SW_D_IMM:
3892 // A half vector's worth of data
3893 // Width = mbytes * elements
3894 Scale = TypeSize::getScalable(8);
3895 Width = TypeSize::getScalable(8);
3896 MinOffset = -8;
3897 MaxOffset = 7;
3898 break;
3899 case AArch64::LD1B_S_IMM:
3900 case AArch64::LD1SB_S_IMM:
3901 case AArch64::LD1H_D_IMM:
3902 case AArch64::LD1SH_D_IMM:
3903 case AArch64::ST1B_S_IMM:
3904 case AArch64::ST1H_D_IMM:
3905 case AArch64::LDNF1B_S_IMM:
3906 case AArch64::LDNF1SB_S_IMM:
3907 case AArch64::LDNF1H_D_IMM:
3908 case AArch64::LDNF1SH_D_IMM:
3909 // A quarter vector's worth of data
3910 // Width = mbytes * elements
3911 Scale = TypeSize::getScalable(4);
3912 Width = TypeSize::getScalable(4);
3913 MinOffset = -8;
3914 MaxOffset = 7;
3915 break;
3916 case AArch64::LD1B_D_IMM:
3917 case AArch64::LD1SB_D_IMM:
3918 case AArch64::ST1B_D_IMM:
3919 case AArch64::LDNF1B_D_IMM:
3920 case AArch64::LDNF1SB_D_IMM:
3921 // An eighth of a vector's worth of data
3922 // Width = mbytes * elements
3923 Scale = TypeSize::getScalable(2);
3924 Width = TypeSize::getScalable(2);
3925 MinOffset = -8;
3926 MaxOffset = 7;
3927 break;
3928 case AArch64::ST2Gi:
3929 case AArch64::STZ2Gi:
3930 Scale = TypeSize::getFixed(16);
3931 Width = TypeSize::getFixed(32);
3932 MinOffset = -256;
3933 MaxOffset = 255;
3934 break;
3935 case AArch64::STGPi:
3936 Scale = TypeSize::getFixed(16);
3937 Width = TypeSize::getFixed(16);
3938 MinOffset = -64;
3939 MaxOffset = 63;
3940 break;
3941 case AArch64::LD1RB_IMM:
3942 case AArch64::LD1RB_H_IMM:
3943 case AArch64::LD1RB_S_IMM:
3944 case AArch64::LD1RB_D_IMM:
3945 case AArch64::LD1RSB_H_IMM:
3946 case AArch64::LD1RSB_S_IMM:
3947 case AArch64::LD1RSB_D_IMM:
3948 Scale = TypeSize::getFixed(1);
3949 Width = TypeSize::getFixed(1);
3950 MinOffset = 0;
3951 MaxOffset = 63;
3952 break;
3953 case AArch64::LD1RH_IMM:
3954 case AArch64::LD1RH_S_IMM:
3955 case AArch64::LD1RH_D_IMM:
3956 case AArch64::LD1RSH_S_IMM:
3957 case AArch64::LD1RSH_D_IMM:
3958 Scale = TypeSize::getFixed(2);
3959 Width = TypeSize::getFixed(2);
3960 MinOffset = 0;
3961 MaxOffset = 63;
3962 break;
3963 case AArch64::LD1RW_IMM:
3964 case AArch64::LD1RW_D_IMM:
3965 case AArch64::LD1RSW_IMM:
3966 Scale = TypeSize::getFixed(4);
3967 Width = TypeSize::getFixed(4);
3968 MinOffset = 0;
3969 MaxOffset = 63;
3970 break;
3971 case AArch64::LD1RD_IMM:
3972 Scale = TypeSize::getFixed(8);
3973 Width = TypeSize::getFixed(8);
3974 MinOffset = 0;
3975 MaxOffset = 63;
3976 break;
3977 }
3978
3979 return true;
3980}
3981
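// Illustrative sketch of how the values filled in above are typically
// consumed: for a scaled form such as LDRXui the switch reports Scale = 8 and
// an immediate range of 0..4095, so a byte offset is encodable only if it is
// a multiple of the scale and its scaled value lies in that range. The helper
// name below is hypothetical.
//
//   static bool isEncodableLDRXuiOffset(int64_t ByteOffset) {
//     if (ByteOffset % 8 != 0)
//       return false;                   // must be a multiple of the scale
//     int64_t Scaled = ByteOffset / 8;  // immediate is in 8-byte units
//     return Scaled >= 0 && Scaled <= 4095;
//   }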
3982// Scaling factor for unscaled load or store.
3983int AArch64InstrInfo::getMemScale(unsigned Opc) {
3984 switch (Opc) {
3985 default:
3986 llvm_unreachable("Opcode has unknown scale!");
3987 case AArch64::LDRBBui:
3988 case AArch64::LDURBBi:
3989 case AArch64::LDRSBWui:
3990 case AArch64::LDURSBWi:
3991 case AArch64::STRBBui:
3992 case AArch64::STURBBi:
3993 return 1;
3994 case AArch64::LDRHHui:
3995 case AArch64::LDURHHi:
3996 case AArch64::LDRSHWui:
3997 case AArch64::LDURSHWi:
3998 case AArch64::STRHHui:
3999 case AArch64::STURHHi:
4000 return 2;
4001 case AArch64::LDRSui:
4002 case AArch64::LDURSi:
4003 case AArch64::LDRSpre:
4004 case AArch64::LDRSWui:
4005 case AArch64::LDURSWi:
4006 case AArch64::LDRSWpre:
4007 case AArch64::LDRWpre:
4008 case AArch64::LDRWui:
4009 case AArch64::LDURWi:
4010 case AArch64::STRSui:
4011 case AArch64::STURSi:
4012 case AArch64::STRSpre:
4013 case AArch64::STRWui:
4014 case AArch64::STURWi:
4015 case AArch64::STRWpre:
4016 case AArch64::LDPSi:
4017 case AArch64::LDPSWi:
4018 case AArch64::LDPWi:
4019 case AArch64::STPSi:
4020 case AArch64::STPWi:
4021 return 4;
4022 case AArch64::LDRDui:
4023 case AArch64::LDURDi:
4024 case AArch64::LDRDpre:
4025 case AArch64::LDRXui:
4026 case AArch64::LDURXi:
4027 case AArch64::LDRXpre:
4028 case AArch64::STRDui:
4029 case AArch64::STURDi:
4030 case AArch64::STRDpre:
4031 case AArch64::STRXui:
4032 case AArch64::STURXi:
4033 case AArch64::STRXpre:
4034 case AArch64::LDPDi:
4035 case AArch64::LDPXi:
4036 case AArch64::STPDi:
4037 case AArch64::STPXi:
4038 return 8;
4039 case AArch64::LDRQui:
4040 case AArch64::LDURQi:
4041 case AArch64::STRQui:
4042 case AArch64::STURQi:
4043 case AArch64::STRQpre:
4044 case AArch64::LDPQi:
4045 case AArch64::LDRQpre:
4046 case AArch64::STPQi:
4047 case AArch64::STGi:
4048 case AArch64::STZGi:
4049 case AArch64::ST2Gi:
4050 case AArch64::STZ2Gi:
4051 case AArch64::STGPi:
4052 return 16;
4053 }
4054}
4055
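// For illustration, with the cases above getMemScale reports the access size
// in bytes of the queried opcode, e.g.
//   AArch64InstrInfo::getMemScale(AArch64::LDRXui)  == 8
//   AArch64InstrInfo::getMemScale(AArch64::STRHHui) == 2
//   AArch64InstrInfo::getMemScale(AArch64::LDPQi)   == 16
// Callers such as scaleOffset() below use this to convert between byte
// offsets and the element offsets encoded in scaled load/store immediates.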
4056bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4057 switch (MI.getOpcode()) {
4058 default:
4059 return false;
4060 case AArch64::LDRWpre:
4061 case AArch64::LDRXpre:
4062 case AArch64::LDRSWpre:
4063 case AArch64::LDRSpre:
4064 case AArch64::LDRDpre:
4065 case AArch64::LDRQpre:
4066 return true;
4067 }
4068}
4069
4070bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4071 switch (MI.getOpcode()) {
4072 default:
4073 return false;
4074 case AArch64::STRWpre:
4075 case AArch64::STRXpre:
4076 case AArch64::STRSpre:
4077 case AArch64::STRDpre:
4078 case AArch64::STRQpre:
4079 return true;
4080 }
4081}
4082
4083bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4084 return isPreLd(MI) || isPreSt(MI);
4085}
4086
4087bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4088 switch (MI.getOpcode()) {
4089 default:
4090 return false;
4091 case AArch64::LDPSi:
4092 case AArch64::LDPSWi:
4093 case AArch64::LDPDi:
4094 case AArch64::LDPQi:
4095 case AArch64::LDPWi:
4096 case AArch64::LDPXi:
4097 case AArch64::STPSi:
4098 case AArch64::STPDi:
4099 case AArch64::STPQi:
4100 case AArch64::STPWi:
4101 case AArch64::STPXi:
4102 case AArch64::STGPi:
4103 return true;
4104 }
4105}
4106
4107const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4108 unsigned Idx =
4109 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4110 : 1;
4111 return MI.getOperand(Idx);
4112}
4113
4114const MachineOperand &
4115AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4116 unsigned Idx =
4117 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4118 : 2;
4119 return MI.getOperand(Idx);
4120}
4121
4122static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4123 Register Reg) {
4124 if (MI.getParent() == nullptr)
4125 return nullptr;
4126 const MachineFunction *MF = MI.getParent()->getParent();
4127 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4128}
4129
4130bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4131 auto IsHFPR = [&](const MachineOperand &Op) {
4132 if (!Op.isReg())
4133 return false;
4134 auto Reg = Op.getReg();
4135 if (Reg.isPhysical())
4136 return AArch64::FPR16RegClass.contains(Reg);
4137 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4138 return TRC == &AArch64::FPR16RegClass ||
4139 TRC == &AArch64::FPR16_loRegClass;
4140 };
4141 return llvm::any_of(MI.operands(), IsHFPR);
4142}
4143
4144bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4145 auto IsQFPR = [&](const MachineOperand &Op) {
4146 if (!Op.isReg())
4147 return false;
4148 auto Reg = Op.getReg();
4149 if (Reg.isPhysical())
4150 return AArch64::FPR128RegClass.contains(Reg);
4151 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4152 return TRC == &AArch64::FPR128RegClass ||
4153 TRC == &AArch64::FPR128_loRegClass;
4154 };
4155 return llvm::any_of(MI.operands(), IsQFPR);
4156}
4157
4158bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4159 switch (MI.getOpcode()) {
4160 case AArch64::BRK:
4161 case AArch64::HLT:
4162 case AArch64::PACIASP:
4163 case AArch64::PACIBSP:
4164 // Implicit BTI behavior.
4165 return true;
4166 case AArch64::PAUTH_PROLOGUE:
4167 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4168 return true;
4169 case AArch64::HINT: {
4170 unsigned Imm = MI.getOperand(0).getImm();
4171 // Explicit BTI instruction.
4172 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4173 return true;
4174 // PACI(A|B)SP instructions.
4175 if (Imm == 25 || Imm == 27)
4176 return true;
4177 return false;
4178 }
4179 default:
4180 return false;
4181 }
4182}
4183
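// For reference, the HINT immediates tested above correspond to the aliases
//   #32 = BTI, #34 = BTI c, #36 = BTI j, #38 = BTI jc  (explicit landing pads)
//   #25 = PACIASP, #27 = PACIBSP                       (implicit BTI behavior)
// which is why a raw HINT carrying one of these immediates is treated as
// having BTI semantics even before it is printed with its alias mnemonic.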
4184bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4185 auto IsFPR = [&](const MachineOperand &Op) {
4186 if (!Op.isReg())
4187 return false;
4188 auto Reg = Op.getReg();
4189 if (Reg.isPhysical())
4190 return AArch64::FPR128RegClass.contains(Reg) ||
4191 AArch64::FPR64RegClass.contains(Reg) ||
4192 AArch64::FPR32RegClass.contains(Reg) ||
4193 AArch64::FPR16RegClass.contains(Reg) ||
4194 AArch64::FPR8RegClass.contains(Reg);
4195
4196 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4197 return TRC == &AArch64::FPR128RegClass ||
4198 TRC == &AArch64::FPR128_loRegClass ||
4199 TRC == &AArch64::FPR64RegClass ||
4200 TRC == &AArch64::FPR64_loRegClass ||
4201 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4202 TRC == &AArch64::FPR8RegClass;
4203 };
4204 return llvm::any_of(MI.operands(), IsFPR);
4205}
4206
4207// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4208// scaled.
4209static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4210 int Scale = AArch64InstrInfo::getMemScale(Opc);
4211
4212 // If the byte-offset isn't a multiple of the stride, we can't scale this
4213 // offset.
4214 if (Offset % Scale != 0)
4215 return false;
4216
4217 // Convert the byte-offset used by unscaled into an "element" offset used
4218 // by the scaled pair load/store instructions.
4219 Offset /= Scale;
4220 return true;
4221}
4222
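// Worked example of scaleOffset, using the byte sizes from getMemScale above
// (the variable name is only for illustration):
//   int64_t Off = 16;
//   scaleOffset(AArch64::STURXi, Off);  // true, Off is now 2 (16 / 8)
//   Off = 12;
//   scaleOffset(AArch64::STURXi, Off);  // false, 12 is not a multiple of 8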
4223static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4224 if (FirstOpc == SecondOpc)
4225 return true;
4226 // We can also pair sign-ext and zero-ext instructions.
4227 switch (FirstOpc) {
4228 default:
4229 return false;
4230 case AArch64::STRSui:
4231 case AArch64::STURSi:
4232 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4233 case AArch64::STRDui:
4234 case AArch64::STURDi:
4235 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4236 case AArch64::STRQui:
4237 case AArch64::STURQi:
4238 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4239 case AArch64::STRWui:
4240 case AArch64::STURWi:
4241 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4242 case AArch64::STRXui:
4243 case AArch64::STURXi:
4244 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4245 case AArch64::LDRSui:
4246 case AArch64::LDURSi:
4247 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4248 case AArch64::LDRDui:
4249 case AArch64::LDURDi:
4250 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4251 case AArch64::LDRQui:
4252 case AArch64::LDURQi:
4253 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4254 case AArch64::LDRWui:
4255 case AArch64::LDURWi:
4256 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4257 case AArch64::LDRSWui:
4258 case AArch64::LDURSWi:
4259 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4260 case AArch64::LDRXui:
4261 case AArch64::LDURXi:
4262 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4263 }
4264 // These instructions can't be paired based on their opcodes.
4265 return false;
4266}
4267
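// For illustration, the sign/zero-extend cases above mean, for example:
//   canPairLdStOpc(AArch64::LDRWui,  AArch64::LDRSWui) -> true
//   canPairLdStOpc(AArch64::LDRSWui, AArch64::LDRWui)  -> true
//   canPairLdStOpc(AArch64::LDRWui,  AArch64::LDRXui)  -> false
// i.e. 32-bit zero- and sign-extending loads may pair with each other, but
// loads of different widths may not.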
4268static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4269 int64_t Offset1, unsigned Opcode1, int FI2,
4270 int64_t Offset2, unsigned Opcode2) {
4271 // Accesses through fixed stack object frame indices may access a different
4272 // fixed stack slot. Check that the object offsets + offsets match.
4273 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4274 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4275 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4276 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4277 // Convert to scaled object offsets.
4278 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4279 if (ObjectOffset1 % Scale1 != 0)
4280 return false;
4281 ObjectOffset1 /= Scale1;
4282 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4283 if (ObjectOffset2 % Scale2 != 0)
4284 return false;
4285 ObjectOffset2 /= Scale2;
4286 ObjectOffset1 += Offset1;
4287 ObjectOffset2 += Offset2;
4288 return ObjectOffset1 + 1 == ObjectOffset2;
4289 }
4290
4291 return FI1 == FI2;
4292}
4293
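// Worked example: two fixed stack objects at byte offsets 0 and 8, each
// accessed by an LDRXui (scale 8) with an instruction offset of 0, yield
// scaled object offsets 0 and 1. The accesses are adjacent, so shouldClusterFI
// returns true even though the frame indices differ.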
4294/// Detect opportunities for ldp/stp formation.
4295///
4296/// Only called for LdSt for which getMemOperandWithOffset returns true.
4297bool AArch64InstrInfo::shouldClusterMemOps(
4298 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4299 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4300 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4301 unsigned NumBytes) const {
4302 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4303 const MachineOperand &BaseOp1 = *BaseOps1.front();
4304 const MachineOperand &BaseOp2 = *BaseOps2.front();
4305 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4306 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4307 if (BaseOp1.getType() != BaseOp2.getType())
4308 return false;
4309
4310 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4311 "Only base registers and frame indices are supported.");
4312
4313 // Check for both base regs and base FI.
4314 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4315 return false;
4316
4317 // Only cluster up to a single pair.
4318 if (ClusterSize > 2)
4319 return false;
4320
4321 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4322 return false;
4323
4324 // Can we pair these instructions based on their opcodes?
4325 unsigned FirstOpc = FirstLdSt.getOpcode();
4326 unsigned SecondOpc = SecondLdSt.getOpcode();
4327 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4328 return false;
4329
4330 // Can't merge volatiles or load/stores that have a hint to avoid pair
4331 // formation, for example.
4332 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4333 !isCandidateToMergeOrPair(SecondLdSt))
4334 return false;
4335
4336 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4337 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4338 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4339 return false;
4340
4341 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4342 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4343 return false;
4344
4345 // Pairwise instructions have a 7-bit signed offset field.
4346 if (Offset1 > 63 || Offset1 < -64)
4347 return false;
4348
4349 // The caller should already have ordered First/SecondLdSt by offset.
4350 // Note: except for non-equal frame index bases
4351 if (BaseOp1.isFI()) {
4352 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4353 "Caller should have ordered offsets.");
4354
4355 const MachineFrameInfo &MFI =
4356 FirstLdSt.getParent()->getParent()->getFrameInfo();
4357 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4358 BaseOp2.getIndex(), Offset2, SecondOpc);
4359 }
4360
4361 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4362
4363 return Offset1 + 1 == Offset2;
4364}
4365
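// Putting the checks above together, a rough example of a pair that would be
// clustered: two LDRXui loads from [x0, #8] and [x0, #16] share a base
// register, carry element offsets 1 and 2 (well inside the 7-bit signed
// pairwise range), and differ by exactly one element, so the scheduler keeps
// them adjacent as ldp candidates.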
4366static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4367 unsigned Reg, unsigned SubIdx,
4368 unsigned State,
4369 const TargetRegisterInfo *TRI) {
4370 if (!SubIdx)
4371 return MIB.addReg(Reg, State);
4372
4374 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4375 return MIB.addReg(Reg, State, SubIdx);
4376}
4377
4378static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4379 unsigned NumRegs) {
4380 // We really want the positive remainder mod 32 here, that happens to be
4381 // easily obtainable with a mask.
4382 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4383}
4384
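// Worked example of the mod-32 trick above: copying a 4-register tuple whose
// source starts at encoding 31 to a destination starting at encoding 1 gives
// (1 - 31) & 0x1f == 2, which is < 4, so a forward (low-to-high) sub-register
// copy would overwrite source registers that are still needed; in that case
// copyPhysRegTuple below walks the sub-registers in reverse order.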
4385void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4386 MachineBasicBlock::iterator I,
4387 const DebugLoc &DL, MCRegister DestReg,
4388 MCRegister SrcReg, bool KillSrc,
4389 unsigned Opcode,
4390 ArrayRef<unsigned> Indices) const {
4391 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4393 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4394 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4395 unsigned NumRegs = Indices.size();
4396
4397 int SubReg = 0, End = NumRegs, Incr = 1;
4398 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4399 SubReg = NumRegs - 1;
4400 End = -1;
4401 Incr = -1;
4402 }
4403
4404 for (; SubReg != End; SubReg += Incr) {
4405 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4406 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4407 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4408 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4409 }
4410}
4411
4412void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4413 MachineBasicBlock::iterator I,
4414 DebugLoc DL, unsigned DestReg,
4415 unsigned SrcReg, bool KillSrc,
4416 unsigned Opcode, unsigned ZeroReg,
4417 llvm::ArrayRef<unsigned> Indices) const {
4419 unsigned NumRegs = Indices.size();
4420
4421#ifndef NDEBUG
4422 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4423 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4424 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4425 "GPR reg sequences should not be able to overlap");
4426#endif
4427
4428 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4429 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4430 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4431 MIB.addReg(ZeroReg);
4432 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4433 MIB.addImm(0);
4434 }
4435}
4436
4437void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4438 MachineBasicBlock::iterator I,
4439 const DebugLoc &DL, MCRegister DestReg,
4440 MCRegister SrcReg, bool KillSrc) const {
4441 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4442 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4444
4445 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4446 // If either operand is WSP, expand to ADD #0.
4447 if (Subtarget.hasZeroCycleRegMove()) {
4448 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4449 MCRegister DestRegX = TRI->getMatchingSuperReg(
4450 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4451 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4452 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4453 // This instruction is reading and writing X registers. This may upset
4454 // the register scavenger and machine verifier, so we need to indicate
4455 // that we are reading an undefined value from SrcRegX, but a proper
4456 // value from SrcReg.
4457 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4458 .addReg(SrcRegX, RegState::Undef)
4459 .addImm(0)
4461 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4462 } else {
4463 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4464 .addReg(SrcReg, getKillRegState(KillSrc))
4465 .addImm(0)
4467 }
4468 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4469 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4470 .addImm(0)
4472 } else {
4473 if (Subtarget.hasZeroCycleRegMove()) {
4474 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4475 MCRegister DestRegX = TRI->getMatchingSuperReg(
4476 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4477 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4478 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4479 // This instruction is reading and writing X registers. This may upset
4480 // the register scavenger and machine verifier, so we need to indicate
4481 // that we are reading an undefined value from SrcRegX, but a proper
4482 // value from SrcReg.
4483 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4484 .addReg(AArch64::XZR)
4485 .addReg(SrcRegX, RegState::Undef)
4486 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4487 } else {
4488 // Otherwise, expand to ORR WZR.
4489 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4490 .addReg(AArch64::WZR)
4491 .addReg(SrcReg, getKillRegState(KillSrc));
4492 }
4493 }
4494 return;
4495 }
4496
4497 // Copy a Predicate register by ORRing with itself.
4498 if (AArch64::PPRRegClass.contains(DestReg) &&
4499 AArch64::PPRRegClass.contains(SrcReg)) {
4500 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4501 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4502 .addReg(SrcReg) // Pg
4503 .addReg(SrcReg)
4504 .addReg(SrcReg, getKillRegState(KillSrc));
4505 return;
4506 }
4507
4508 // Copy a predicate-as-counter register by ORRing with itself as if it
4509 // were a regular predicate (mask) register.
4510 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4511 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4512 if (DestIsPNR || SrcIsPNR) {
4513 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4514 "Unexpected predicate-as-counter register.");
4515 auto ToPPR = [](MCRegister R) -> MCRegister {
4516 return (R - AArch64::PN0) + AArch64::P0;
4517 };
4518 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4519 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4520
4521 if (PPRSrcReg != PPRDestReg) {
4522 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4523 .addReg(PPRSrcReg) // Pg
4524 .addReg(PPRSrcReg)
4525 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4526 if (DestIsPNR)
4527 NewMI.addDef(DestReg, RegState::Implicit);
4528 }
4529 return;
4530 }
4531
4532 // Copy a Z register by ORRing with itself.
4533 if (AArch64::ZPRRegClass.contains(DestReg) &&
4534 AArch64::ZPRRegClass.contains(SrcReg)) {
4535 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4536 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4537 .addReg(SrcReg)
4538 .addReg(SrcReg, getKillRegState(KillSrc));
4539 return;
4540 }
4541
4542 // Copy a Z register pair by copying the individual sub-registers.
4543 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4544 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4545 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4546 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4547 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4548 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4549 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4550 Indices);
4551 return;
4552 }
4553
4554 // Copy a Z register triple by copying the individual sub-registers.
4555 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4556 AArch64::ZPR3RegClass.contains(SrcReg)) {
4557 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4558 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4559 AArch64::zsub2};
4560 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4561 Indices);
4562 return;
4563 }
4564
4565 // Copy a Z register quad by copying the individual sub-registers.
4566 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4567 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4568 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4569 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4570 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4571 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4572 AArch64::zsub2, AArch64::zsub3};
4573 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4574 Indices);
4575 return;
4576 }
4577
4578 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4579 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4580 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4581 // If either operand is SP, expand to ADD #0.
4582 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4583 .addReg(SrcReg, getKillRegState(KillSrc))
4584 .addImm(0)
4586 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4587 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4588 .addImm(0)
4590 } else {
4591 // Otherwise, expand to ORR XZR.
4592 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4593 .addReg(AArch64::XZR)
4594 .addReg(SrcReg, getKillRegState(KillSrc));
4595 }
4596 return;
4597 }
4598
4599 // Copy a DDDD register quad by copying the individual sub-registers.
4600 if (AArch64::DDDDRegClass.contains(DestReg) &&
4601 AArch64::DDDDRegClass.contains(SrcReg)) {
4602 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4603 AArch64::dsub2, AArch64::dsub3};
4604 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4605 Indices);
4606 return;
4607 }
4608
4609 // Copy a DDD register triple by copying the individual sub-registers.
4610 if (AArch64::DDDRegClass.contains(DestReg) &&
4611 AArch64::DDDRegClass.contains(SrcReg)) {
4612 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4613 AArch64::dsub2};
4614 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4615 Indices);
4616 return;
4617 }
4618
4619 // Copy a DD register pair by copying the individual sub-registers.
4620 if (AArch64::DDRegClass.contains(DestReg) &&
4621 AArch64::DDRegClass.contains(SrcReg)) {
4622 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4623 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4624 Indices);
4625 return;
4626 }
4627
4628 // Copy a QQQQ register quad by copying the individual sub-registers.
4629 if (AArch64::QQQQRegClass.contains(DestReg) &&
4630 AArch64::QQQQRegClass.contains(SrcReg)) {
4631 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4632 AArch64::qsub2, AArch64::qsub3};
4633 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4634 Indices);
4635 return;
4636 }
4637
4638 // Copy a QQQ register triple by copying the individual sub-registers.
4639 if (AArch64::QQQRegClass.contains(DestReg) &&
4640 AArch64::QQQRegClass.contains(SrcReg)) {
4641 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4642 AArch64::qsub2};
4643 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4644 Indices);
4645 return;
4646 }
4647
4648 // Copy a QQ register pair by copying the individual sub-registers.
4649 if (AArch64::QQRegClass.contains(DestReg) &&
4650 AArch64::QQRegClass.contains(SrcReg)) {
4651 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4652 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4653 Indices);
4654 return;
4655 }
4656
4657 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4658 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4659 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4660 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4661 AArch64::XZR, Indices);
4662 return;
4663 }
4664
4665 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4666 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4667 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4668 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4669 AArch64::WZR, Indices);
4670 return;
4671 }
4672
4673 if (AArch64::FPR128RegClass.contains(DestReg) &&
4674 AArch64::FPR128RegClass.contains(SrcReg)) {
4675 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4676 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4677 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4678 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4679 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4680 else if (Subtarget.isNeonAvailable())
4681 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4682 .addReg(SrcReg)
4683 .addReg(SrcReg, getKillRegState(KillSrc));
4684 else {
4685 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4686 .addReg(AArch64::SP, RegState::Define)
4687 .addReg(SrcReg, getKillRegState(KillSrc))
4688 .addReg(AArch64::SP)
4689 .addImm(-16);
4690 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
4691 .addReg(AArch64::SP, RegState::Define)
4692 .addReg(DestReg, RegState::Define)
4693 .addReg(AArch64::SP)
4694 .addImm(16);
4695 }
4696 return;
4697 }
4698
4699 if (AArch64::FPR64RegClass.contains(DestReg) &&
4700 AArch64::FPR64RegClass.contains(SrcReg)) {
4701 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4702 .addReg(SrcReg, getKillRegState(KillSrc));
4703 return;
4704 }
4705
4706 if (AArch64::FPR32RegClass.contains(DestReg) &&
4707 AArch64::FPR32RegClass.contains(SrcReg)) {
4708 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4709 .addReg(SrcReg, getKillRegState(KillSrc));
4710 return;
4711 }
4712
4713 if (AArch64::FPR16RegClass.contains(DestReg) &&
4714 AArch64::FPR16RegClass.contains(SrcReg)) {
4715 DestReg =
4716 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4717 SrcReg =
4718 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4719 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4720 .addReg(SrcReg, getKillRegState(KillSrc));
4721 return;
4722 }
4723
4724 if (AArch64::FPR8RegClass.contains(DestReg) &&
4725 AArch64::FPR8RegClass.contains(SrcReg)) {
4726 DestReg =
4727 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4728 SrcReg =
4729 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4730 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4731 .addReg(SrcReg, getKillRegState(KillSrc));
4732 return;
4733 }
4734
4735 // Copies between GPR64 and FPR64.
4736 if (AArch64::FPR64RegClass.contains(DestReg) &&
4737 AArch64::GPR64RegClass.contains(SrcReg)) {
4738 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4739 .addReg(SrcReg, getKillRegState(KillSrc));
4740 return;
4741 }
4742 if (AArch64::GPR64RegClass.contains(DestReg) &&
4743 AArch64::FPR64RegClass.contains(SrcReg)) {
4744 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4745 .addReg(SrcReg, getKillRegState(KillSrc));
4746 return;
4747 }
4748 // Copies between GPR32 and FPR32.
4749 if (AArch64::FPR32RegClass.contains(DestReg) &&
4750 AArch64::GPR32RegClass.contains(SrcReg)) {
4751 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4752 .addReg(SrcReg, getKillRegState(KillSrc));
4753 return;
4754 }
4755 if (AArch64::GPR32RegClass.contains(DestReg) &&
4756 AArch64::FPR32RegClass.contains(SrcReg)) {
4757 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4758 .addReg(SrcReg, getKillRegState(KillSrc));
4759 return;
4760 }
4761
4762 if (DestReg == AArch64::NZCV) {
4763 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4764 BuildMI(MBB, I, DL, get(AArch64::MSR))
4765 .addImm(AArch64SysReg::NZCV)
4766 .addReg(SrcReg, getKillRegState(KillSrc))
4767 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4768 return;
4769 }
4770
4771 if (SrcReg == AArch64::NZCV) {
4772 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4773 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4774 .addImm(AArch64SysReg::NZCV)
4775 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4776 return;
4777 }
4778
4779#ifndef NDEBUG
4781 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4782 << TRI.getRegAsmName(SrcReg) << "\n";
4783#endif
4784 llvm_unreachable("unimplemented reg-to-reg copy");
4785}
4786
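// For illustration, with the lowering above a plain 32-bit GPR copy such as
// $w1 = COPY $w2 (no WSP/WZR involved, no zero-cycle features) is emitted as
// "ORRWrr $w1, $wzr, $w2", a copy to or from WSP becomes an "ADDWri ..., 0",
// and same-sized FPR copies use FMOVSr/FMOVDr.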
4787static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
4788 MachineBasicBlock &MBB,
4789 MachineBasicBlock::iterator InsertBefore,
4790 const MCInstrDesc &MCID,
4791 Register SrcReg, bool IsKill,
4792 unsigned SubIdx0, unsigned SubIdx1, int FI,
4793 MachineMemOperand *MMO) {
4794 Register SrcReg0 = SrcReg;
4795 Register SrcReg1 = SrcReg;
4796 if (SrcReg.isPhysical()) {
4797 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4798 SubIdx0 = 0;
4799 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4800 SubIdx1 = 0;
4801 }
4802 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4803 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4804 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4805 .addFrameIndex(FI)
4806 .addImm(0)
4807 .addMemOperand(MMO);
4808}
4809
4810void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4811 MachineBasicBlock::iterator MBBI,
4812 Register SrcReg, bool isKill, int FI,
4813 const TargetRegisterClass *RC,
4814 const TargetRegisterInfo *TRI,
4815 Register VReg) const {
4816 MachineFunction &MF = *MBB.getParent();
4817 MachineFrameInfo &MFI = MF.getFrameInfo();
4818
4820 MachineMemOperand *MMO =
4822 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4823 unsigned Opc = 0;
4824 bool Offset = true;
4826 unsigned StackID = TargetStackID::Default;
4827 switch (TRI->getSpillSize(*RC)) {
4828 case 1:
4829 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4830 Opc = AArch64::STRBui;
4831 break;
4832 case 2: {
4833 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4834 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4835 Opc = AArch64::STRHui;
4836 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4837 assert(Subtarget.hasSVEorSME() &&
4838 "Unexpected register store without SVE store instructions");
4839 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4840 "Unexpected register store without SVE2p1 or SME2");
4841 Opc = AArch64::STR_PXI;
4843 }
4844 break;
4845 }
4846 case 4:
4847 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4848 Opc = AArch64::STRWui;
4849 if (SrcReg.isVirtual())
4850 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4851 else
4852 assert(SrcReg != AArch64::WSP);
4853 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4854 Opc = AArch64::STRSui;
4855 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4856 Opc = AArch64::STR_PPXI;
4858 }
4859 break;
4860 case 8:
4861 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4862 Opc = AArch64::STRXui;
4863 if (SrcReg.isVirtual())
4864 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4865 else
4866 assert(SrcReg != AArch64::SP);
4867 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4868 Opc = AArch64::STRDui;
4869 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4871 get(AArch64::STPWi), SrcReg, isKill,
4872 AArch64::sube32, AArch64::subo32, FI, MMO);
4873 return;
4874 }
4875 break;
4876 case 16:
4877 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4878 Opc = AArch64::STRQui;
4879 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4880 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4881 Opc = AArch64::ST1Twov1d;
4882 Offset = false;
4883 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4885 get(AArch64::STPXi), SrcReg, isKill,
4886 AArch64::sube64, AArch64::subo64, FI, MMO);
4887 return;
4888 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4889 assert(Subtarget.hasSVEorSME() &&
4890 "Unexpected register store without SVE store instructions");
4891 Opc = AArch64::STR_ZXI;
4893 }
4894 break;
4895 case 24:
4896 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4897 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4898 Opc = AArch64::ST1Threev1d;
4899 Offset = false;
4900 }
4901 break;
4902 case 32:
4903 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4904 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4905 Opc = AArch64::ST1Fourv1d;
4906 Offset = false;
4907 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4908 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4909 Opc = AArch64::ST1Twov2d;
4910 Offset = false;
4911 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4912 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4913 assert(Subtarget.hasSVEorSME() &&
4914 "Unexpected register store without SVE store instructions");
4915 Opc = AArch64::STR_ZZXI;
4917 }
4918 break;
4919 case 48:
4920 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4921 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4922 Opc = AArch64::ST1Threev2d;
4923 Offset = false;
4924 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4925 assert(Subtarget.hasSVEorSME() &&
4926 "Unexpected register store without SVE store instructions");
4927 Opc = AArch64::STR_ZZZXI;
4929 }
4930 break;
4931 case 64:
4932 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4933 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4934 Opc = AArch64::ST1Fourv2d;
4935 Offset = false;
4936 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4937 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4938 assert(Subtarget.hasSVEorSME() &&
4939 "Unexpected register store without SVE store instructions");
4940 Opc = AArch64::STR_ZZZZXI;
4942 }
4943 break;
4944 }
4945 assert(Opc && "Unknown register class");
4946 MFI.setStackID(FI, StackID);
4947
4948 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4949 .addReg(SrcReg, getKillRegState(isKill))
4950 .addFrameIndex(FI);
4951
4952 if (Offset)
4953 MI.addImm(0);
4954 if (PNRReg.isValid())
4955 MI.addDef(PNRReg, RegState::Implicit);
4956 MI.addMemOperand(MMO);
4957}
4958
4959static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
4960 MachineBasicBlock &MBB,
4961 MachineBasicBlock::iterator InsertBefore,
4962 const MCInstrDesc &MCID,
4963 Register DestReg, unsigned SubIdx0,
4964 unsigned SubIdx1, int FI,
4965 MachineMemOperand *MMO) {
4966 Register DestReg0 = DestReg;
4967 Register DestReg1 = DestReg;
4968 bool IsUndef = true;
4969 if (DestReg.isPhysical()) {
4970 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4971 SubIdx0 = 0;
4972 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4973 SubIdx1 = 0;
4974 IsUndef = false;
4975 }
4976 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4977 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4978 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4979 .addFrameIndex(FI)
4980 .addImm(0)
4981 .addMemOperand(MMO);
4982}
4983
4984void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4985 MachineBasicBlock::iterator MBBI,
4986 Register DestReg, int FI,
4987 const TargetRegisterClass *RC,
4988 const TargetRegisterInfo *TRI,
4989 Register VReg) const {
4990 MachineFunction &MF = *MBB.getParent();
4991 MachineFrameInfo &MFI = MF.getFrameInfo();
4993 MachineMemOperand *MMO =
4995 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4996
4997 unsigned Opc = 0;
4998 bool Offset = true;
4999 unsigned StackID = TargetStackID::Default;
5001 switch (TRI->getSpillSize(*RC)) {
5002 case 1:
5003 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5004 Opc = AArch64::LDRBui;
5005 break;
5006 case 2: {
5007 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5008 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5009 Opc = AArch64::LDRHui;
5010 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5011 assert(Subtarget.hasSVEorSME() &&
5012 "Unexpected register load without SVE load instructions");
5013 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
5014 "Unexpected register load without SVE2p1 or SME2");
5015 if (IsPNR)
5016 PNRReg = DestReg;
5017 Opc = AArch64::LDR_PXI;
5019 }
5020 break;
5021 }
5022 case 4:
5023 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5024 Opc = AArch64::LDRWui;
5025 if (DestReg.isVirtual())
5026 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5027 else
5028 assert(DestReg != AArch64::WSP);
5029 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5030 Opc = AArch64::LDRSui;
5031 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5032 Opc = AArch64::LDR_PPXI;
5034 }
5035 break;
5036 case 8:
5037 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5038 Opc = AArch64::LDRXui;
5039 if (DestReg.isVirtual())
5040 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5041 else
5042 assert(DestReg != AArch64::SP);
5043 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5044 Opc = AArch64::LDRDui;
5045 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5047 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5048 AArch64::subo32, FI, MMO);
5049 return;
5050 }
5051 break;
5052 case 16:
5053 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5054 Opc = AArch64::LDRQui;
5055 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5056 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5057 Opc = AArch64::LD1Twov1d;
5058 Offset = false;
5059 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5061 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5062 AArch64::subo64, FI, MMO);
5063 return;
5064 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5065 assert(Subtarget.hasSVEorSME() &&
5066 "Unexpected register load without SVE load instructions");
5067 Opc = AArch64::LDR_ZXI;
5069 }
5070 break;
5071 case 24:
5072 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5073 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5074 Opc = AArch64::LD1Threev1d;
5075 Offset = false;
5076 }
5077 break;
5078 case 32:
5079 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5080 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5081 Opc = AArch64::LD1Fourv1d;
5082 Offset = false;
5083 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5084 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5085 Opc = AArch64::LD1Twov2d;
5086 Offset = false;
5087 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5088 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5089 assert(Subtarget.hasSVEorSME() &&
5090 "Unexpected register load without SVE load instructions");
5091 Opc = AArch64::LDR_ZZXI;
5093 }
5094 break;
5095 case 48:
5096 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5097 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5098 Opc = AArch64::LD1Threev2d;
5099 Offset = false;
5100 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5101 assert(Subtarget.hasSVEorSME() &&
5102 "Unexpected register load without SVE load instructions");
5103 Opc = AArch64::LDR_ZZZXI;
51