1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/Module.h"
44#include "llvm/MC/MCAsmInfo.h"
45#include "llvm/MC/MCInst.h"
47#include "llvm/MC/MCInstrDesc.h"
52#include "llvm/Support/LEB128.h"
56#include <cassert>
57#include <cstdint>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define GET_INSTRINFO_CTOR_DTOR
64#include "AArch64GenInstrInfo.inc"
65
67 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
68 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
69
71 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
72 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
73
74static cl::opt<unsigned>
75 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
76 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
77
78static cl::opt<unsigned>
79 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
80 cl::desc("Restrict range of B instructions (DEBUG)"));
81
82AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
83 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
84 AArch64::CATCHRET),
85 RI(STI.getTargetTriple()), Subtarget(STI) {}
86
87/// getInstSizeInBytes - Return the number of bytes of code the specified
88/// instruction may be. This returns the maximum number of bytes.
89unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
90 const MachineBasicBlock &MBB = *MI.getParent();
91 const MachineFunction *MF = MBB.getParent();
92 const Function &F = MF->getFunction();
93 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
94
95 {
96 auto Op = MI.getOpcode();
97 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
98 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
99 }
100
101 // Meta-instructions emit no code.
102 if (MI.isMetaInstruction())
103 return 0;
104
105 // FIXME: We currently only handle pseudoinstructions that don't get expanded
106 // before the assembly printer.
107 unsigned NumBytes = 0;
108 const MCInstrDesc &Desc = MI.getDesc();
109
110 // Size should be preferably set in
111 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
112 // Specific cases handle instructions of variable sizes
113 switch (Desc.getOpcode()) {
114 default:
115 if (Desc.getSize())
116 return Desc.getSize();
117
118 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
119 // with fixed constant size but not specified in .td file) is a normal
120 // 4-byte insn.
121 NumBytes = 4;
122 break;
123 case TargetOpcode::STACKMAP:
124 // The upper bound for a stackmap intrinsic is the full length of its shadow
125 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
126 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
127 break;
128 case TargetOpcode::PATCHPOINT:
129 // The size of the patchpoint intrinsic is the number of bytes requested
130 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
131 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
132 break;
133 case TargetOpcode::STATEPOINT:
134 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
135 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
136 // No patch bytes means a normal call inst is emitted
137 if (NumBytes == 0)
138 NumBytes = 4;
139 break;
140 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
141 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
142 // instructions are expanded to the specified number of NOPs. Otherwise,
143 // they are expanded to 36-byte XRay sleds.
144 NumBytes =
145 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
146 break;
147 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
148 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
149 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
150 NumBytes = 36;
151 break;
152 case TargetOpcode::PATCHABLE_EVENT_CALL:
153 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
154 NumBytes = 24;
155 break;
156
157 case AArch64::SPACE:
158 NumBytes = MI.getOperand(1).getImm();
159 break;
160 case TargetOpcode::BUNDLE:
161 NumBytes = getInstBundleLength(MI);
162 break;
163 }
164
165 return NumBytes;
166}
167
168unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
169 unsigned Size = 0;
170 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
171 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
172 while (++I != E && I->isInsideBundle()) {
173 assert(!I->isBundle() && "No nested bundle!");
174 Size += getInstSizeInBytes(*I);
175 }
176 return Size;
177}
178
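// parseCondBranch() below decomposes a conditional terminator into a target
// block and a Cond vector in the encoding the other hooks here expect: a Bcc
// is stored as { cc }, while compare-and-branch forms are stored as
// { -1, opcode, reg } for CB(N)Z and { -1, opcode, reg, bit } for TB(N)Z.
// For example (illustrative), "tbnz w8, #3, %bb.1" becomes
// { -1, AArch64::TBNZW, w8, 3 } with Target = %bb.1.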
179static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
180 SmallVectorImpl<MachineOperand> &Cond) {
181 // Block ends with fall-through condbranch.
182 switch (LastInst->getOpcode()) {
183 default:
184 llvm_unreachable("Unknown branch instruction?");
185 case AArch64::Bcc:
186 Target = LastInst->getOperand(1).getMBB();
187 Cond.push_back(LastInst->getOperand(0));
188 break;
189 case AArch64::CBZW:
190 case AArch64::CBZX:
191 case AArch64::CBNZW:
192 case AArch64::CBNZX:
193 Target = LastInst->getOperand(1).getMBB();
194 Cond.push_back(MachineOperand::CreateImm(-1));
195 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
196 Cond.push_back(LastInst->getOperand(0));
197 break;
198 case AArch64::TBZW:
199 case AArch64::TBZX:
200 case AArch64::TBNZW:
201 case AArch64::TBNZX:
202 Target = LastInst->getOperand(2).getMBB();
203 Cond.push_back(MachineOperand::CreateImm(-1));
204 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
205 Cond.push_back(LastInst->getOperand(0));
206 Cond.push_back(LastInst->getOperand(1));
207 }
208}
209
210static unsigned getBranchDisplacementBits(unsigned Opc) {
211 switch (Opc) {
212 default:
213 llvm_unreachable("unexpected opcode!");
214 case AArch64::B:
215 return BDisplacementBits;
216 case AArch64::TBNZW:
217 case AArch64::TBZW:
218 case AArch64::TBNZX:
219 case AArch64::TBZX:
220 return TBZDisplacementBits;
221 case AArch64::CBNZW:
222 case AArch64::CBZW:
223 case AArch64::CBNZX:
224 case AArch64::CBZX:
225 return CBZDisplacementBits;
226 case AArch64::Bcc:
227 return BCCDisplacementBits;
228 }
229}
230
231bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
232 int64_t BrOffset) const {
233 unsigned Bits = getBranchDisplacementBits(BranchOp);
234 assert(Bits >= 3 && "max branch displacement must be enough to jump "
235 "over conditional branch expansion");
236 return isIntN(Bits, BrOffset / 4);
237}
238
239MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
240 const MachineInstr &MI) const {
241 switch (MI.getOpcode()) {
242 default:
243 llvm_unreachable("unexpected opcode!");
244 case AArch64::B:
245 return MI.getOperand(0).getMBB();
246 case AArch64::TBZW:
247 case AArch64::TBNZW:
248 case AArch64::TBZX:
249 case AArch64::TBNZX:
250 return MI.getOperand(2).getMBB();
251 case AArch64::CBZW:
252 case AArch64::CBNZW:
253 case AArch64::CBZX:
254 case AArch64::CBNZX:
255 case AArch64::Bcc:
256 return MI.getOperand(1).getMBB();
257 }
258}
259
260void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
261 MachineBasicBlock &NewDestBB,
262 MachineBasicBlock &RestoreBB,
263 const DebugLoc &DL,
264 int64_t BrOffset,
265 RegScavenger *RS) const {
266 assert(RS && "RegScavenger required for long branching");
267 assert(MBB.empty() &&
268 "new block should be inserted for expanding unconditional branch");
269 assert(MBB.pred_size() == 1);
270 assert(RestoreBB.empty() &&
271 "restore block should be inserted for restoring clobbered registers");
272
273 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
274 // Offsets outside of the signed 33-bit range are not supported for ADRP +
275 // ADD.
276 if (!isInt<33>(BrOffset))
278 "Branch offsets outside of the signed 33-bit range not supported");
279
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
281 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
282 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
283 .addReg(Reg)
284 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
285 .addImm(0);
286 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
287 };
288
289 RS->enterBasicBlockEnd(MBB);
290 // If X16 is unused, we can rely on the linker to insert a range extension
291 // thunk if NewDestBB is out of range of a single B instruction.
292 constexpr Register Reg = AArch64::X16;
293 if (!RS->isRegUsed(Reg)) {
294 insertUnconditionalBranch(MBB, &NewDestBB, DL);
295 RS->setRegUsed(Reg);
296 return;
297 }
298
299 // If there's a free register and it's worth inflating the code size,
300 // manually insert the indirect branch.
301 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
302 if (Scavenged != AArch64::NoRegister &&
303 MBB.sectionIsCold()) {
304 buildIndirectBranch(Scavenged, NewDestBB);
305 RS->setRegUsed(Scavenged);
306 return;
307 }
308
309 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
310 // with red zones.
311 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
312 if (!AFI || AFI->hasRedZone().value_or(true))
313 report_fatal_error(
314 "Unable to insert indirect branch inside function that has red zone");
315
316 // Otherwise, spill X16 and defer range extension to the linker.
317 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
318 .addReg(AArch64::SP, RegState::Define)
319 .addReg(Reg)
320 .addReg(AArch64::SP)
321 .addImm(-16);
322
323 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
324
325 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
326 .addReg(AArch64::SP, RegState::Define)
327 .addReg(Reg, RegState::Define)
328 .addReg(AArch64::SP)
329 .addImm(16);
330}
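// To summarize the cases above: if x16 is free, the plain B defers to a
// linker range-extension thunk (which may clobber x16); if another GPR can be
// scavenged and the code growth is acceptable, an explicit ADRP+ADD+BR
// sequence is emitted; otherwise x16 is spilled with a pre-indexed store, the
// B again relies on a thunk, and RestoreBB reloads x16 with a post-indexed
// load (hence the red-zone restriction, since SP is temporarily moved).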
331
332// Branch analysis.
333bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
334 MachineBasicBlock *&TBB,
335 MachineBasicBlock *&FBB,
336 SmallVectorImpl<MachineOperand> &Cond,
337 bool AllowModify) const {
338 // If the block has no terminators, it just falls into the block after it.
339 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
340 if (I == MBB.end())
341 return false;
342
343 // Skip over SpeculationBarrierEndBB terminators
344 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
345 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
346 --I;
347 }
348
349 if (!isUnpredicatedTerminator(*I))
350 return false;
351
352 // Get the last instruction in the block.
353 MachineInstr *LastInst = &*I;
354
355 // If there is only one terminator instruction, process it.
356 unsigned LastOpc = LastInst->getOpcode();
357 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
358 if (isUncondBranchOpcode(LastOpc)) {
359 TBB = LastInst->getOperand(0).getMBB();
360 return false;
361 }
362 if (isCondBranchOpcode(LastOpc)) {
363 // Block ends with fall-through condbranch.
364 parseCondBranch(LastInst, TBB, Cond);
365 return false;
366 }
367 return true; // Can't handle indirect branch.
368 }
369
370 // Get the instruction before it if it is a terminator.
371 MachineInstr *SecondLastInst = &*I;
372 unsigned SecondLastOpc = SecondLastInst->getOpcode();
373
374 // If AllowModify is true and the block ends with two or more unconditional
375 // branches, delete all but the first unconditional branch.
376 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
377 while (isUncondBranchOpcode(SecondLastOpc)) {
378 LastInst->eraseFromParent();
379 LastInst = SecondLastInst;
380 LastOpc = LastInst->getOpcode();
381 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
382 // Return now; the only terminator is an unconditional branch.
383 TBB = LastInst->getOperand(0).getMBB();
384 return false;
385 }
386 SecondLastInst = &*I;
387 SecondLastOpc = SecondLastInst->getOpcode();
388 }
389 }
390
391 // If we're allowed to modify and the block ends in an unconditional branch
392 // which could simply fallthrough, remove the branch. (Note: This case only
393 // matters when we can't understand the whole sequence, otherwise it's also
394 // handled by BranchFolding.cpp.)
395 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
396 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
397 LastInst->eraseFromParent();
398 LastInst = SecondLastInst;
399 LastOpc = LastInst->getOpcode();
400 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
401 assert(!isUncondBranchOpcode(LastOpc) &&
402 "unreachable unconditional branches removed above");
403
404 if (isCondBranchOpcode(LastOpc)) {
405 // Block ends with fall-through condbranch.
406 parseCondBranch(LastInst, TBB, Cond);
407 return false;
408 }
409 return true; // Can't handle indirect branch.
410 }
411 SecondLastInst = &*I;
412 SecondLastOpc = SecondLastInst->getOpcode();
413 }
414
415 // If there are three terminators, we don't know what sort of block this is.
416 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
417 return true;
418
419 // If the block ends with a B and a Bcc, handle it.
420 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
421 parseCondBranch(SecondLastInst, TBB, Cond);
422 FBB = LastInst->getOperand(0).getMBB();
423 return false;
424 }
425
426 // If the block ends with two unconditional branches, handle it. The second
427 // one is not executed, so remove it.
428 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
429 TBB = SecondLastInst->getOperand(0).getMBB();
430 I = LastInst;
431 if (AllowModify)
432 I->eraseFromParent();
433 return false;
434 }
435
436 // ...likewise if it ends with an indirect branch followed by an unconditional
437 // branch.
438 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
439 I = LastInst;
440 if (AllowModify)
441 I->eraseFromParent();
442 return true;
443 }
444
445 // Otherwise, can't handle this.
446 return true;
447}
448
449bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
450 MachineBranchPredicate &MBP,
451 bool AllowModify) const {
452 // For the moment, handle only a block which ends with a cb(n)zx followed by
453 // a fallthrough. Why this? Because it is a common form.
454 // TODO: Should we handle b.cc?
455
456 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
457 if (I == MBB.end())
458 return true;
459
460 // Skip over SpeculationBarrierEndBB terminators
461 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
462 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
463 --I;
464 }
465
466 if (!isUnpredicatedTerminator(*I))
467 return true;
468
469 // Get the last instruction in the block.
470 MachineInstr *LastInst = &*I;
471 unsigned LastOpc = LastInst->getOpcode();
472 if (!isCondBranchOpcode(LastOpc))
473 return true;
474
475 switch (LastOpc) {
476 default:
477 return true;
478 case AArch64::CBZW:
479 case AArch64::CBZX:
480 case AArch64::CBNZW:
481 case AArch64::CBNZX:
482 break;
483 };
484
485 MBP.TrueDest = LastInst->getOperand(1).getMBB();
486 assert(MBP.TrueDest && "expected!");
487 MBP.FalseDest = MBB.getNextNode();
488
489 MBP.ConditionDef = nullptr;
490 MBP.SingleUseCondition = false;
491
492 MBP.LHS = LastInst->getOperand(0);
493 MBP.RHS = MachineOperand::CreateImm(0);
494 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
495 : MachineBranchPredicate::PRED_EQ;
496 return false;
497}
498
499bool AArch64InstrInfo::reverseBranchCondition(
500 SmallVectorImpl<MachineOperand> &Cond) const {
501 if (Cond[0].getImm() != -1) {
502 // Regular Bcc
503 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
504 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
505 } else {
506 // Folded compare-and-branch
507 switch (Cond[1].getImm()) {
508 default:
509 llvm_unreachable("Unknown conditional branch!");
510 case AArch64::CBZW:
511 Cond[1].setImm(AArch64::CBNZW);
512 break;
513 case AArch64::CBNZW:
514 Cond[1].setImm(AArch64::CBZW);
515 break;
516 case AArch64::CBZX:
517 Cond[1].setImm(AArch64::CBNZX);
518 break;
519 case AArch64::CBNZX:
520 Cond[1].setImm(AArch64::CBZX);
521 break;
522 case AArch64::TBZW:
523 Cond[1].setImm(AArch64::TBNZW);
524 break;
525 case AArch64::TBNZW:
526 Cond[1].setImm(AArch64::TBZW);
527 break;
528 case AArch64::TBZX:
529 Cond[1].setImm(AArch64::TBNZX);
530 break;
531 case AArch64::TBNZX:
532 Cond[1].setImm(AArch64::TBZX);
533 break;
534 }
535 }
536
537 return false;
538}
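// For example (illustrative), reversing a folded compare-and-branch condition
// { -1, AArch64::CBZW, w8 } yields { -1, AArch64::CBNZW, w8 }, while a plain
// Bcc condition { AArch64CC::EQ } is flipped to { AArch64CC::NE } via
// getInvertedCondCode() above.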
539
540unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
541 int *BytesRemoved) const {
542 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
543 if (I == MBB.end())
544 return 0;
545
546 if (!isUncondBranchOpcode(I->getOpcode()) &&
547 !isCondBranchOpcode(I->getOpcode()))
548 return 0;
549
550 // Remove the branch.
551 I->eraseFromParent();
552
553 I = MBB.end();
554
555 if (I == MBB.begin()) {
556 if (BytesRemoved)
557 *BytesRemoved = 4;
558 return 1;
559 }
560 --I;
561 if (!isCondBranchOpcode(I->getOpcode())) {
562 if (BytesRemoved)
563 *BytesRemoved = 4;
564 return 1;
565 }
566
567 // Remove the branch.
568 I->eraseFromParent();
569 if (BytesRemoved)
570 *BytesRemoved = 8;
571
572 return 2;
573}
574
575void AArch64InstrInfo::instantiateCondBranch(
576 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
577 ArrayRef<MachineOperand> Cond) const {
578 if (Cond[0].getImm() != -1) {
579 // Regular Bcc
580 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
581 } else {
582 // Folded compare-and-branch
583 // Note that we use addOperand instead of addReg to keep the flags.
584 const MachineInstrBuilder MIB =
585 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
586 if (Cond.size() > 3)
587 MIB.addImm(Cond[3].getImm());
588 MIB.addMBB(TBB);
589 }
590}
591
592unsigned AArch64InstrInfo::insertBranch(
593 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
594 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
595 // Shouldn't be a fall through.
596 assert(TBB && "insertBranch must not be told to insert a fallthrough");
597
598 if (!FBB) {
599 if (Cond.empty()) // Unconditional branch?
600 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
601 else
602 instantiateCondBranch(MBB, DL, TBB, Cond);
603
604 if (BytesAdded)
605 *BytesAdded = 4;
606
607 return 1;
608 }
609
610 // Two-way conditional branch.
611 instantiateCondBranch(MBB, DL, TBB, Cond);
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
613
614 if (BytesAdded)
615 *BytesAdded = 8;
616
617 return 2;
618}
619
620// Find the original register that VReg is copied from.
621static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
622 while (Register::isVirtualRegister(VReg)) {
623 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
624 if (!DefMI->isFullCopy())
625 return VReg;
626 VReg = DefMI->getOperand(1).getReg();
627 }
628 return VReg;
629}
630
631// Determine if VReg is defined by an instruction that can be folded into a
632// csel instruction. If so, return the folded opcode, and the replacement
633// register.
634static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
635 unsigned *NewVReg = nullptr) {
636 VReg = removeCopies(MRI, VReg);
637 if (!Register::isVirtualRegister(VReg))
638 return 0;
639
640 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
641 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
642 unsigned Opc = 0;
643 unsigned SrcOpNum = 0;
644 switch (DefMI->getOpcode()) {
645 case AArch64::ADDSXri:
646 case AArch64::ADDSWri:
647 // if NZCV is used, do not fold.
648 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
649 true) == -1)
650 return 0;
651 // fall-through to ADDXri and ADDWri.
652 [[fallthrough]];
653 case AArch64::ADDXri:
654 case AArch64::ADDWri:
655 // add x, 1 -> csinc.
656 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
657 DefMI->getOperand(3).getImm() != 0)
658 return 0;
659 SrcOpNum = 1;
660 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
661 break;
662
663 case AArch64::ORNXrr:
664 case AArch64::ORNWrr: {
665 // not x -> csinv, represented as orn dst, xzr, src.
666 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
667 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
668 return 0;
669 SrcOpNum = 2;
670 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
671 break;
672 }
673
674 case AArch64::SUBSXrr:
675 case AArch64::SUBSWrr:
676 // if NZCV is used, do not fold.
677 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
678 true) == -1)
679 return 0;
680 // fall-through to SUBXrr and SUBWrr.
681 [[fallthrough]];
682 case AArch64::SUBXrr:
683 case AArch64::SUBWrr: {
684 // neg x -> csneg, represented as sub dst, xzr, src.
685 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
686 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
687 return 0;
688 SrcOpNum = 2;
689 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
690 break;
691 }
692 default:
693 return 0;
694 }
695 assert(Opc && SrcOpNum && "Missing parameters");
696
697 if (NewVReg)
698 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
699 return Opc;
700}
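// For example (illustrative), if the selected value is defined by
// "%t = ADDWri %a, 1, 0" the select can be emitted as a CSINCWr on %a, and a
// definition like "%t = SUBWrr wzr, %a" folds to CSNEGWr; insertSelect()
// below performs the required condition inversion when the folded operand was
// the true value.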
701
702bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
703 ArrayRef<MachineOperand> Cond,
704 Register DstReg, Register TrueReg,
705 Register FalseReg, int &CondCycles,
706 int &TrueCycles,
707 int &FalseCycles) const {
708 // Check register classes.
709 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
710 const TargetRegisterClass *RC =
711 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
712 if (!RC)
713 return false;
714
715 // Also need to check the dest regclass, in case we're trying to optimize
716 // something like:
717 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
718 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
719 return false;
720
721 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
722 unsigned ExtraCondLat = Cond.size() != 1;
723
724 // GPRs are handled by csel.
725 // FIXME: Fold in x+1, -x, and ~x when applicable.
726 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
727 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
728 // Single-cycle csel, csinc, csinv, and csneg.
729 CondCycles = 1 + ExtraCondLat;
730 TrueCycles = FalseCycles = 1;
731 if (canFoldIntoCSel(MRI, TrueReg))
732 TrueCycles = 0;
733 else if (canFoldIntoCSel(MRI, FalseReg))
734 FalseCycles = 0;
735 return true;
736 }
737
738 // Scalar floating point is handled by fcsel.
739 // FIXME: Form fabs, fmin, and fmax when applicable.
740 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
741 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
742 CondCycles = 5 + ExtraCondLat;
743 TrueCycles = FalseCycles = 2;
744 return true;
745 }
746
747 // Can't do vectors.
748 return false;
749}
750
751void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator I,
753 const DebugLoc &DL, Register DstReg,
754 ArrayRef<MachineOperand> Cond,
755 Register TrueReg, Register FalseReg) const {
756 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
757
758 // Parse the condition code, see parseCondBranch() above.
759 AArch64CC::CondCode CC;
760 switch (Cond.size()) {
761 default:
762 llvm_unreachable("Unknown condition opcode in Cond");
763 case 1: // b.cc
764 CC = AArch64CC::CondCode(Cond[0].getImm());
765 break;
766 case 3: { // cbz/cbnz
767 // We must insert a compare against 0.
768 bool Is64Bit;
769 switch (Cond[1].getImm()) {
770 default:
771 llvm_unreachable("Unknown branch opcode in Cond");
772 case AArch64::CBZW:
773 Is64Bit = false;
774 CC = AArch64CC::EQ;
775 break;
776 case AArch64::CBZX:
777 Is64Bit = true;
778 CC = AArch64CC::EQ;
779 break;
780 case AArch64::CBNZW:
781 Is64Bit = false;
782 CC = AArch64CC::NE;
783 break;
784 case AArch64::CBNZX:
785 Is64Bit = true;
786 CC = AArch64CC::NE;
787 break;
788 }
789 Register SrcReg = Cond[2].getReg();
790 if (Is64Bit) {
791 // cmp reg, #0 is actually subs xzr, reg, #0.
792 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
793 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
794 .addReg(SrcReg)
795 .addImm(0)
796 .addImm(0);
797 } else {
798 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
799 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
800 .addReg(SrcReg)
801 .addImm(0)
802 .addImm(0);
803 }
804 break;
805 }
806 case 4: { // tbz/tbnz
807 // We must insert a tst instruction.
808 switch (Cond[1].getImm()) {
809 default:
810 llvm_unreachable("Unknown branch opcode in Cond");
811 case AArch64::TBZW:
812 case AArch64::TBZX:
813 CC = AArch64CC::EQ;
814 break;
815 case AArch64::TBNZW:
816 case AArch64::TBNZX:
817 CC = AArch64CC::NE;
818 break;
819 }
820 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
821 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
822 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
823 .addReg(Cond[2].getReg())
824 .addImm(
825 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
826 else
827 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
828 .addReg(Cond[2].getReg())
829 .addImm(
830 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
831 break;
832 }
833 }
834
835 unsigned Opc = 0;
836 const TargetRegisterClass *RC = nullptr;
837 bool TryFold = false;
838 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
839 RC = &AArch64::GPR64RegClass;
840 Opc = AArch64::CSELXr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
843 RC = &AArch64::GPR32RegClass;
844 Opc = AArch64::CSELWr;
845 TryFold = true;
846 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
847 RC = &AArch64::FPR64RegClass;
848 Opc = AArch64::FCSELDrrr;
849 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
850 RC = &AArch64::FPR32RegClass;
851 Opc = AArch64::FCSELSrrr;
852 }
853 assert(RC && "Unsupported regclass");
854
855 // Try folding simple instructions into the csel.
856 if (TryFold) {
857 unsigned NewVReg = 0;
858 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
859 if (FoldedOpc) {
860 // The folded opcodes csinc, csinv and csneg apply the operation to
861 // FalseReg, so we need to invert the condition.
862 CC = AArch64CC::getInvertedCondCode(CC);
863 TrueReg = FalseReg;
864 } else
865 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
866
867 // Fold the operation. Leave any dead instructions for DCE to clean up.
868 if (FoldedOpc) {
869 FalseReg = NewVReg;
870 Opc = FoldedOpc;
871 // This extends the live range of NewVReg.
872 MRI.clearKillFlags(NewVReg);
873 }
874 }
875
876 // Pull all virtual registers into the appropriate class.
877 MRI.constrainRegClass(TrueReg, RC);
878 MRI.constrainRegClass(FalseReg, RC);
879
880 // Insert the csel.
881 BuildMI(MBB, I, DL, get(Opc), DstReg)
882 .addReg(TrueReg)
883 .addReg(FalseReg)
884 .addImm(CC);
885}
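// As an illustration, a select on a TB(N)Z-style condition
// { -1, AArch64::TBNZW, w8, 3 } is lowered above to roughly:
//   ands wzr, w8, #(1 << 3)      ; materialize the bit test in NZCV
//   csel w0, w<true>, w<false>, ne
// with CSINC/CSINV/CSNEG substituted when one input folds per
// canFoldIntoCSel().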
886
887// Return true if Imm can be loaded into a register by a "cheap" sequence of
888// instructions. For now, "cheap" means at most two instructions.
889static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
890 if (BitSize == 32)
891 return true;
892
893 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
894 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
895 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
896 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
897
898 return Is.size() <= 2;
899}
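// For instance, 0x0000000000abcdef expands to MOVZ+MOVK (two instructions)
// and is considered cheap here, whereas a value such as 0x123456789abcdef0
// needs a MOVZ plus three MOVKs and is not.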
900
901// FIXME: this implementation should be micro-architecture dependent, so a
902// micro-architecture target hook should be introduced here in future.
903bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
904 if (Subtarget.hasExynosCheapAsMoveHandling()) {
905 if (isExynosCheapAsMove(MI))
906 return true;
907 return MI.isAsCheapAsAMove();
908 }
909
910 switch (MI.getOpcode()) {
911 default:
912 return MI.isAsCheapAsAMove();
913
914 case AArch64::ADDWrs:
915 case AArch64::ADDXrs:
916 case AArch64::SUBWrs:
917 case AArch64::SUBXrs:
918 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
919
920 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
921 // ORRXri, it is as cheap as MOV.
922 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
923 case AArch64::MOVi32imm:
924 return isCheapImmediate(MI, 32);
925 case AArch64::MOVi64imm:
926 return isCheapImmediate(MI, 64);
927 }
928}
929
930bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
931 switch (MI.getOpcode()) {
932 default:
933 return false;
934
935 case AArch64::ADDWrs:
936 case AArch64::ADDXrs:
937 case AArch64::ADDSWrs:
938 case AArch64::ADDSXrs: {
939 unsigned Imm = MI.getOperand(3).getImm();
940 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
941 if (ShiftVal == 0)
942 return true;
943 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
944 }
945
946 case AArch64::ADDWrx:
947 case AArch64::ADDXrx:
948 case AArch64::ADDXrx64:
949 case AArch64::ADDSWrx:
950 case AArch64::ADDSXrx:
951 case AArch64::ADDSXrx64: {
952 unsigned Imm = MI.getOperand(3).getImm();
953 switch (AArch64_AM::getArithExtendType(Imm)) {
954 default:
955 return false;
956 case AArch64_AM::UXTB:
957 case AArch64_AM::UXTH:
958 case AArch64_AM::UXTW:
959 case AArch64_AM::UXTX:
960 return AArch64_AM::getArithShiftValue(Imm) <= 4;
961 }
962 }
963
964 case AArch64::SUBWrs:
965 case AArch64::SUBSWrs: {
966 unsigned Imm = MI.getOperand(3).getImm();
967 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
968 return ShiftVal == 0 ||
969 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
970 }
971
972 case AArch64::SUBXrs:
973 case AArch64::SUBSXrs: {
974 unsigned Imm = MI.getOperand(3).getImm();
975 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
976 return ShiftVal == 0 ||
977 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
978 }
979
980 case AArch64::SUBWrx:
981 case AArch64::SUBXrx:
982 case AArch64::SUBXrx64:
983 case AArch64::SUBSWrx:
984 case AArch64::SUBSXrx:
985 case AArch64::SUBSXrx64: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 switch (AArch64_AM::getArithExtendType(Imm)) {
988 default:
989 return false;
990 case AArch64_AM::UXTB:
991 case AArch64_AM::UXTH:
992 case AArch64_AM::UXTW:
993 case AArch64_AM::UXTX:
994 return AArch64_AM::getArithShiftValue(Imm) == 0;
995 }
996 }
997
998 case AArch64::LDRBBroW:
999 case AArch64::LDRBBroX:
1000 case AArch64::LDRBroW:
1001 case AArch64::LDRBroX:
1002 case AArch64::LDRDroW:
1003 case AArch64::LDRDroX:
1004 case AArch64::LDRHHroW:
1005 case AArch64::LDRHHroX:
1006 case AArch64::LDRHroW:
1007 case AArch64::LDRHroX:
1008 case AArch64::LDRQroW:
1009 case AArch64::LDRQroX:
1010 case AArch64::LDRSBWroW:
1011 case AArch64::LDRSBWroX:
1012 case AArch64::LDRSBXroW:
1013 case AArch64::LDRSBXroX:
1014 case AArch64::LDRSHWroW:
1015 case AArch64::LDRSHWroX:
1016 case AArch64::LDRSHXroW:
1017 case AArch64::LDRSHXroX:
1018 case AArch64::LDRSWroW:
1019 case AArch64::LDRSWroX:
1020 case AArch64::LDRSroW:
1021 case AArch64::LDRSroX:
1022 case AArch64::LDRWroW:
1023 case AArch64::LDRWroX:
1024 case AArch64::LDRXroW:
1025 case AArch64::LDRXroX:
1026 case AArch64::PRFMroW:
1027 case AArch64::PRFMroX:
1028 case AArch64::STRBBroW:
1029 case AArch64::STRBBroX:
1030 case AArch64::STRBroW:
1031 case AArch64::STRBroX:
1032 case AArch64::STRDroW:
1033 case AArch64::STRDroX:
1034 case AArch64::STRHHroW:
1035 case AArch64::STRHHroX:
1036 case AArch64::STRHroW:
1037 case AArch64::STRHroX:
1038 case AArch64::STRQroW:
1039 case AArch64::STRQroX:
1040 case AArch64::STRSroW:
1041 case AArch64::STRSroX:
1042 case AArch64::STRWroW:
1043 case AArch64::STRWroX:
1044 case AArch64::STRXroW:
1045 case AArch64::STRXroX: {
1046 unsigned IsSigned = MI.getOperand(3).getImm();
1047 return !IsSigned;
1048 }
1049 }
1050}
1051
1052bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1053 unsigned Opc = MI.getOpcode();
1054 switch (Opc) {
1055 default:
1056 return false;
1057 case AArch64::SEH_StackAlloc:
1058 case AArch64::SEH_SaveFPLR:
1059 case AArch64::SEH_SaveFPLR_X:
1060 case AArch64::SEH_SaveReg:
1061 case AArch64::SEH_SaveReg_X:
1062 case AArch64::SEH_SaveRegP:
1063 case AArch64::SEH_SaveRegP_X:
1064 case AArch64::SEH_SaveFReg:
1065 case AArch64::SEH_SaveFReg_X:
1066 case AArch64::SEH_SaveFRegP:
1067 case AArch64::SEH_SaveFRegP_X:
1068 case AArch64::SEH_SetFP:
1069 case AArch64::SEH_AddFP:
1070 case AArch64::SEH_Nop:
1071 case AArch64::SEH_PrologEnd:
1072 case AArch64::SEH_EpilogStart:
1073 case AArch64::SEH_EpilogEnd:
1074 case AArch64::SEH_PACSignLR:
1075 case AArch64::SEH_SaveAnyRegQP:
1076 case AArch64::SEH_SaveAnyRegQPX:
1077 return true;
1078 }
1079}
1080
1081bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1082 Register &SrcReg, Register &DstReg,
1083 unsigned &SubIdx) const {
1084 switch (MI.getOpcode()) {
1085 default:
1086 return false;
1087 case AArch64::SBFMXri: // aka sxtw
1088 case AArch64::UBFMXri: // aka uxtw
1089 // Check for the 32 -> 64 bit extension case, these instructions can do
1090 // much more.
1091 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1092 return false;
1093 // This is a signed or unsigned 32 -> 64 bit extension.
1094 SrcReg = MI.getOperand(1).getReg();
1095 DstReg = MI.getOperand(0).getReg();
1096 SubIdx = AArch64::sub_32;
1097 return true;
1098 }
1099}
1100
1101bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1102 const MachineInstr &MIa, const MachineInstr &MIb) const {
1103 const TargetRegisterInfo *TRI = &getRegisterInfo();
1104 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1105 int64_t OffsetA = 0, OffsetB = 0;
1106 TypeSize WidthA(0, false), WidthB(0, false);
1107 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1108
1109 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1110 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1111
1112 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1113 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1114 return false;
1115
1116 // Retrieve the base, offset from the base and width. Width
1117 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1118 // the bases are identical, and the offset of a lower memory access +
1119 // the width doesn't overlap the offset of a higher memory access,
1120 // then the memory accesses are different.
1121 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1122 // are assumed to have the same scale (vscale).
1123 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1124 WidthA, TRI) &&
1125 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1126 WidthB, TRI)) {
1127 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1128 OffsetAIsScalable == OffsetBIsScalable) {
1129 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1130 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1131 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1132 if (LowWidth.isScalable() == OffsetAIsScalable &&
1133 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1134 return true;
1135 }
1136 }
1137 return false;
1138}
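// For instance (illustrative), "str x0, [x1, #8]" and "ldr x2, [x1, #16]"
// share the base x1, and the lower access (offset 8, width 8) ends at the
// higher access's offset, so the two are reported as trivially disjoint.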
1139
1140bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1141 const MachineBasicBlock *MBB,
1142 const MachineFunction &MF) const {
1143 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1144 return true;
1145
1146 // Do not move an instruction that can be recognized as a branch target.
1147 if (hasBTISemantics(MI))
1148 return true;
1149
1150 switch (MI.getOpcode()) {
1151 case AArch64::HINT:
1152 // CSDB hints are scheduling barriers.
1153 if (MI.getOperand(0).getImm() == 0x14)
1154 return true;
1155 break;
1156 case AArch64::DSB:
1157 case AArch64::ISB:
1158 // DSB and ISB also are scheduling barriers.
1159 return true;
1160 case AArch64::MSRpstatesvcrImm1:
1161 // SMSTART and SMSTOP are also scheduling barriers.
1162 return true;
1163 default:;
1164 }
1165 if (isSEHInstruction(MI))
1166 return true;
1167 auto Next = std::next(MI.getIterator());
1168 return Next != MBB->end() && Next->isCFIInstruction();
1169}
1170
1171/// analyzeCompare - For a comparison instruction, return the source registers
1172/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1173/// Return true if the comparison instruction can be analyzed.
1174bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1175 Register &SrcReg2, int64_t &CmpMask,
1176 int64_t &CmpValue) const {
1177 // The first operand can be a frame index where we'd normally expect a
1178 // register.
1179 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1180 if (!MI.getOperand(1).isReg())
1181 return false;
1182
1183 switch (MI.getOpcode()) {
1184 default:
1185 break;
1186 case AArch64::PTEST_PP:
1187 case AArch64::PTEST_PP_ANY:
1188 SrcReg = MI.getOperand(0).getReg();
1189 SrcReg2 = MI.getOperand(1).getReg();
1190 // Not sure about the mask and value for now...
1191 CmpMask = ~0;
1192 CmpValue = 0;
1193 return true;
1194 case AArch64::SUBSWrr:
1195 case AArch64::SUBSWrs:
1196 case AArch64::SUBSWrx:
1197 case AArch64::SUBSXrr:
1198 case AArch64::SUBSXrs:
1199 case AArch64::SUBSXrx:
1200 case AArch64::ADDSWrr:
1201 case AArch64::ADDSWrs:
1202 case AArch64::ADDSWrx:
1203 case AArch64::ADDSXrr:
1204 case AArch64::ADDSXrs:
1205 case AArch64::ADDSXrx:
1206 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1207 SrcReg = MI.getOperand(1).getReg();
1208 SrcReg2 = MI.getOperand(2).getReg();
1209 CmpMask = ~0;
1210 CmpValue = 0;
1211 return true;
1212 case AArch64::SUBSWri:
1213 case AArch64::ADDSWri:
1214 case AArch64::SUBSXri:
1215 case AArch64::ADDSXri:
1216 SrcReg = MI.getOperand(1).getReg();
1217 SrcReg2 = 0;
1218 CmpMask = ~0;
1219 CmpValue = MI.getOperand(2).getImm();
1220 return true;
1221 case AArch64::ANDSWri:
1222 case AArch64::ANDSXri:
1223 // ANDS does not use the same encoding scheme as the other xxxS
1224 // instructions.
1225 SrcReg = MI.getOperand(1).getReg();
1226 SrcReg2 = 0;
1227 CmpMask = ~0;
1228 CmpValue = AArch64_AM::decodeLogicalImmediate(
1229 MI.getOperand(2).getImm(),
1230 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1231 return true;
1232 }
1233
1234 return false;
1235}
1236
1237static bool UpdateOperandRegClass(MachineInstr &Instr) {
1238 MachineBasicBlock *MBB = Instr.getParent();
1239 assert(MBB && "Can't get MachineBasicBlock here");
1240 MachineFunction *MF = MBB->getParent();
1241 assert(MF && "Can't get MachineFunction here");
1242 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1243 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1244 MachineRegisterInfo *MRI = &MF->getRegInfo();
1245
1246 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1247 ++OpIdx) {
1248 MachineOperand &MO = Instr.getOperand(OpIdx);
1249 const TargetRegisterClass *OpRegCstraints =
1250 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1251
1252 // If there's no constraint, there's nothing to do.
1253 if (!OpRegCstraints)
1254 continue;
1255 // If the operand is a frame index, there's nothing to do here.
1256 // A frame index operand will resolve correctly during PEI.
1257 if (MO.isFI())
1258 continue;
1259
1260 assert(MO.isReg() &&
1261 "Operand has register constraints without being a register!");
1262
1263 Register Reg = MO.getReg();
1264 if (Reg.isPhysical()) {
1265 if (!OpRegCstraints->contains(Reg))
1266 return false;
1267 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1268 !MRI->constrainRegClass(Reg, OpRegCstraints))
1269 return false;
1270 }
1271
1272 return true;
1273}
1274
1275/// Return the opcode that does not set flags when possible - otherwise
1276/// return the original opcode. The caller is responsible to do the actual
1277/// substitution and legality checking.
1278unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1279 // Don't convert all compare instructions, because for some the zero register
1280 // encoding becomes the sp register.
1281 bool MIDefinesZeroReg = false;
1282 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1283 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1284 MIDefinesZeroReg = true;
1285
1286 switch (MI.getOpcode()) {
1287 default:
1288 return MI.getOpcode();
1289 case AArch64::ADDSWrr:
1290 return AArch64::ADDWrr;
1291 case AArch64::ADDSWri:
1292 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1293 case AArch64::ADDSWrs:
1294 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1295 case AArch64::ADDSWrx:
1296 return AArch64::ADDWrx;
1297 case AArch64::ADDSXrr:
1298 return AArch64::ADDXrr;
1299 case AArch64::ADDSXri:
1300 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1301 case AArch64::ADDSXrs:
1302 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1303 case AArch64::ADDSXrx:
1304 return AArch64::ADDXrx;
1305 case AArch64::SUBSWrr:
1306 return AArch64::SUBWrr;
1307 case AArch64::SUBSWri:
1308 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1309 case AArch64::SUBSWrs:
1310 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1311 case AArch64::SUBSWrx:
1312 return AArch64::SUBWrx;
1313 case AArch64::SUBSXrr:
1314 return AArch64::SUBXrr;
1315 case AArch64::SUBSXri:
1316 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1317 case AArch64::SUBSXrs:
1318 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1319 case AArch64::SUBSXrx:
1320 return AArch64::SUBXrx;
1321 }
1322}
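// The MIDefinesZeroReg special case exists because in the non-flag-setting
// immediate and shifted forms register 31 encodes SP rather than WZR/XZR;
// e.g. "ADDSWri wzr, w0, 1, 0" (a cmn) must keep its flag-setting opcode,
// since rewriting it to ADDWri would turn the destination into wsp.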
1323
1324enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1325
1326/// True when condition flags are accessed (either by writing or reading)
1327/// on the instruction trace starting at From and ending at To.
1328///
1329/// Note: If From and To are from different blocks it's assumed CC are accessed
1330/// on the path.
1331static bool areCFlagsAccessedBetweenInstrs(
1332 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1333 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1334 // Early exit if To is at the beginning of the BB.
1335 if (To == To->getParent()->begin())
1336 return true;
1337
1338 // Check whether the instructions are in the same basic block
1339 // If not, assume the condition flags might get modified somewhere.
1340 if (To->getParent() != From->getParent())
1341 return true;
1342
1343 // From must be above To.
1344 assert(std::any_of(
1345 ++To.getReverse(), To->getParent()->rend(),
1346 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1347
1348 // We iterate backward starting at \p To until we hit \p From.
1349 for (const MachineInstr &Instr :
1350 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1351 if (((AccessToCheck & AK_Write) &&
1352 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1353 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1354 return true;
1355 }
1356 return false;
1357}
1358
1359std::optional<unsigned>
1360AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1361 MachineInstr *Pred,
1362 const MachineRegisterInfo *MRI) const {
1363 unsigned MaskOpcode = Mask->getOpcode();
1364 unsigned PredOpcode = Pred->getOpcode();
1365 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1366 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1367
1368 if (PredIsWhileLike) {
1369 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1370 // instruction and the condition is "any" since WHILcc does an implicit
1371 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1372 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1373 return PredOpcode;
1374
1375 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1376 // redundant since WHILE performs an implicit PTEST with an all active
1377 // mask.
1378 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1379 getElementSizeForOpcode(MaskOpcode) ==
1380 getElementSizeForOpcode(PredOpcode))
1381 return PredOpcode;
1382
1383 return {};
1384 }
1385
1386 if (PredIsPTestLike) {
1387 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1388 // instruction that sets the flags as PTEST would and the condition is
1389 // "any" since PG is always a subset of the governing predicate of the
1390 // ptest-like instruction.
1391 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1392 return PredOpcode;
1393
1394 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1395 // element size matches and either the PTEST_LIKE instruction uses
1396 // the same all active mask or the condition is "any".
1397 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1398 getElementSizeForOpcode(MaskOpcode) ==
1399 getElementSizeForOpcode(PredOpcode)) {
1400 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1401 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1402 return PredOpcode;
1403 }
1404
1405 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1406 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1407 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1408 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1409 // performed by the compare could consider fewer lanes for these element
1410 // sizes.
1411 //
1412 // For example, consider
1413 //
1414 // ptrue p0.b ; P0=1111-1111-1111-1111
1415 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1416 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1417 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1418 // ; ^ last active
1419 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1420 // ; ^ last active
1421 //
1422 // where the compare generates a canonical all active 32-bit predicate
1423 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1424 // active flag, whereas the PTEST instruction with the same mask doesn't.
1425 // For PTEST_ANY this doesn't apply as the flags in this case would be
1426 // identical regardless of element size.
1427 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1428 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1429 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1430 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1431 return PredOpcode;
1432
1433 return {};
1434 }
1435
1436 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1437 // opcode so the PTEST becomes redundant.
1438 switch (PredOpcode) {
1439 case AArch64::AND_PPzPP:
1440 case AArch64::BIC_PPzPP:
1441 case AArch64::EOR_PPzPP:
1442 case AArch64::NAND_PPzPP:
1443 case AArch64::NOR_PPzPP:
1444 case AArch64::ORN_PPzPP:
1445 case AArch64::ORR_PPzPP:
1446 case AArch64::BRKA_PPzP:
1447 case AArch64::BRKPA_PPzPP:
1448 case AArch64::BRKB_PPzP:
1449 case AArch64::BRKPB_PPzPP:
1450 case AArch64::RDFFR_PPz: {
1451 // Check to see if our mask is the same. If not the resulting flag bits
1452 // may be different and we can't remove the ptest.
1453 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1454 if (Mask != PredMask)
1455 return {};
1456 break;
1457 }
1458 case AArch64::BRKN_PPzP: {
1459 // BRKN uses an all active implicit mask to set flags unlike the other
1460 // flag-setting instructions.
1461 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1462 if ((MaskOpcode != AArch64::PTRUE_B) ||
1463 (Mask->getOperand(1).getImm() != 31))
1464 return {};
1465 break;
1466 }
1467 case AArch64::PTRUE_B:
1468 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1469 break;
1470 default:
1471 // Bail out if we don't recognize the input
1472 return {};
1473 }
1474
1475 return convertToFlagSettingOpc(PredOpcode);
1476}
1477
1478/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1479/// operation which could set the flags in an identical manner
1480bool AArch64InstrInfo::optimizePTestInstr(
1481 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1482 const MachineRegisterInfo *MRI) const {
1483 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1484 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1485 unsigned PredOpcode = Pred->getOpcode();
1486 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1487 if (!NewOp)
1488 return false;
1489
1490 const TargetRegisterInfo *TRI = &getRegisterInfo();
1491
1492 // If another instruction between Pred and PTest accesses flags, don't remove
1493 // the ptest or update the earlier instruction to modify them.
1494 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1495 return false;
1496
1497 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1498 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1499 // operand to be replaced with an equivalent instruction that also sets the
1500 // flags.
1501 PTest->eraseFromParent();
1502 if (*NewOp != PredOpcode) {
1503 Pred->setDesc(get(*NewOp));
1504 bool succeeded = UpdateOperandRegClass(*Pred);
1505 (void)succeeded;
1506 assert(succeeded && "Operands have incompatible register classes!");
1507 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1508 }
1509
1510 // Ensure that the flags def is live.
1511 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1512 unsigned i = 0, e = Pred->getNumOperands();
1513 for (; i != e; ++i) {
1514 MachineOperand &MO = Pred->getOperand(i);
1515 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1516 MO.setIsDead(false);
1517 break;
1518 }
1519 }
1520 }
1521 return true;
1522}
1523
1524/// Try to optimize a compare instruction. A compare instruction is an
1525/// instruction which produces AArch64::NZCV. It can be treated as a true
1526/// compare instruction when there are no uses of its destination
1527/// register.
1528///
1529/// The following steps are tried in order:
1530/// 1. Convert CmpInstr into an unconditional version.
1531/// 2. Remove CmpInstr if above there is an instruction producing a needed
1532/// condition code or an instruction which can be converted into such an
1533/// instruction.
1534/// Only comparison with zero is supported.
1536 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1537 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1538 assert(CmpInstr.getParent());
1539 assert(MRI);
1540
1541 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1542 int DeadNZCVIdx =
1543 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1544 if (DeadNZCVIdx != -1) {
1545 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1546 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1547 CmpInstr.eraseFromParent();
1548 return true;
1549 }
1550 unsigned Opc = CmpInstr.getOpcode();
1551 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1552 if (NewOpc == Opc)
1553 return false;
1554 const MCInstrDesc &MCID = get(NewOpc);
1555 CmpInstr.setDesc(MCID);
1556 CmpInstr.removeOperand(DeadNZCVIdx);
1557 bool succeeded = UpdateOperandRegClass(CmpInstr);
1558 (void)succeeded;
1559 assert(succeeded && "Some operands reg class are incompatible!");
1560 return true;
1561 }
1562
1563 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1564 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1565 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1566
1567 if (SrcReg2 != 0)
1568 return false;
1569
1570 // CmpInstr is a Compare instruction if destination register is not used.
1571 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1572 return false;
1573
1574 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1575 return true;
1576 return (CmpValue == 0 || CmpValue == 1) &&
1577 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1578}
1579
1580/// Get opcode of S version of Instr.
1581/// If Instr is S version its opcode is returned.
1582/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1583/// or we are not interested in it.
1584static unsigned sForm(MachineInstr &Instr) {
1585 switch (Instr.getOpcode()) {
1586 default:
1587 return AArch64::INSTRUCTION_LIST_END;
1588
1589 case AArch64::ADDSWrr:
1590 case AArch64::ADDSWri:
1591 case AArch64::ADDSXrr:
1592 case AArch64::ADDSXri:
1593 case AArch64::SUBSWrr:
1594 case AArch64::SUBSWri:
1595 case AArch64::SUBSXrr:
1596 case AArch64::SUBSXri:
1597 return Instr.getOpcode();
1598
1599 case AArch64::ADDWrr:
1600 return AArch64::ADDSWrr;
1601 case AArch64::ADDWri:
1602 return AArch64::ADDSWri;
1603 case AArch64::ADDXrr:
1604 return AArch64::ADDSXrr;
1605 case AArch64::ADDXri:
1606 return AArch64::ADDSXri;
1607 case AArch64::ADCWr:
1608 return AArch64::ADCSWr;
1609 case AArch64::ADCXr:
1610 return AArch64::ADCSXr;
1611 case AArch64::SUBWrr:
1612 return AArch64::SUBSWrr;
1613 case AArch64::SUBWri:
1614 return AArch64::SUBSWri;
1615 case AArch64::SUBXrr:
1616 return AArch64::SUBSXrr;
1617 case AArch64::SUBXri:
1618 return AArch64::SUBSXri;
1619 case AArch64::SBCWr:
1620 return AArch64::SBCSWr;
1621 case AArch64::SBCXr:
1622 return AArch64::SBCSXr;
1623 case AArch64::ANDWri:
1624 return AArch64::ANDSWri;
1625 case AArch64::ANDXri:
1626 return AArch64::ANDSXri;
1627 }
1628}
1629
1630/// Check if AArch64::NZCV should be alive in successors of MBB.
1631static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1632 for (auto *BB : MBB->successors())
1633 if (BB->isLiveIn(AArch64::NZCV))
1634 return true;
1635 return false;
1636}
1637
1638/// \returns The condition code operand index for \p Instr if it is a branch
1639/// or select and -1 otherwise.
1640static int
1641findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1642 switch (Instr.getOpcode()) {
1643 default:
1644 return -1;
1645
1646 case AArch64::Bcc: {
1647 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1648 assert(Idx >= 2);
1649 return Idx - 2;
1650 }
1651
1652 case AArch64::CSINVWr:
1653 case AArch64::CSINVXr:
1654 case AArch64::CSINCWr:
1655 case AArch64::CSINCXr:
1656 case AArch64::CSELWr:
1657 case AArch64::CSELXr:
1658 case AArch64::CSNEGWr:
1659 case AArch64::CSNEGXr:
1660 case AArch64::FCSELSrrr:
1661 case AArch64::FCSELDrrr: {
1662 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1663 assert(Idx >= 1);
1664 return Idx - 1;
1665 }
1666 }
1667}
1668
1669/// Find a condition code used by the instruction.
1670/// Returns AArch64CC::Invalid if either the instruction does not use condition
1671/// codes or we don't optimize CmpInstr in the presence of such instructions.
1672static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1673 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1674 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1675 Instr.getOperand(CCIdx).getImm())
1676 : AArch64CC::Invalid;
1677}
1678
1681 UsedNZCV UsedFlags;
1682 switch (CC) {
1683 default:
1684 break;
1685
1686 case AArch64CC::EQ: // Z set
1687 case AArch64CC::NE: // Z clear
1688 UsedFlags.Z = true;
1689 break;
1690
1691 case AArch64CC::HI: // Z clear and C set
1692 case AArch64CC::LS: // Z set or C clear
1693 UsedFlags.Z = true;
1694 [[fallthrough]];
1695 case AArch64CC::HS: // C set
1696 case AArch64CC::LO: // C clear
1697 UsedFlags.C = true;
1698 break;
1699
1700 case AArch64CC::MI: // N set
1701 case AArch64CC::PL: // N clear
1702 UsedFlags.N = true;
1703 break;
1704
1705 case AArch64CC::VS: // V set
1706 case AArch64CC::VC: // V clear
1707 UsedFlags.V = true;
1708 break;
1709
1710 case AArch64CC::GT: // Z clear, N and V the same
1711 case AArch64CC::LE: // Z set, N and V differ
1712 UsedFlags.Z = true;
1713 [[fallthrough]];
1714 case AArch64CC::GE: // N and V the same
1715 case AArch64CC::LT: // N and V differ
1716 UsedFlags.N = true;
1717 UsedFlags.V = true;
1718 break;
1719 }
1720 return UsedFlags;
1721}
1722
1723/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1724/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1725/// \returns std::nullopt otherwise.
1726///
1727/// Collect instructions using those flags in \p CCUseInstrs if provided.
1728std::optional<UsedNZCV>
1729llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1730 const TargetRegisterInfo &TRI,
1731 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1732 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1733 if (MI.getParent() != CmpParent)
1734 return std::nullopt;
1735
1736 if (areCFlagsAliveInSuccessors(CmpParent))
1737 return std::nullopt;
1738
1739 UsedNZCV NZCVUsedAfterCmp;
1740 for (MachineInstr &Instr : instructionsWithoutDebug(
1741 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1742 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1743 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1744 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1745 return std::nullopt;
1746 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1747 if (CCUseInstrs)
1748 CCUseInstrs->push_back(&Instr);
1749 }
1750 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1751 break;
1752 }
1753 return NZCVUsedAfterCmp;
1754}
1755
1756static bool isADDSRegImm(unsigned Opcode) {
1757 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1758}
1759
1760static bool isSUBSRegImm(unsigned Opcode) {
1761 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1762}
1763
1764/// Check if CmpInstr can be substituted by MI.
1765///
1766/// CmpInstr can be substituted:
1767/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1768/// - and, MI and CmpInstr are from the same MachineBB
1769/// - and, condition flags are not alive in successors of the CmpInstr parent
1770/// - and, if MI opcode is the S form there must be no defs of flags between
1771/// MI and CmpInstr
1772/// or if MI opcode is not the S form there must be neither defs of flags
1773/// nor uses of flags between MI and CmpInstr.
1774/// - and, if C/V flags are not used after CmpInstr
1775/// or if N flag is used but MI produces poison value if signed overflow
1776/// occurs.
1778 const TargetRegisterInfo &TRI) {
1779 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1780 // that may or may not set flags.
1781 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1782
1783 const unsigned CmpOpcode = CmpInstr.getOpcode();
1784 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1785 return false;
1786
1787 assert((CmpInstr.getOperand(2).isImm() &&
1788 CmpInstr.getOperand(2).getImm() == 0) &&
1789 "Caller guarantees that CmpInstr compares with constant 0");
1790
1791 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1792 if (!NZVCUsed || NZVCUsed->C)
1793 return false;
1794
1795 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1796 // '%vreg = add ...' or '%vreg = sub ...'.
1797 // Condition flag V is used to indicate signed overflow.
1798 // 1) MI and CmpInstr set N and V to the same value.
1799 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1800 // signed overflow occurs, so CmpInstr could still be simplified away.
1801 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1802 return false;
1803
1804 AccessKind AccessToCheck = AK_Write;
1805 if (sForm(MI) != MI.getOpcode())
1806 AccessToCheck = AK_All;
1807 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1808}
1809
1810/// Substitute an instruction comparing to zero with another instruction
1811/// which produces needed condition flags.
1812///
1813/// Return true on success.
1814bool AArch64InstrInfo::substituteCmpToZero(
1815 MachineInstr &CmpInstr, unsigned SrcReg,
1816 const MachineRegisterInfo &MRI) const {
1817 // Get the unique definition of SrcReg.
1818 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1819 if (!MI)
1820 return false;
1821
1822 const TargetRegisterInfo &TRI = getRegisterInfo();
1823
1824 unsigned NewOpc = sForm(*MI);
1825 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1826 return false;
1827
1828 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1829 return false;
1830
1831 // Update the instruction to set NZCV.
1832 MI->setDesc(get(NewOpc));
1833 CmpInstr.eraseFromParent();
1834 bool succeeded = UpdateOperandRegClass(*MI);
1835 (void)succeeded;
1836 assert(succeeded && "Some operands reg class are incompatible!");
1837 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1838 return true;
1839}
1840
1841/// \returns True if \p CmpInstr can be removed.
1842///
1843/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1844/// codes used in \p CCUseInstrs must be inverted.
1845static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1846 int CmpValue, const TargetRegisterInfo &TRI,
1847 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1848 bool &IsInvertCC) {
1849 assert((CmpValue == 0 || CmpValue == 1) &&
1850 "Only comparisons to 0 or 1 considered for removal!");
1851
1852 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1853 unsigned MIOpc = MI.getOpcode();
1854 if (MIOpc == AArch64::CSINCWr) {
1855 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1856 MI.getOperand(2).getReg() != AArch64::WZR)
1857 return false;
1858 } else if (MIOpc == AArch64::CSINCXr) {
1859 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1860 MI.getOperand(2).getReg() != AArch64::XZR)
1861 return false;
1862 } else {
1863 return false;
1864 }
1865 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1866 if (MICC == AArch64CC::Invalid)
1867 return false;
1868
1869 // NZCV needs to be defined
1870 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1871 return false;
1872
1873 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1874 const unsigned CmpOpcode = CmpInstr.getOpcode();
1875 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1876 if (CmpValue && !IsSubsRegImm)
1877 return false;
1878 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1879 return false;
1880
1881 // MI conditions allowed: eq, ne, mi, pl
1882 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1883 if (MIUsedNZCV.C || MIUsedNZCV.V)
1884 return false;
1885
1886 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1887 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1888 // Condition flags must not be used in CmpInstr's basic block successors, and
1889 // only the Z or N flag may be used after CmpInstr within its basic block.
1890 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1891 return false;
1892 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1893 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1894 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1895 return false;
1896 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1897 if (MIUsedNZCV.N && !CmpValue)
1898 return false;
1899
1900 // There must be no defs of flags between MI and CmpInstr
1901 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1902 return false;
1903
1904 // Condition code is inverted in the following cases:
1905 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1906 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1907 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1908 (!CmpValue && MICC == AArch64CC::NE);
1909 return true;
1910}
1911
1912/// Remove comparison in csinc-cmp sequence
1913///
1914/// Examples:
1915/// 1. \code
1916/// csinc w9, wzr, wzr, ne
1917/// cmp w9, #0
1918/// b.eq
1919/// \endcode
1920/// to
1921/// \code
1922/// csinc w9, wzr, wzr, ne
1923/// b.ne
1924/// \endcode
1925///
1926/// 2. \code
1927/// csinc x2, xzr, xzr, mi
1928/// cmp x2, #1
1929/// b.pl
1930/// \endcode
1931/// to
1932/// \code
1933/// csinc x2, xzr, xzr, mi
1934/// b.pl
1935/// \endcode
1936///
1937/// \param CmpInstr comparison instruction
1938/// \return True when comparison removed
1939bool AArch64InstrInfo::removeCmpToZeroOrOne(
1940 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1941 const MachineRegisterInfo &MRI) const {
1942 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1943 if (!MI)
1944 return false;
1945 const TargetRegisterInfo &TRI = getRegisterInfo();
1946 SmallVector<MachineInstr *, 4> CCUseInstrs;
1947 bool IsInvertCC = false;
1948 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1949 IsInvertCC))
1950 return false;
1951 // Make transformation
1952 CmpInstr.eraseFromParent();
1953 if (IsInvertCC) {
1954 // Invert condition codes in CmpInstr CC users
1955 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1956 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1957 assert(Idx >= 0 && "Unexpected instruction using CC.");
1958 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1959 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1960 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1961 CCOperand.setImm(CCUse);
1962 }
1963 }
1964 return true;
1965}
1966
1967bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1968 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1969 MI.getOpcode() != AArch64::CATCHRET)
1970 return false;
1971
1972 MachineBasicBlock &MBB = *MI.getParent();
1973 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1974 auto TRI = Subtarget.getRegisterInfo();
1975 DebugLoc DL = MI.getDebugLoc();
1976
1977 if (MI.getOpcode() == AArch64::CATCHRET) {
1978 // Skip to the first instruction before the epilog.
1979 const TargetInstrInfo *TII =
1980 MBB.getParent()->getSubtarget().getInstrInfo();
1981 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1982 MachineBasicBlock::iterator MBBI = MI.getIterator();
1983 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1984 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1985 FirstEpilogSEH != MBB.begin())
1986 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1987 if (FirstEpilogSEH != MBB.begin())
1988 FirstEpilogSEH = std::next(FirstEpilogSEH);
1989 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1990 .addReg(AArch64::X0, RegState::Define)
1991 .addMBB(TargetMBB);
1992 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1993 .addReg(AArch64::X0, RegState::Define)
1994 .addReg(AArch64::X0)
1995 .addMBB(TargetMBB)
1996 .addImm(0);
1997 return true;
1998 }
1999
2000 Register Reg = MI.getOperand(0).getReg();
2001 Module &M = *MBB.getParent()->getFunction().getParent();
2002 if (M.getStackProtectorGuard() == "sysreg") {
2003 const AArch64SysReg::SysReg *SrcReg =
2004 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2005 if (!SrcReg)
2006 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2007
2008 // mrs xN, sysreg
2009 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2010 .addDef(Reg)
2011 .addImm(SrcReg->Encoding);
2012 int Offset = M.getStackProtectorGuardOffset();
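// The branches below match what the available encodings can reach: LDRXui
// takes an unsigned 12-bit immediate scaled by 8 (0..32760 in steps of 8),
// LDURXi takes a signed 9-bit byte offset (-256..255), and otherwise an
// ADDXri/SUBXri (unsigned 12-bit immediate, 0..4095) materializes the address
// for a zero-offset load.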
2013 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2014 // ldr xN, [xN, #offset]
2015 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2016 .addDef(Reg)
2017 .addUse(Reg, RegState::Kill)
2018 .addImm(Offset / 8);
2019 } else if (Offset >= -256 && Offset <= 255) {
2020 // ldur xN, [xN, #offset]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(Offset);
2025 } else if (Offset >= -4095 && Offset <= 4095) {
2026 if (Offset > 0) {
2027 // add xN, xN, #offset
2028 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset)
2032 .addImm(0);
2033 } else {
2034 // sub xN, xN, #offset
2035 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2036 .addDef(Reg)
2037 .addUse(Reg, RegState::Kill)
2038 .addImm(-Offset)
2039 .addImm(0);
2040 }
2041 // ldr xN, [xN]
2042 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2043 .addDef(Reg)
2044 .addUse(Reg, RegState::Kill)
2045 .addImm(0);
2046 } else {
2047 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2048 // than 32760.
2049 // It might be nice to use AArch64::MOVi32imm here, which would get
2050 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2051 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2052 // AArch64FrameLowering might help us find such a scratch register
2053 // though. If we failed to find a scratch register, we could emit a
2054 // stream of add instructions to build up the immediate. Or, we could try
2055 // to insert a AArch64::MOVi32imm before register allocation so that we
2056 // didn't need to scavenge for a scratch register.
2057 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2058 }
2059 MBB.erase(MI);
2060 return true;
2061 }
2062
2063 const GlobalValue *GV =
2064 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2065 const TargetMachine &TM = MBB.getParent()->getTarget();
2066 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2067 const unsigned char MO_NC = AArch64II::MO_NC;
2068
2069 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2070 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2071 .addGlobalAddress(GV, 0, OpFlags);
2072 if (Subtarget.isTargetILP32()) {
2073 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2074 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2075 .addDef(Reg32, RegState::Dead)
2076 .addUse(Reg, RegState::Kill)
2077 .addImm(0)
2078 .addMemOperand(*MI.memoperands_begin())
2079 .addDef(Reg, RegState::Implicit);
2080 } else {
2081 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2082 .addReg(Reg, RegState::Kill)
2083 .addImm(0)
2084 .addMemOperand(*MI.memoperands_begin());
2085 }
2086 } else if (TM.getCodeModel() == CodeModel::Large) {
2087 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2088 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2089 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2090 .addImm(0);
2091 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2092 .addReg(Reg, RegState::Kill)
2093 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2094 .addImm(16);
2095 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2096 .addReg(Reg, RegState::Kill)
2097 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2098 .addImm(32);
2099 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2100 .addReg(Reg, RegState::Kill)
2101 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2102 .addImm(48);
2103 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2104 .addReg(Reg, RegState::Kill)
2105 .addImm(0)
2106 .addMemOperand(*MI.memoperands_begin());
2107 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2108 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2109 .addGlobalAddress(GV, 0, OpFlags);
2110 } else {
2111 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2112 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2113 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2114 if (Subtarget.isTargetILP32()) {
2115 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2117 .addDef(Reg32, RegState::Dead)
2118 .addUse(Reg, RegState::Kill)
2119 .addGlobalAddress(GV, 0, LoFlags)
2120 .addMemOperand(*MI.memoperands_begin())
2121 .addDef(Reg, RegState::Implicit);
2122 } else {
2123 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2124 .addReg(Reg, RegState::Kill)
2125 .addGlobalAddress(GV, 0, LoFlags)
2126 .addMemOperand(*MI.memoperands_begin());
2127 }
2128 }
2129
2130 MBB.erase(MI);
2131
2132 return true;
2133}
2134
2135// Return true if this instruction simply sets its single destination register
2136// to zero. This is equivalent to a register rename of the zero-register.
2137bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2138 switch (MI.getOpcode()) {
2139 default:
2140 break;
2141 case AArch64::MOVZWi:
2142 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2143 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2144 assert(MI.getDesc().getNumOperands() == 3 &&
2145 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2146 return true;
2147 }
2148 break;
2149 case AArch64::ANDWri: // and Rd, Rzr, #imm
2150 return MI.getOperand(1).getReg() == AArch64::WZR;
2151 case AArch64::ANDXri:
2152 return MI.getOperand(1).getReg() == AArch64::XZR;
2153 case TargetOpcode::COPY:
2154 return MI.getOperand(1).getReg() == AArch64::WZR;
2155 }
2156 return false;
2157}
2158
2159// Return true if this instruction simply renames a general register without
2160// modifying bits.
2161bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2162 switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 case TargetOpcode::COPY: {
2166 // GPR32 copies will be lowered to ORRXrs
2167 Register DstReg = MI.getOperand(0).getReg();
2168 return (AArch64::GPR32RegClass.contains(DstReg) ||
2169 AArch64::GPR64RegClass.contains(DstReg));
2170 }
2171 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2172 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2173 assert(MI.getDesc().getNumOperands() == 4 &&
2174 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2175 return true;
2176 }
2177 break;
2178 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2179 if (MI.getOperand(2).getImm() == 0) {
2180 assert(MI.getDesc().getNumOperands() == 4 &&
2181 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189// Return true if this instruction simply renames an FP register without
2190// modifying bits.
2191bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2192 switch (MI.getOpcode()) {
2193 default:
2194 break;
2195 case TargetOpcode::COPY: {
2196 Register DstReg = MI.getOperand(0).getReg();
2197 return AArch64::FPR128RegClass.contains(DstReg);
2198 }
2199 case AArch64::ORRv16i8:
2200 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2201 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2202 "invalid ORRv16i8 operands");
2203 return true;
2204 }
2205 break;
2206 }
2207 return false;
2208}
2209
2210Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2211 int &FrameIndex) const {
2212 switch (MI.getOpcode()) {
2213 default:
2214 break;
2215 case AArch64::LDRWui:
2216 case AArch64::LDRXui:
2217 case AArch64::LDRBui:
2218 case AArch64::LDRHui:
2219 case AArch64::LDRSui:
2220 case AArch64::LDRDui:
2221 case AArch64::LDRQui:
2222 case AArch64::LDR_PXI:
2223 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2224 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2225 FrameIndex = MI.getOperand(1).getIndex();
2226 return MI.getOperand(0).getReg();
2227 }
2228 break;
2229 }
2230
2231 return 0;
2232}
2233
2234Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2235 int &FrameIndex) const {
2236 switch (MI.getOpcode()) {
2237 default:
2238 break;
2239 case AArch64::STRWui:
2240 case AArch64::STRXui:
2241 case AArch64::STRBui:
2242 case AArch64::STRHui:
2243 case AArch64::STRSui:
2244 case AArch64::STRDui:
2245 case AArch64::STRQui:
2246 case AArch64::STR_PXI:
2247 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2248 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2249 FrameIndex = MI.getOperand(1).getIndex();
2250 return MI.getOperand(0).getReg();
2251 }
2252 break;
2253 }
2254 return 0;
2255}
2256
2257/// Check all MachineMemOperands for a hint to suppress pairing.
2258bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2259 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2260 return MMO->getFlags() & MOSuppressPair;
2261 });
2262}
2263
2264/// Set a flag on the first MachineMemOperand to suppress pairing.
2265void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2266 if (MI.memoperands_empty())
2267 return;
2268 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2269}
2270
2271/// Check all MachineMemOperands for a hint that the load/store is strided.
2272bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2273 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2274 return MMO->getFlags() & MOStridedAccess;
2275 });
2276}
2277
2278bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2279 switch (Opc) {
2280 default:
2281 return false;
2282 case AArch64::STURSi:
2283 case AArch64::STRSpre:
2284 case AArch64::STURDi:
2285 case AArch64::STRDpre:
2286 case AArch64::STURQi:
2287 case AArch64::STRQpre:
2288 case AArch64::STURBBi:
2289 case AArch64::STURHHi:
2290 case AArch64::STURWi:
2291 case AArch64::STRWpre:
2292 case AArch64::STURXi:
2293 case AArch64::STRXpre:
2294 case AArch64::LDURSi:
2295 case AArch64::LDRSpre:
2296 case AArch64::LDURDi:
2297 case AArch64::LDRDpre:
2298 case AArch64::LDURQi:
2299 case AArch64::LDRQpre:
2300 case AArch64::LDURWi:
2301 case AArch64::LDRWpre:
2302 case AArch64::LDURXi:
2303 case AArch64::LDRXpre:
2304 case AArch64::LDRSWpre:
2305 case AArch64::LDURSWi:
2306 case AArch64::LDURHHi:
2307 case AArch64::LDURBBi:
2308 case AArch64::LDURSBWi:
2309 case AArch64::LDURSHWi:
2310 return true;
2311 }
2312}
2313
2314std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2315 switch (Opc) {
2316 default: return {};
2317 case AArch64::PRFMui: return AArch64::PRFUMi;
2318 case AArch64::LDRXui: return AArch64::LDURXi;
2319 case AArch64::LDRWui: return AArch64::LDURWi;
2320 case AArch64::LDRBui: return AArch64::LDURBi;
2321 case AArch64::LDRHui: return AArch64::LDURHi;
2322 case AArch64::LDRSui: return AArch64::LDURSi;
2323 case AArch64::LDRDui: return AArch64::LDURDi;
2324 case AArch64::LDRQui: return AArch64::LDURQi;
2325 case AArch64::LDRBBui: return AArch64::LDURBBi;
2326 case AArch64::LDRHHui: return AArch64::LDURHHi;
2327 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2328 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2329 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2330 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2331 case AArch64::LDRSWui: return AArch64::LDURSWi;
2332 case AArch64::STRXui: return AArch64::STURXi;
2333 case AArch64::STRWui: return AArch64::STURWi;
2334 case AArch64::STRBui: return AArch64::STURBi;
2335 case AArch64::STRHui: return AArch64::STURHi;
2336 case AArch64::STRSui: return AArch64::STURSi;
2337 case AArch64::STRDui: return AArch64::STURDi;
2338 case AArch64::STRQui: return AArch64::STURQi;
2339 case AArch64::STRBBui: return AArch64::STURBBi;
2340 case AArch64::STRHHui: return AArch64::STURHHi;
2341 }
2342}
2343
2344unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2345 switch (Opc) {
2346 default:
2347 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2348 case AArch64::ADDG:
2349 case AArch64::LDAPURBi:
2350 case AArch64::LDAPURHi:
2351 case AArch64::LDAPURi:
2352 case AArch64::LDAPURSBWi:
2353 case AArch64::LDAPURSBXi:
2354 case AArch64::LDAPURSHWi:
2355 case AArch64::LDAPURSHXi:
2356 case AArch64::LDAPURSWi:
2357 case AArch64::LDAPURXi:
2358 case AArch64::LDR_PPXI:
2359 case AArch64::LDR_PXI:
2360 case AArch64::LDR_ZXI:
2361 case AArch64::LDR_ZZXI:
2362 case AArch64::LDR_ZZZXI:
2363 case AArch64::LDR_ZZZZXI:
2364 case AArch64::LDRBBui:
2365 case AArch64::LDRBui:
2366 case AArch64::LDRDui:
2367 case AArch64::LDRHHui:
2368 case AArch64::LDRHui:
2369 case AArch64::LDRQui:
2370 case AArch64::LDRSBWui:
2371 case AArch64::LDRSBXui:
2372 case AArch64::LDRSHWui:
2373 case AArch64::LDRSHXui:
2374 case AArch64::LDRSui:
2375 case AArch64::LDRSWui:
2376 case AArch64::LDRWui:
2377 case AArch64::LDRXui:
2378 case AArch64::LDURBBi:
2379 case AArch64::LDURBi:
2380 case AArch64::LDURDi:
2381 case AArch64::LDURHHi:
2382 case AArch64::LDURHi:
2383 case AArch64::LDURQi:
2384 case AArch64::LDURSBWi:
2385 case AArch64::LDURSBXi:
2386 case AArch64::LDURSHWi:
2387 case AArch64::LDURSHXi:
2388 case AArch64::LDURSi:
2389 case AArch64::LDURSWi:
2390 case AArch64::LDURWi:
2391 case AArch64::LDURXi:
2392 case AArch64::PRFMui:
2393 case AArch64::PRFUMi:
2394 case AArch64::ST2Gi:
2395 case AArch64::STGi:
2396 case AArch64::STLURBi:
2397 case AArch64::STLURHi:
2398 case AArch64::STLURWi:
2399 case AArch64::STLURXi:
2400 case AArch64::StoreSwiftAsyncContext:
2401 case AArch64::STR_PPXI:
2402 case AArch64::STR_PXI:
2403 case AArch64::STR_ZXI:
2404 case AArch64::STR_ZZXI:
2405 case AArch64::STR_ZZZXI:
2406 case AArch64::STR_ZZZZXI:
2407 case AArch64::STRBBui:
2408 case AArch64::STRBui:
2409 case AArch64::STRDui:
2410 case AArch64::STRHHui:
2411 case AArch64::STRHui:
2412 case AArch64::STRQui:
2413 case AArch64::STRSui:
2414 case AArch64::STRWui:
2415 case AArch64::STRXui:
2416 case AArch64::STURBBi:
2417 case AArch64::STURBi:
2418 case AArch64::STURDi:
2419 case AArch64::STURHHi:
2420 case AArch64::STURHi:
2421 case AArch64::STURQi:
2422 case AArch64::STURSi:
2423 case AArch64::STURWi:
2424 case AArch64::STURXi:
2425 case AArch64::STZ2Gi:
2426 case AArch64::STZGi:
2427 case AArch64::TAGPstack:
2428 return 2;
2429 case AArch64::LD1B_D_IMM:
2430 case AArch64::LD1B_H_IMM:
2431 case AArch64::LD1B_IMM:
2432 case AArch64::LD1B_S_IMM:
2433 case AArch64::LD1D_IMM:
2434 case AArch64::LD1H_D_IMM:
2435 case AArch64::LD1H_IMM:
2436 case AArch64::LD1H_S_IMM:
2437 case AArch64::LD1RB_D_IMM:
2438 case AArch64::LD1RB_H_IMM:
2439 case AArch64::LD1RB_IMM:
2440 case AArch64::LD1RB_S_IMM:
2441 case AArch64::LD1RD_IMM:
2442 case AArch64::LD1RH_D_IMM:
2443 case AArch64::LD1RH_IMM:
2444 case AArch64::LD1RH_S_IMM:
2445 case AArch64::LD1RSB_D_IMM:
2446 case AArch64::LD1RSB_H_IMM:
2447 case AArch64::LD1RSB_S_IMM:
2448 case AArch64::LD1RSH_D_IMM:
2449 case AArch64::LD1RSH_S_IMM:
2450 case AArch64::LD1RSW_IMM:
2451 case AArch64::LD1RW_D_IMM:
2452 case AArch64::LD1RW_IMM:
2453 case AArch64::LD1SB_D_IMM:
2454 case AArch64::LD1SB_H_IMM:
2455 case AArch64::LD1SB_S_IMM:
2456 case AArch64::LD1SH_D_IMM:
2457 case AArch64::LD1SH_S_IMM:
2458 case AArch64::LD1SW_D_IMM:
2459 case AArch64::LD1W_D_IMM:
2460 case AArch64::LD1W_IMM:
2461 case AArch64::LD2B_IMM:
2462 case AArch64::LD2D_IMM:
2463 case AArch64::LD2H_IMM:
2464 case AArch64::LD2W_IMM:
2465 case AArch64::LD3B_IMM:
2466 case AArch64::LD3D_IMM:
2467 case AArch64::LD3H_IMM:
2468 case AArch64::LD3W_IMM:
2469 case AArch64::LD4B_IMM:
2470 case AArch64::LD4D_IMM:
2471 case AArch64::LD4H_IMM:
2472 case AArch64::LD4W_IMM:
2473 case AArch64::LDG:
2474 case AArch64::LDNF1B_D_IMM:
2475 case AArch64::LDNF1B_H_IMM:
2476 case AArch64::LDNF1B_IMM:
2477 case AArch64::LDNF1B_S_IMM:
2478 case AArch64::LDNF1D_IMM:
2479 case AArch64::LDNF1H_D_IMM:
2480 case AArch64::LDNF1H_IMM:
2481 case AArch64::LDNF1H_S_IMM:
2482 case AArch64::LDNF1SB_D_IMM:
2483 case AArch64::LDNF1SB_H_IMM:
2484 case AArch64::LDNF1SB_S_IMM:
2485 case AArch64::LDNF1SH_D_IMM:
2486 case AArch64::LDNF1SH_S_IMM:
2487 case AArch64::LDNF1SW_D_IMM:
2488 case AArch64::LDNF1W_D_IMM:
2489 case AArch64::LDNF1W_IMM:
2490 case AArch64::LDNPDi:
2491 case AArch64::LDNPQi:
2492 case AArch64::LDNPSi:
2493 case AArch64::LDNPWi:
2494 case AArch64::LDNPXi:
2495 case AArch64::LDNT1B_ZRI:
2496 case AArch64::LDNT1D_ZRI:
2497 case AArch64::LDNT1H_ZRI:
2498 case AArch64::LDNT1W_ZRI:
2499 case AArch64::LDPDi:
2500 case AArch64::LDPQi:
2501 case AArch64::LDPSi:
2502 case AArch64::LDPWi:
2503 case AArch64::LDPXi:
2504 case AArch64::LDRBBpost:
2505 case AArch64::LDRBBpre:
2506 case AArch64::LDRBpost:
2507 case AArch64::LDRBpre:
2508 case AArch64::LDRDpost:
2509 case AArch64::LDRDpre:
2510 case AArch64::LDRHHpost:
2511 case AArch64::LDRHHpre:
2512 case AArch64::LDRHpost:
2513 case AArch64::LDRHpre:
2514 case AArch64::LDRQpost:
2515 case AArch64::LDRQpre:
2516 case AArch64::LDRSpost:
2517 case AArch64::LDRSpre:
2518 case AArch64::LDRWpost:
2519 case AArch64::LDRWpre:
2520 case AArch64::LDRXpost:
2521 case AArch64::LDRXpre:
2522 case AArch64::ST1B_D_IMM:
2523 case AArch64::ST1B_H_IMM:
2524 case AArch64::ST1B_IMM:
2525 case AArch64::ST1B_S_IMM:
2526 case AArch64::ST1D_IMM:
2527 case AArch64::ST1H_D_IMM:
2528 case AArch64::ST1H_IMM:
2529 case AArch64::ST1H_S_IMM:
2530 case AArch64::ST1W_D_IMM:
2531 case AArch64::ST1W_IMM:
2532 case AArch64::ST2B_IMM:
2533 case AArch64::ST2D_IMM:
2534 case AArch64::ST2H_IMM:
2535 case AArch64::ST2W_IMM:
2536 case AArch64::ST3B_IMM:
2537 case AArch64::ST3D_IMM:
2538 case AArch64::ST3H_IMM:
2539 case AArch64::ST3W_IMM:
2540 case AArch64::ST4B_IMM:
2541 case AArch64::ST4D_IMM:
2542 case AArch64::ST4H_IMM:
2543 case AArch64::ST4W_IMM:
2544 case AArch64::STGPi:
2545 case AArch64::STNPDi:
2546 case AArch64::STNPQi:
2547 case AArch64::STNPSi:
2548 case AArch64::STNPWi:
2549 case AArch64::STNPXi:
2550 case AArch64::STNT1B_ZRI:
2551 case AArch64::STNT1D_ZRI:
2552 case AArch64::STNT1H_ZRI:
2553 case AArch64::STNT1W_ZRI:
2554 case AArch64::STPDi:
2555 case AArch64::STPQi:
2556 case AArch64::STPSi:
2557 case AArch64::STPWi:
2558 case AArch64::STPXi:
2559 case AArch64::STRBBpost:
2560 case AArch64::STRBBpre:
2561 case AArch64::STRBpost:
2562 case AArch64::STRBpre:
2563 case AArch64::STRDpost:
2564 case AArch64::STRDpre:
2565 case AArch64::STRHHpost:
2566 case AArch64::STRHHpre:
2567 case AArch64::STRHpost:
2568 case AArch64::STRHpre:
2569 case AArch64::STRQpost:
2570 case AArch64::STRQpre:
2571 case AArch64::STRSpost:
2572 case AArch64::STRSpre:
2573 case AArch64::STRWpost:
2574 case AArch64::STRWpre:
2575 case AArch64::STRXpost:
2576 case AArch64::STRXpre:
2577 return 3;
2578 case AArch64::LDPDpost:
2579 case AArch64::LDPDpre:
2580 case AArch64::LDPQpost:
2581 case AArch64::LDPQpre:
2582 case AArch64::LDPSpost:
2583 case AArch64::LDPSpre:
2584 case AArch64::LDPWpost:
2585 case AArch64::LDPWpre:
2586 case AArch64::LDPXpost:
2587 case AArch64::LDPXpre:
2588 case AArch64::STPDpost:
2589 case AArch64::STPDpre:
2590 case AArch64::STPQpost:
2591 case AArch64::STPQpre:
2592 case AArch64::STPSpost:
2593 case AArch64::STPSpre:
2594 case AArch64::STPWpost:
2595 case AArch64::STPWpre:
2596 case AArch64::STPXpost:
2597 case AArch64::STPXpre:
2598 return 4;
2599 }
2600}
2601
2602bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2603 switch (MI.getOpcode()) {
2604 default:
2605 return false;
2606 // Scaled instructions.
2607 case AArch64::STRSui:
2608 case AArch64::STRDui:
2609 case AArch64::STRQui:
2610 case AArch64::STRXui:
2611 case AArch64::STRWui:
2612 case AArch64::LDRSui:
2613 case AArch64::LDRDui:
2614 case AArch64::LDRQui:
2615 case AArch64::LDRXui:
2616 case AArch64::LDRWui:
2617 case AArch64::LDRSWui:
2618 // Unscaled instructions.
2619 case AArch64::STURSi:
2620 case AArch64::STRSpre:
2621 case AArch64::STURDi:
2622 case AArch64::STRDpre:
2623 case AArch64::STURQi:
2624 case AArch64::STRQpre:
2625 case AArch64::STURWi:
2626 case AArch64::STRWpre:
2627 case AArch64::STURXi:
2628 case AArch64::STRXpre:
2629 case AArch64::LDURSi:
2630 case AArch64::LDRSpre:
2631 case AArch64::LDURDi:
2632 case AArch64::LDRDpre:
2633 case AArch64::LDURQi:
2634 case AArch64::LDRQpre:
2635 case AArch64::LDURWi:
2636 case AArch64::LDRWpre:
2637 case AArch64::LDURXi:
2638 case AArch64::LDRXpre:
2639 case AArch64::LDURSWi:
2640 case AArch64::LDRSWpre:
2641 return true;
2642 }
2643}
2644
2645bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2646 switch (MI.getOpcode()) {
2647 default:
2648 assert((!MI.isCall() || !MI.isReturn()) &&
2649 "Unexpected instruction - was a new tail call opcode introduced?");
2650 return false;
2651 case AArch64::TCRETURNdi:
2652 case AArch64::TCRETURNri:
2653 case AArch64::TCRETURNrix16x17:
2654 case AArch64::TCRETURNrix17:
2655 case AArch64::TCRETURNrinotx16:
2656 case AArch64::TCRETURNriALL:
2657 case AArch64::AUTH_TCRETURN:
2658 case AArch64::AUTH_TCRETURN_BTI:
2659 return true;
2660 }
2661}
2662
2663unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2664 switch (Opc) {
2665 default:
2666 llvm_unreachable("Opcode has no flag setting equivalent!");
2667 // 32-bit cases:
2668 case AArch64::ADDWri:
2669 return AArch64::ADDSWri;
2670 case AArch64::ADDWrr:
2671 return AArch64::ADDSWrr;
2672 case AArch64::ADDWrs:
2673 return AArch64::ADDSWrs;
2674 case AArch64::ADDWrx:
2675 return AArch64::ADDSWrx;
2676 case AArch64::ANDWri:
2677 return AArch64::ANDSWri;
2678 case AArch64::ANDWrr:
2679 return AArch64::ANDSWrr;
2680 case AArch64::ANDWrs:
2681 return AArch64::ANDSWrs;
2682 case AArch64::BICWrr:
2683 return AArch64::BICSWrr;
2684 case AArch64::BICWrs:
2685 return AArch64::BICSWrs;
2686 case AArch64::SUBWri:
2687 return AArch64::SUBSWri;
2688 case AArch64::SUBWrr:
2689 return AArch64::SUBSWrr;
2690 case AArch64::SUBWrs:
2691 return AArch64::SUBSWrs;
2692 case AArch64::SUBWrx:
2693 return AArch64::SUBSWrx;
2694 // 64-bit cases:
2695 case AArch64::ADDXri:
2696 return AArch64::ADDSXri;
2697 case AArch64::ADDXrr:
2698 return AArch64::ADDSXrr;
2699 case AArch64::ADDXrs:
2700 return AArch64::ADDSXrs;
2701 case AArch64::ADDXrx:
2702 return AArch64::ADDSXrx;
2703 case AArch64::ANDXri:
2704 return AArch64::ANDSXri;
2705 case AArch64::ANDXrr:
2706 return AArch64::ANDSXrr;
2707 case AArch64::ANDXrs:
2708 return AArch64::ANDSXrs;
2709 case AArch64::BICXrr:
2710 return AArch64::BICSXrr;
2711 case AArch64::BICXrs:
2712 return AArch64::BICSXrs;
2713 case AArch64::SUBXri:
2714 return AArch64::SUBSXri;
2715 case AArch64::SUBXrr:
2716 return AArch64::SUBSXrr;
2717 case AArch64::SUBXrs:
2718 return AArch64::SUBSXrs;
2719 case AArch64::SUBXrx:
2720 return AArch64::SUBSXrx;
2721 // SVE instructions:
2722 case AArch64::AND_PPzPP:
2723 return AArch64::ANDS_PPzPP;
2724 case AArch64::BIC_PPzPP:
2725 return AArch64::BICS_PPzPP;
2726 case AArch64::EOR_PPzPP:
2727 return AArch64::EORS_PPzPP;
2728 case AArch64::NAND_PPzPP:
2729 return AArch64::NANDS_PPzPP;
2730 case AArch64::NOR_PPzPP:
2731 return AArch64::NORS_PPzPP;
2732 case AArch64::ORN_PPzPP:
2733 return AArch64::ORNS_PPzPP;
2734 case AArch64::ORR_PPzPP:
2735 return AArch64::ORRS_PPzPP;
2736 case AArch64::BRKA_PPzP:
2737 return AArch64::BRKAS_PPzP;
2738 case AArch64::BRKPA_PPzPP:
2739 return AArch64::BRKPAS_PPzPP;
2740 case AArch64::BRKB_PPzP:
2741 return AArch64::BRKBS_PPzP;
2742 case AArch64::BRKPB_PPzPP:
2743 return AArch64::BRKPBS_PPzPP;
2744 case AArch64::BRKN_PPzP:
2745 return AArch64::BRKNS_PPzP;
2746 case AArch64::RDFFR_PPz:
2747 return AArch64::RDFFRS_PPz;
2748 case AArch64::PTRUE_B:
2749 return AArch64::PTRUES_B;
2750 }
2751}
2752
2753// Is this a candidate for ld/st merging or pairing? For example, we don't
2754// touch volatiles or load/stores that have a hint to avoid pair formation.
2755bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2756
2757 bool IsPreLdSt = isPreLdSt(MI);
2758
2759 // If this is a volatile load/store, don't mess with it.
2760 if (MI.hasOrderedMemoryRef())
2761 return false;
2762
2763 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2764 // For Pre-inc LD/ST, the operand is shifted by one.
2765 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2766 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2767 "Expected a reg or frame index operand.");
2768
2769 // For Pre-indexed addressing quadword instructions, the third operand is the
2770 // immediate value.
2771 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2772
2773 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2774 return false;
2775
2776 // Can't merge/pair if the instruction modifies the base register.
2777 // e.g., ldr x0, [x0]
2778 // This case will never occur with an FI base.
2779 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2780 // STR<S,D,Q,W,X>pre, it can be merged.
2781 // For example:
2782 // ldr q0, [x11, #32]!
2783 // ldr q1, [x11, #16]
2784 // to
2785 // ldp q0, q1, [x11, #32]!
2786 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2787 Register BaseReg = MI.getOperand(1).getReg();
2788 const TargetRegisterInfo *TRI = &getRegisterInfo();
2789 if (MI.modifiesRegister(BaseReg, TRI))
2790 return false;
2791 }
2792
2793 // Check if this load/store has a hint to avoid pair formation.
2794 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2795 if (isLdStPairSuppressed(MI))
2796 return false;
2797
2798 // Do not pair any callee-save store/reload instructions in the
2799 // prologue/epilogue if the CFI information encoded the operations as separate
2800 // instructions, as that will cause the size of the actual prologue to mismatch
2801 // with the prologue size recorded in the Windows CFI.
2802 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2803 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2804 MI.getMF()->getFunction().needsUnwindTableEntry();
2805 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2806 MI.getFlag(MachineInstr::FrameDestroy)))
2807 return false;
2808
2809 // On some CPUs quad load/store pairs are slower than two single load/stores.
2810 if (Subtarget.isPaired128Slow()) {
2811 switch (MI.getOpcode()) {
2812 default:
2813 break;
2814 case AArch64::LDURQi:
2815 case AArch64::STURQi:
2816 case AArch64::LDRQui:
2817 case AArch64::STRQui:
2818 return false;
2819 }
2820 }
2821
2822 return true;
2823}
2824
2825bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2826 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2827 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2828 const TargetRegisterInfo *TRI) const {
2829 if (!LdSt.mayLoadOrStore())
2830 return false;
2831
2832 const MachineOperand *BaseOp;
2833 TypeSize WidthN(0, false);
2834 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2835 WidthN, TRI))
2836 return false;
2837 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2838 // vector.
2839 Width = LocationSize::precise(WidthN);
2840 BaseOps.push_back(BaseOp);
2841 return true;
2842}
2843
2844std::optional<ExtAddrMode>
2845AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2846 const TargetRegisterInfo *TRI) const {
2847 const MachineOperand *Base; // Filled with the base operand of MI.
2848 int64_t Offset; // Filled with the offset of MI.
2849 bool OffsetIsScalable;
2850 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2851 return std::nullopt;
2852
2853 if (!Base->isReg())
2854 return std::nullopt;
2855 ExtAddrMode AM;
2856 AM.BaseReg = Base->getReg();
2857 AM.Displacement = Offset;
2858 AM.ScaledReg = 0;
2859 AM.Scale = 0;
2860 return AM;
2861}
2862
2863bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2864 Register Reg,
2865 const MachineInstr &AddrI,
2866 ExtAddrMode &AM) const {
2867 // Filter out instructions into which we cannot fold.
2868 unsigned NumBytes;
2869 int64_t OffsetScale = 1;
2870 switch (MemI.getOpcode()) {
2871 default:
2872 return false;
2873
2874 case AArch64::LDURQi:
2875 case AArch64::STURQi:
2876 NumBytes = 16;
2877 break;
2878
2879 case AArch64::LDURDi:
2880 case AArch64::STURDi:
2881 case AArch64::LDURXi:
2882 case AArch64::STURXi:
2883 NumBytes = 8;
2884 break;
2885
2886 case AArch64::LDURWi:
2887 case AArch64::LDURSWi:
2888 case AArch64::STURWi:
2889 NumBytes = 4;
2890 break;
2891
2892 case AArch64::LDURHi:
2893 case AArch64::STURHi:
2894 case AArch64::LDURHHi:
2895 case AArch64::STURHHi:
2896 case AArch64::LDURSHXi:
2897 case AArch64::LDURSHWi:
2898 NumBytes = 2;
2899 break;
2900
2901 case AArch64::LDRBroX:
2902 case AArch64::LDRBBroX:
2903 case AArch64::LDRSBXroX:
2904 case AArch64::LDRSBWroX:
2905 case AArch64::STRBroX:
2906 case AArch64::STRBBroX:
2907 case AArch64::LDURBi:
2908 case AArch64::LDURBBi:
2909 case AArch64::LDURSBXi:
2910 case AArch64::LDURSBWi:
2911 case AArch64::STURBi:
2912 case AArch64::STURBBi:
2913 case AArch64::LDRBui:
2914 case AArch64::LDRBBui:
2915 case AArch64::LDRSBXui:
2916 case AArch64::LDRSBWui:
2917 case AArch64::STRBui:
2918 case AArch64::STRBBui:
2919 NumBytes = 1;
2920 break;
2921
2922 case AArch64::LDRQroX:
2923 case AArch64::STRQroX:
2924 case AArch64::LDRQui:
2925 case AArch64::STRQui:
2926 NumBytes = 16;
2927 OffsetScale = 16;
2928 break;
2929
2930 case AArch64::LDRDroX:
2931 case AArch64::STRDroX:
2932 case AArch64::LDRXroX:
2933 case AArch64::STRXroX:
2934 case AArch64::LDRDui:
2935 case AArch64::STRDui:
2936 case AArch64::LDRXui:
2937 case AArch64::STRXui:
2938 NumBytes = 8;
2939 OffsetScale = 8;
2940 break;
2941
2942 case AArch64::LDRWroX:
2943 case AArch64::LDRSWroX:
2944 case AArch64::STRWroX:
2945 case AArch64::LDRWui:
2946 case AArch64::LDRSWui:
2947 case AArch64::STRWui:
2948 NumBytes = 4;
2949 OffsetScale = 4;
2950 break;
2951
2952 case AArch64::LDRHroX:
2953 case AArch64::STRHroX:
2954 case AArch64::LDRHHroX:
2955 case AArch64::STRHHroX:
2956 case AArch64::LDRSHXroX:
2957 case AArch64::LDRSHWroX:
2958 case AArch64::LDRHui:
2959 case AArch64::STRHui:
2960 case AArch64::LDRHHui:
2961 case AArch64::STRHHui:
2962 case AArch64::LDRSHXui:
2963 case AArch64::LDRSHWui:
2964 NumBytes = 2;
2965 OffsetScale = 2;
2966 break;
2967 }
2968
2969 // Check the fold operand is not the loaded/stored value.
2970 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2971 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2972 return false;
2973
2974 // Handle memory instructions with a [Reg, Reg] addressing mode.
2975 if (MemI.getOperand(2).isReg()) {
2976 // Bail if the addressing mode already includes extension of the offset
2977 // register.
2978 if (MemI.getOperand(3).getImm())
2979 return false;
2980
2981 // Check if we actually have a scaled offset.
2982 if (MemI.getOperand(4).getImm() == 0)
2983 OffsetScale = 1;
2984
2985 // If the address instruction is folded into the base register, then the
2986 // addressing mode must not have a scale. Then we can swap the base and the
2987 // scaled registers.
2988 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2989 return false;
2990
2991 switch (AddrI.getOpcode()) {
2992 default:
2993 return false;
2994
2995 case AArch64::SBFMXri:
2996 // sxtw Xa, Wm
2997 // ldr Xd, [Xn, Xa, lsl #N]
2998 // ->
2999 // ldr Xd, [Xn, Wm, sxtw #N]
3000 if (AddrI.getOperand(2).getImm() != 0 ||
3001 AddrI.getOperand(3).getImm() != 31)
3002 return false;
3003
3004 AM.BaseReg = MemI.getOperand(1).getReg();
3005 if (AM.BaseReg == Reg)
3006 AM.BaseReg = MemI.getOperand(2).getReg();
3007 AM.ScaledReg = AddrI.getOperand(1).getReg();
3008 AM.Scale = OffsetScale;
3009 AM.Displacement = 0;
3010 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3011 return true;
3012
3013 case TargetOpcode::SUBREG_TO_REG: {
3014 // mov Wa, Wm
3015 // ldr Xd, [Xn, Xa, lsl #N]
3016 // ->
3017 // ldr Xd, [Xn, Wm, uxtw #N]
3018
3019 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3020 if (AddrI.getOperand(1).getImm() != 0 ||
3021 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3022 return false;
3023
3024 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3025 Register OffsetReg = AddrI.getOperand(2).getReg();
3026 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3027 return false;
3028
3029 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3030 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3031 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3032 DefMI.getOperand(3).getImm() != 0)
3033 return false;
3034
3035 AM.BaseReg = MemI.getOperand(1).getReg();
3036 if (AM.BaseReg == Reg)
3037 AM.BaseReg = MemI.getOperand(2).getReg();
3038 AM.ScaledReg = DefMI.getOperand(2).getReg();
3039 AM.Scale = OffsetScale;
3040 AM.Displacement = 0;
3041 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3042 return true;
3043 }
3044 }
3045 }
3046
3047 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3048
3049 // Check we are not breaking a potential conversion to an LDP.
3050 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3051 int64_t NewOffset) -> bool {
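// LDP/STP immediates are signed 7-bit values scaled by the access size, so
// the reachable offsets are -64*size .. 63*size (e.g. -256..252 for 4-byte
// accesses); that is where the bounds below come from.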
3052 int64_t MinOffset, MaxOffset;
3053 switch (NumBytes) {
3054 default:
3055 return true;
3056 case 4:
3057 MinOffset = -256;
3058 MaxOffset = 252;
3059 break;
3060 case 8:
3061 MinOffset = -512;
3062 MaxOffset = 504;
3063 break;
3064 case 16:
3065 MinOffset = -1024;
3066 MaxOffset = 1008;
3067 break;
3068 }
3069 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3070 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3071 };
3072 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3073 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3074 int64_t NewOffset = OldOffset + Disp;
3075 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3076 return false;
3077 // If the old offset would fit into an LDP, but the new offset wouldn't,
3078 // bail out.
3079 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3080 return false;
3081 AM.BaseReg = AddrI.getOperand(1).getReg();
3082 AM.ScaledReg = 0;
3083 AM.Scale = 0;
3084 AM.Displacement = NewOffset;
3085 AM.Form = ExtAddrMode::Formula::Basic;
3086 return true;
3087 };
3088
3089 auto canFoldAddRegIntoAddrMode =
3090 [&](int64_t Scale,
3091 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3092 if (MemI.getOperand(2).getImm() != 0)
3093 return false;
3094 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3095 return false;
3096 AM.BaseReg = AddrI.getOperand(1).getReg();
3097 AM.ScaledReg = AddrI.getOperand(2).getReg();
3098 AM.Scale = Scale;
3099 AM.Displacement = 0;
3100 AM.Form = Form;
3101 return true;
3102 };
3103
3104 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3105 unsigned Opcode = MemI.getOpcode();
3106 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3107 Subtarget.isSTRQroSlow();
3108 };
3109
3110 int64_t Disp = 0;
3111 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3112 switch (AddrI.getOpcode()) {
3113 default:
3114 return false;
3115
3116 case AArch64::ADDXri:
3117 // add Xa, Xn, #N
3118 // ldr Xd, [Xa, #M]
3119 // ->
3120 // ldr Xd, [Xn, #N'+M]
3121 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3122 return canFoldAddSubImmIntoAddrMode(Disp);
3123
3124 case AArch64::SUBXri:
3125 // sub Xa, Xn, #N
3126 // ldr Xd, [Xa, #M]
3127 // ->
3128 // ldr Xd, [Xn, #N'+M]
3129 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3130 return canFoldAddSubImmIntoAddrMode(-Disp);
3131
3132 case AArch64::ADDXrs: {
3133 // add Xa, Xn, Xm, lsl #N
3134 // ldr Xd, [Xa]
3135 // ->
3136 // ldr Xd, [Xn, Xm, lsl #N]
3137
3138 // Don't fold the add if the result would be slower, unless optimising for
3139 // size.
3140 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3141 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
3142 return false;
3143 Shift = AArch64_AM::getShiftValue(Shift);
3144 if (!OptSize) {
3145 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3146 return false;
3147 if (avoidSlowSTRQ(MemI))
3148 return false;
3149 }
3150 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3151 }
3152
3153 case AArch64::ADDXrr:
3154 // add Xa, Xn, Xm
3155 // ldr Xd, [Xa]
3156 // ->
3157 // ldr Xd, [Xn, Xm, lsl #0]
3158
3159 // Don't fold the add if the result would be slower, unless optimising for
3160 // size.
3161 if (!OptSize && avoidSlowSTRQ(MemI))
3162 return false;
3163 return canFoldAddRegIntoAddrMode(1);
3164
3165 case AArch64::ADDXrx:
3166 // add Xa, Xn, Wm, {s,u}xtw #N
3167 // ldr Xd, [Xa]
3168 // ->
3169 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3170
3171 // Don't fold the add if the result would be slower, unless optimising for
3172 // size.
3173 if (!OptSize && avoidSlowSTRQ(MemI))
3174 return false;
3175
3176 // Can fold only sign-/zero-extend of a word.
3177 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3178 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3179 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3180 return false;
3181
3182 return canFoldAddRegIntoAddrMode(
3183 1ULL << AArch64_AM::getArithShiftValue(Imm),
3184 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3185 : ExtAddrMode::Formula::ZExtScaledReg);
3186 }
3187}
3188
3189// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3190// return the opcode of an instruction performing the same operation, but using
3191// the [Reg, Reg] addressing mode.
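// For example, LDRXui and LDURXi (`ldr x0, [x1, #8]`) both map to LDRXroX
// (`ldr x0, [x1, x2]`).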
3192static unsigned regOffsetOpcode(unsigned Opcode) {
3193 switch (Opcode) {
3194 default:
3195 llvm_unreachable("Address folding not implemented for instruction");
3196
3197 case AArch64::LDURQi:
3198 case AArch64::LDRQui:
3199 return AArch64::LDRQroX;
3200 case AArch64::STURQi:
3201 case AArch64::STRQui:
3202 return AArch64::STRQroX;
3203 case AArch64::LDURDi:
3204 case AArch64::LDRDui:
3205 return AArch64::LDRDroX;
3206 case AArch64::STURDi:
3207 case AArch64::STRDui:
3208 return AArch64::STRDroX;
3209 case AArch64::LDURXi:
3210 case AArch64::LDRXui:
3211 return AArch64::LDRXroX;
3212 case AArch64::STURXi:
3213 case AArch64::STRXui:
3214 return AArch64::STRXroX;
3215 case AArch64::LDURWi:
3216 case AArch64::LDRWui:
3217 return AArch64::LDRWroX;
3218 case AArch64::LDURSWi:
3219 case AArch64::LDRSWui:
3220 return AArch64::LDRSWroX;
3221 case AArch64::STURWi:
3222 case AArch64::STRWui:
3223 return AArch64::STRWroX;
3224 case AArch64::LDURHi:
3225 case AArch64::LDRHui:
3226 return AArch64::LDRHroX;
3227 case AArch64::STURHi:
3228 case AArch64::STRHui:
3229 return AArch64::STRHroX;
3230 case AArch64::LDURHHi:
3231 case AArch64::LDRHHui:
3232 return AArch64::LDRHHroX;
3233 case AArch64::STURHHi:
3234 case AArch64::STRHHui:
3235 return AArch64::STRHHroX;
3236 case AArch64::LDURSHXi:
3237 case AArch64::LDRSHXui:
3238 return AArch64::LDRSHXroX;
3239 case AArch64::LDURSHWi:
3240 case AArch64::LDRSHWui:
3241 return AArch64::LDRSHWroX;
3242 case AArch64::LDURBi:
3243 case AArch64::LDRBui:
3244 return AArch64::LDRBroX;
3245 case AArch64::LDURBBi:
3246 case AArch64::LDRBBui:
3247 return AArch64::LDRBBroX;
3248 case AArch64::LDURSBXi:
3249 case AArch64::LDRSBXui:
3250 return AArch64::LDRSBXroX;
3251 case AArch64::LDURSBWi:
3252 case AArch64::LDRSBWui:
3253 return AArch64::LDRSBWroX;
3254 case AArch64::STURBi:
3255 case AArch64::STRBui:
3256 return AArch64::STRBroX;
3257 case AArch64::STURBBi:
3258 case AArch64::STRBBui:
3259 return AArch64::STRBBroX;
3260 }
3261}
3262
3263// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3264// the opcode of an instruction performing the same operation, but using the
3265// [Reg, #Imm] addressing mode with scaled offset.
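// For example, LDURXi maps to LDRXui with Scale set to 8; the caller divides
// the byte offset by Scale when it builds the new instruction.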
3266unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3267 switch (Opcode) {
3268 default:
3269 llvm_unreachable("Address folding not implemented for instruction");
3270
3271 case AArch64::LDURQi:
3272 Scale = 16;
3273 return AArch64::LDRQui;
3274 case AArch64::STURQi:
3275 Scale = 16;
3276 return AArch64::STRQui;
3277 case AArch64::LDURDi:
3278 Scale = 8;
3279 return AArch64::LDRDui;
3280 case AArch64::STURDi:
3281 Scale = 8;
3282 return AArch64::STRDui;
3283 case AArch64::LDURXi:
3284 Scale = 8;
3285 return AArch64::LDRXui;
3286 case AArch64::STURXi:
3287 Scale = 8;
3288 return AArch64::STRXui;
3289 case AArch64::LDURWi:
3290 Scale = 4;
3291 return AArch64::LDRWui;
3292 case AArch64::LDURSWi:
3293 Scale = 4;
3294 return AArch64::LDRSWui;
3295 case AArch64::STURWi:
3296 Scale = 4;
3297 return AArch64::STRWui;
3298 case AArch64::LDURHi:
3299 Scale = 2;
3300 return AArch64::LDRHui;
3301 case AArch64::STURHi:
3302 Scale = 2;
3303 return AArch64::STRHui;
3304 case AArch64::LDURHHi:
3305 Scale = 2;
3306 return AArch64::LDRHHui;
3307 case AArch64::STURHHi:
3308 Scale = 2;
3309 return AArch64::STRHHui;
3310 case AArch64::LDURSHXi:
3311 Scale = 2;
3312 return AArch64::LDRSHXui;
3313 case AArch64::LDURSHWi:
3314 Scale = 2;
3315 return AArch64::LDRSHWui;
3316 case AArch64::LDURBi:
3317 Scale = 1;
3318 return AArch64::LDRBui;
3319 case AArch64::LDURBBi:
3320 Scale = 1;
3321 return AArch64::LDRBBui;
3322 case AArch64::LDURSBXi:
3323 Scale = 1;
3324 return AArch64::LDRSBXui;
3325 case AArch64::LDURSBWi:
3326 Scale = 1;
3327 return AArch64::LDRSBWui;
3328 case AArch64::STURBi:
3329 Scale = 1;
3330 return AArch64::STRBui;
3331 case AArch64::STURBBi:
3332 Scale = 1;
3333 return AArch64::STRBBui;
3334 case AArch64::LDRQui:
3335 case AArch64::STRQui:
3336 Scale = 16;
3337 return Opcode;
3338 case AArch64::LDRDui:
3339 case AArch64::STRDui:
3340 case AArch64::LDRXui:
3341 case AArch64::STRXui:
3342 Scale = 8;
3343 return Opcode;
3344 case AArch64::LDRWui:
3345 case AArch64::LDRSWui:
3346 case AArch64::STRWui:
3347 Scale = 4;
3348 return Opcode;
3349 case AArch64::LDRHui:
3350 case AArch64::STRHui:
3351 case AArch64::LDRHHui:
3352 case AArch64::STRHHui:
3353 case AArch64::LDRSHXui:
3354 case AArch64::LDRSHWui:
3355 Scale = 2;
3356 return Opcode;
3357 case AArch64::LDRBui:
3358 case AArch64::LDRBBui:
3359 case AArch64::LDRSBXui:
3360 case AArch64::LDRSBWui:
3361 case AArch64::STRBui:
3362 case AArch64::STRBBui:
3363 Scale = 1;
3364 return Opcode;
3365 }
3366}
3367
3368// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3369// the opcode of an instruction performing the same operation, but using the
3370// [Reg, #Imm] addressing mode with unscaled offset.
3371unsigned unscaledOffsetOpcode(unsigned Opcode) {
3372 switch (Opcode) {
3373 default:
3374 llvm_unreachable("Address folding not implemented for instruction");
3375
3376 case AArch64::LDURQi:
3377 case AArch64::STURQi:
3378 case AArch64::LDURDi:
3379 case AArch64::STURDi:
3380 case AArch64::LDURXi:
3381 case AArch64::STURXi:
3382 case AArch64::LDURWi:
3383 case AArch64::LDURSWi:
3384 case AArch64::STURWi:
3385 case AArch64::LDURHi:
3386 case AArch64::STURHi:
3387 case AArch64::LDURHHi:
3388 case AArch64::STURHHi:
3389 case AArch64::LDURSHXi:
3390 case AArch64::LDURSHWi:
3391 case AArch64::LDURBi:
3392 case AArch64::STURBi:
3393 case AArch64::LDURBBi:
3394 case AArch64::STURBBi:
3395 case AArch64::LDURSBWi:
3396 case AArch64::LDURSBXi:
3397 return Opcode;
3398 case AArch64::LDRQui:
3399 return AArch64::LDURQi;
3400 case AArch64::STRQui:
3401 return AArch64::STURQi;
3402 case AArch64::LDRDui:
3403 return AArch64::LDURDi;
3404 case AArch64::STRDui:
3405 return AArch64::STURDi;
3406 case AArch64::LDRXui:
3407 return AArch64::LDURXi;
3408 case AArch64::STRXui:
3409 return AArch64::STURXi;
3410 case AArch64::LDRWui:
3411 return AArch64::LDURWi;
3412 case AArch64::LDRSWui:
3413 return AArch64::LDURSWi;
3414 case AArch64::STRWui:
3415 return AArch64::STURWi;
3416 case AArch64::LDRHui:
3417 return AArch64::LDURHi;
3418 case AArch64::STRHui:
3419 return AArch64::STURHi;
3420 case AArch64::LDRHHui:
3421 return AArch64::LDURHHi;
3422 case AArch64::STRHHui:
3423 return AArch64::STURHHi;
3424 case AArch64::LDRSHXui:
3425 return AArch64::LDURSHXi;
3426 case AArch64::LDRSHWui:
3427 return AArch64::LDURSHWi;
3428 case AArch64::LDRBBui:
3429 return AArch64::LDURBBi;
3430 case AArch64::LDRBui:
3431 return AArch64::LDURBi;
3432 case AArch64::STRBBui:
3433 return AArch64::STURBBi;
3434 case AArch64::STRBui:
3435 return AArch64::STURBi;
3436 case AArch64::LDRSBWui:
3437 return AArch64::LDURSBWi;
3438 case AArch64::LDRSBXui:
3439 return AArch64::LDURSBXi;
3440 }
3441}
3442
3443// Given the opcode of a memory load/store instruction, return the opcode of an
3444// instruction performing the same operation, but using
3445// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3446// offset register.
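// For example, LDRXroX, LDURXi and LDRXui all map to LDRXroW
// (`ldr x0, [x1, w2, {s,u}xtw]`); the extend kind and shift amount are added
// as separate immediate operands by the caller.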
3447static unsigned offsetExtendOpcode(unsigned Opcode) {
3448 switch (Opcode) {
3449 default:
3450 llvm_unreachable("Address folding not implemented for instruction");
3451
3452 case AArch64::LDRQroX:
3453 case AArch64::LDURQi:
3454 case AArch64::LDRQui:
3455 return AArch64::LDRQroW;
3456 case AArch64::STRQroX:
3457 case AArch64::STURQi:
3458 case AArch64::STRQui:
3459 return AArch64::STRQroW;
3460 case AArch64::LDRDroX:
3461 case AArch64::LDURDi:
3462 case AArch64::LDRDui:
3463 return AArch64::LDRDroW;
3464 case AArch64::STRDroX:
3465 case AArch64::STURDi:
3466 case AArch64::STRDui:
3467 return AArch64::STRDroW;
3468 case AArch64::LDRXroX:
3469 case AArch64::LDURXi:
3470 case AArch64::LDRXui:
3471 return AArch64::LDRXroW;
3472 case AArch64::STRXroX:
3473 case AArch64::STURXi:
3474 case AArch64::STRXui:
3475 return AArch64::STRXroW;
3476 case AArch64::LDRWroX:
3477 case AArch64::LDURWi:
3478 case AArch64::LDRWui:
3479 return AArch64::LDRWroW;
3480 case AArch64::LDRSWroX:
3481 case AArch64::LDURSWi:
3482 case AArch64::LDRSWui:
3483 return AArch64::LDRSWroW;
3484 case AArch64::STRWroX:
3485 case AArch64::STURWi:
3486 case AArch64::STRWui:
3487 return AArch64::STRWroW;
3488 case AArch64::LDRHroX:
3489 case AArch64::LDURHi:
3490 case AArch64::LDRHui:
3491 return AArch64::LDRHroW;
3492 case AArch64::STRHroX:
3493 case AArch64::STURHi:
3494 case AArch64::STRHui:
3495 return AArch64::STRHroW;
3496 case AArch64::LDRHHroX:
3497 case AArch64::LDURHHi:
3498 case AArch64::LDRHHui:
3499 return AArch64::LDRHHroW;
3500 case AArch64::STRHHroX:
3501 case AArch64::STURHHi:
3502 case AArch64::STRHHui:
3503 return AArch64::STRHHroW;
3504 case AArch64::LDRSHXroX:
3505 case AArch64::LDURSHXi:
3506 case AArch64::LDRSHXui:
3507 return AArch64::LDRSHXroW;
3508 case AArch64::LDRSHWroX:
3509 case AArch64::LDURSHWi:
3510 case AArch64::LDRSHWui:
3511 return AArch64::LDRSHWroW;
3512 case AArch64::LDRBroX:
3513 case AArch64::LDURBi:
3514 case AArch64::LDRBui:
3515 return AArch64::LDRBroW;
3516 case AArch64::LDRBBroX:
3517 case AArch64::LDURBBi:
3518 case AArch64::LDRBBui:
3519 return AArch64::LDRBBroW;
3520 case AArch64::LDRSBXroX:
3521 case AArch64::LDURSBXi:
3522 case AArch64::LDRSBXui:
3523 return AArch64::LDRSBXroW;
3524 case AArch64::LDRSBWroX:
3525 case AArch64::LDURSBWi:
3526 case AArch64::LDRSBWui:
3527 return AArch64::LDRSBWroW;
3528 case AArch64::STRBroX:
3529 case AArch64::STURBi:
3530 case AArch64::STRBui:
3531 return AArch64::STRBroW;
3532 case AArch64::STRBBroX:
3533 case AArch64::STURBBi:
3534 case AArch64::STRBBui:
3535 return AArch64::STRBBroW;
3536 }
3537}
3538
3539MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3540 const ExtAddrMode &AM) const {
3541
3542 const DebugLoc &DL = MemI.getDebugLoc();
3543 MachineBasicBlock &MBB = *MemI.getParent();
3544 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3545
3546 if (AM.Form == ExtAddrMode::Formula::Basic) {
3547 if (AM.ScaledReg) {
3548 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3549 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3550 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3551 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3552 .addReg(MemI.getOperand(0).getReg(),
3553 MemI.mayLoad() ? RegState::Define : 0)
3554 .addReg(AM.BaseReg)
3555 .addReg(AM.ScaledReg)
3556 .addImm(0)
3557 .addImm(AM.Scale > 1)
3558 .setMemRefs(MemI.memoperands())
3559 .setMIFlags(MemI.getFlags());
3560 return B.getInstr();
3561 }
3562
3563 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3564 "Addressing mode not supported for folding");
3565
3566 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3567 unsigned Scale = 1;
3568 unsigned Opcode = MemI.getOpcode();
3569 if (isInt<9>(AM.Displacement))
3570 Opcode = unscaledOffsetOpcode(Opcode);
3571 else
3572 Opcode = scaledOffsetOpcode(Opcode, Scale);
3573
3574 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3575 .addReg(MemI.getOperand(0).getReg(),
3576 MemI.mayLoad() ? RegState::Define : 0)
3577 .addReg(AM.BaseReg)
3578 .addImm(AM.Displacement / Scale)
3579 .setMemRefs(MemI.memoperands())
3580 .setMIFlags(MemI.getFlags());
3581 return B.getInstr();
3582 }
3583
3584 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3585 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3586 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3587 assert(AM.ScaledReg && !AM.Displacement &&
3588 "Address offset can be a register or an immediate, but not both");
3589 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3590 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3591 // Make sure the offset register is in the correct register class.
3592 Register OffsetReg = AM.ScaledReg;
3593 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3594 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3595 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3596 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3597 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3598 }
3599 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3600 .addReg(MemI.getOperand(0).getReg(),
3601 MemI.mayLoad() ? RegState::Define : 0)
3602 .addReg(AM.BaseReg)
3603 .addReg(OffsetReg)
3604 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3605 .addImm(AM.Scale != 1)
3606 .setMemRefs(MemI.memoperands())
3607 .setMIFlags(MemI.getFlags());
3608
3609 return B.getInstr();
3610 }
3611
3613 "Function must not be called with an addressing mode it can't handle");
3614}
3615
3616/// Return true if the opcode is a post-index ld/st instruction, which really
3617/// loads from base+0.
3618static bool isPostIndexLdStOpcode(unsigned Opcode) {
3619 switch (Opcode) {
3620 default:
3621 return false;
3622 case AArch64::LD1Fourv16b_POST:
3623 case AArch64::LD1Fourv1d_POST:
3624 case AArch64::LD1Fourv2d_POST:
3625 case AArch64::LD1Fourv2s_POST:
3626 case AArch64::LD1Fourv4h_POST:
3627 case AArch64::LD1Fourv4s_POST:
3628 case AArch64::LD1Fourv8b_POST:
3629 case AArch64::LD1Fourv8h_POST:
3630 case AArch64::LD1Onev16b_POST:
3631 case AArch64::LD1Onev1d_POST:
3632 case AArch64::LD1Onev2d_POST:
3633 case AArch64::LD1Onev2s_POST:
3634 case AArch64::LD1Onev4h_POST:
3635 case AArch64::LD1Onev4s_POST:
3636 case AArch64::LD1Onev8b_POST:
3637 case AArch64::LD1Onev8h_POST:
3638 case AArch64::LD1Rv16b_POST:
3639 case AArch64::LD1Rv1d_POST:
3640 case AArch64::LD1Rv2d_POST:
3641 case AArch64::LD1Rv2s_POST:
3642 case AArch64::LD1Rv4h_POST:
3643 case AArch64::LD1Rv4s_POST:
3644 case AArch64::LD1Rv8b_POST:
3645 case AArch64::LD1Rv8h_POST:
3646 case AArch64::LD1Threev16b_POST:
3647 case AArch64::LD1Threev1d_POST:
3648 case AArch64::LD1Threev2d_POST:
3649 case AArch64::LD1Threev2s_POST:
3650 case AArch64::LD1Threev4h_POST:
3651 case AArch64::LD1Threev4s_POST:
3652 case AArch64::LD1Threev8b_POST:
3653 case AArch64::LD1Threev8h_POST:
3654 case AArch64::LD1Twov16b_POST:
3655 case AArch64::LD1Twov1d_POST:
3656 case AArch64::LD1Twov2d_POST:
3657 case AArch64::LD1Twov2s_POST:
3658 case AArch64::LD1Twov4h_POST:
3659 case AArch64::LD1Twov4s_POST:
3660 case AArch64::LD1Twov8b_POST:
3661 case AArch64::LD1Twov8h_POST:
3662 case AArch64::LD1i16_POST:
3663 case AArch64::LD1i32_POST:
3664 case AArch64::LD1i64_POST:
3665 case AArch64::LD1i8_POST:
3666 case AArch64::LD2Rv16b_POST:
3667 case AArch64::LD2Rv1d_POST:
3668 case AArch64::LD2Rv2d_POST:
3669 case AArch64::LD2Rv2s_POST:
3670 case AArch64::LD2Rv4h_POST:
3671 case AArch64::LD2Rv4s_POST:
3672 case AArch64::LD2Rv8b_POST:
3673 case AArch64::LD2Rv8h_POST:
3674 case AArch64::LD2Twov16b_POST:
3675 case AArch64::LD2Twov2d_POST:
3676 case AArch64::LD2Twov2s_POST:
3677 case AArch64::LD2Twov4h_POST:
3678 case AArch64::LD2Twov4s_POST:
3679 case AArch64::LD2Twov8b_POST:
3680 case AArch64::LD2Twov8h_POST:
3681 case AArch64::LD2i16_POST:
3682 case AArch64::LD2i32_POST:
3683 case AArch64::LD2i64_POST:
3684 case AArch64::LD2i8_POST:
3685 case AArch64::LD3Rv16b_POST:
3686 case AArch64::LD3Rv1d_POST:
3687 case AArch64::LD3Rv2d_POST:
3688 case AArch64::LD3Rv2s_POST:
3689 case AArch64::LD3Rv4h_POST:
3690 case AArch64::LD3Rv4s_POST:
3691 case AArch64::LD3Rv8b_POST:
3692 case AArch64::LD3Rv8h_POST:
3693 case AArch64::LD3Threev16b_POST:
3694 case AArch64::LD3Threev2d_POST:
3695 case AArch64::LD3Threev2s_POST:
3696 case AArch64::LD3Threev4h_POST:
3697 case AArch64::LD3Threev4s_POST:
3698 case AArch64::LD3Threev8b_POST:
3699 case AArch64::LD3Threev8h_POST:
3700 case AArch64::LD3i16_POST:
3701 case AArch64::LD3i32_POST:
3702 case AArch64::LD3i64_POST:
3703 case AArch64::LD3i8_POST:
3704 case AArch64::LD4Fourv16b_POST:
3705 case AArch64::LD4Fourv2d_POST:
3706 case AArch64::LD4Fourv2s_POST:
3707 case AArch64::LD4Fourv4h_POST:
3708 case AArch64::LD4Fourv4s_POST:
3709 case AArch64::LD4Fourv8b_POST:
3710 case AArch64::LD4Fourv8h_POST:
3711 case AArch64::LD4Rv16b_POST:
3712 case AArch64::LD4Rv1d_POST:
3713 case AArch64::LD4Rv2d_POST:
3714 case AArch64::LD4Rv2s_POST:
3715 case AArch64::LD4Rv4h_POST:
3716 case AArch64::LD4Rv4s_POST:
3717 case AArch64::LD4Rv8b_POST:
3718 case AArch64::LD4Rv8h_POST:
3719 case AArch64::LD4i16_POST:
3720 case AArch64::LD4i32_POST:
3721 case AArch64::LD4i64_POST:
3722 case AArch64::LD4i8_POST:
3723 case AArch64::LDAPRWpost:
3724 case AArch64::LDAPRXpost:
3725 case AArch64::LDIAPPWpost:
3726 case AArch64::LDIAPPXpost:
3727 case AArch64::LDPDpost:
3728 case AArch64::LDPQpost:
3729 case AArch64::LDPSWpost:
3730 case AArch64::LDPSpost:
3731 case AArch64::LDPWpost:
3732 case AArch64::LDPXpost:
3733 case AArch64::LDRBBpost:
3734 case AArch64::LDRBpost:
3735 case AArch64::LDRDpost:
3736 case AArch64::LDRHHpost:
3737 case AArch64::LDRHpost:
3738 case AArch64::LDRQpost:
3739 case AArch64::LDRSBWpost:
3740 case AArch64::LDRSBXpost:
3741 case AArch64::LDRSHWpost:
3742 case AArch64::LDRSHXpost:
3743 case AArch64::LDRSWpost:
3744 case AArch64::LDRSpost:
3745 case AArch64::LDRWpost:
3746 case AArch64::LDRXpost:
3747 case AArch64::ST1Fourv16b_POST:
3748 case AArch64::ST1Fourv1d_POST:
3749 case AArch64::ST1Fourv2d_POST:
3750 case AArch64::ST1Fourv2s_POST:
3751 case AArch64::ST1Fourv4h_POST:
3752 case AArch64::ST1Fourv4s_POST:
3753 case AArch64::ST1Fourv8b_POST:
3754 case AArch64::ST1Fourv8h_POST:
3755 case AArch64::ST1Onev16b_POST:
3756 case AArch64::ST1Onev1d_POST:
3757 case AArch64::ST1Onev2d_POST:
3758 case AArch64::ST1Onev2s_POST:
3759 case AArch64::ST1Onev4h_POST:
3760 case AArch64::ST1Onev4s_POST:
3761 case AArch64::ST1Onev8b_POST:
3762 case AArch64::ST1Onev8h_POST:
3763 case AArch64::ST1Threev16b_POST:
3764 case AArch64::ST1Threev1d_POST:
3765 case AArch64::ST1Threev2d_POST:
3766 case AArch64::ST1Threev2s_POST:
3767 case AArch64::ST1Threev4h_POST:
3768 case AArch64::ST1Threev4s_POST:
3769 case AArch64::ST1Threev8b_POST:
3770 case AArch64::ST1Threev8h_POST:
3771 case AArch64::ST1Twov16b_POST:
3772 case AArch64::ST1Twov1d_POST:
3773 case AArch64::ST1Twov2d_POST:
3774 case AArch64::ST1Twov2s_POST:
3775 case AArch64::ST1Twov4h_POST:
3776 case AArch64::ST1Twov4s_POST:
3777 case AArch64::ST1Twov8b_POST:
3778 case AArch64::ST1Twov8h_POST:
3779 case AArch64::ST1i16_POST:
3780 case AArch64::ST1i32_POST:
3781 case AArch64::ST1i64_POST:
3782 case AArch64::ST1i8_POST:
3783 case AArch64::ST2GPostIndex:
3784 case AArch64::ST2Twov16b_POST:
3785 case AArch64::ST2Twov2d_POST:
3786 case AArch64::ST2Twov2s_POST:
3787 case AArch64::ST2Twov4h_POST:
3788 case AArch64::ST2Twov4s_POST:
3789 case AArch64::ST2Twov8b_POST:
3790 case AArch64::ST2Twov8h_POST:
3791 case AArch64::ST2i16_POST:
3792 case AArch64::ST2i32_POST:
3793 case AArch64::ST2i64_POST:
3794 case AArch64::ST2i8_POST:
3795 case AArch64::ST3Threev16b_POST:
3796 case AArch64::ST3Threev2d_POST:
3797 case AArch64::ST3Threev2s_POST:
3798 case AArch64::ST3Threev4h_POST:
3799 case AArch64::ST3Threev4s_POST:
3800 case AArch64::ST3Threev8b_POST:
3801 case AArch64::ST3Threev8h_POST:
3802 case AArch64::ST3i16_POST:
3803 case AArch64::ST3i32_POST:
3804 case AArch64::ST3i64_POST:
3805 case AArch64::ST3i8_POST:
3806 case AArch64::ST4Fourv16b_POST:
3807 case AArch64::ST4Fourv2d_POST:
3808 case AArch64::ST4Fourv2s_POST:
3809 case AArch64::ST4Fourv4h_POST:
3810 case AArch64::ST4Fourv4s_POST:
3811 case AArch64::ST4Fourv8b_POST:
3812 case AArch64::ST4Fourv8h_POST:
3813 case AArch64::ST4i16_POST:
3814 case AArch64::ST4i32_POST:
3815 case AArch64::ST4i64_POST:
3816 case AArch64::ST4i8_POST:
3817 case AArch64::STGPostIndex:
3818 case AArch64::STGPpost:
3819 case AArch64::STPDpost:
3820 case AArch64::STPQpost:
3821 case AArch64::STPSpost:
3822 case AArch64::STPWpost:
3823 case AArch64::STPXpost:
3824 case AArch64::STRBBpost:
3825 case AArch64::STRBpost:
3826 case AArch64::STRDpost:
3827 case AArch64::STRHHpost:
3828 case AArch64::STRHpost:
3829 case AArch64::STRQpost:
3830 case AArch64::STRSpost:
3831 case AArch64::STRWpost:
3832 case AArch64::STRXpost:
3833 case AArch64::STZ2GPostIndex:
3834 case AArch64::STZGPostIndex:
3835 return true;
3836 }
3837}
3838
3839bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3840 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3841 bool &OffsetIsScalable, TypeSize &Width,
3842 const TargetRegisterInfo *TRI) const {
3843 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3844 // Handle only loads/stores with base register followed by immediate offset.
3845 if (LdSt.getNumExplicitOperands() == 3) {
3846 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3847 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3848 !LdSt.getOperand(2).isImm())
3849 return false;
3850 } else if (LdSt.getNumExplicitOperands() == 4) {
3851 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3852 if (!LdSt.getOperand(1).isReg() ||
3853 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3854 !LdSt.getOperand(3).isImm())
3855 return false;
3856 } else
3857 return false;
3858
3859 // Get the scaling factor for the instruction and set the width for the
3860 // instruction.
3861 TypeSize Scale(0U, false);
3862 int64_t Dummy1, Dummy2;
3863
3864 // If this returns false, then it's an instruction we don't want to handle.
3865 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3866 return false;
3867
3868 // Compute the offset. The offset is the immediate operand multiplied by the
3869 // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
3870 // instructions are a special case and have an offset of 0.
3871 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
3872 BaseOp = &LdSt.getOperand(2);
3873 Offset = 0;
3874 } else if (LdSt.getNumExplicitOperands() == 3) {
3875 BaseOp = &LdSt.getOperand(1);
3876 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3877 } else {
3878 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3879 BaseOp = &LdSt.getOperand(2);
3880 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3881 }
3882 OffsetIsScalable = Scale.isScalable();
3883
3884 return BaseOp->isReg() || BaseOp->isFI();
3885}
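// Illustrative note (annotation, not part of the upstream source): for a
// scaled load such as "ldr x1, [x0, #8]" (LDRXui with immediate 1),
// getMemOpInfo reports Scale = 8, so the returned Offset is 1 * 8 = 8 bytes,
// BaseOp is the x0 operand, and OffsetIsScalable is false. For a post-index
// form such as LDRXpost the memory access itself is at base+0, so Offset is
// reported as 0 regardless of the write-back immediate.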
3886
3887MachineOperand &
3888AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3889 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3890 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3891 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3892 return OfsOp;
3893}
3894
3895bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3896 TypeSize &Width, int64_t &MinOffset,
3897 int64_t &MaxOffset) {
3898 switch (Opcode) {
3900 // Not a memory operation, or not one we want to handle.
3900 default:
3901 Scale = TypeSize::getFixed(0);
3902 Width = TypeSize::getFixed(0);
3903 MinOffset = MaxOffset = 0;
3904 return false;
3905 // LDR / STR
3906 case AArch64::LDRQui:
3907 case AArch64::STRQui:
3908 Scale = TypeSize::getFixed(16);
3909 Width = TypeSize::getFixed(16);
3910 MinOffset = 0;
3911 MaxOffset = 4095;
3912 break;
3913 case AArch64::LDRXui:
3914 case AArch64::LDRDui:
3915 case AArch64::STRXui:
3916 case AArch64::STRDui:
3917 case AArch64::PRFMui:
3918 Scale = TypeSize::getFixed(8);
3919 Width = TypeSize::getFixed(8);
3920 MinOffset = 0;
3921 MaxOffset = 4095;
3922 break;
3923 case AArch64::LDRWui:
3924 case AArch64::LDRSui:
3925 case AArch64::LDRSWui:
3926 case AArch64::STRWui:
3927 case AArch64::STRSui:
3928 Scale = TypeSize::getFixed(4);
3929 Width = TypeSize::getFixed(4);
3930 MinOffset = 0;
3931 MaxOffset = 4095;
3932 break;
3933 case AArch64::LDRHui:
3934 case AArch64::LDRHHui:
3935 case AArch64::LDRSHWui:
3936 case AArch64::LDRSHXui:
3937 case AArch64::STRHui:
3938 case AArch64::STRHHui:
3939 Scale = TypeSize::getFixed(2);
3940 Width = TypeSize::getFixed(2);
3941 MinOffset = 0;
3942 MaxOffset = 4095;
3943 break;
3944 case AArch64::LDRBui:
3945 case AArch64::LDRBBui:
3946 case AArch64::LDRSBWui:
3947 case AArch64::LDRSBXui:
3948 case AArch64::STRBui:
3949 case AArch64::STRBBui:
3950 Scale = TypeSize::getFixed(1);
3951 Width = TypeSize::getFixed(1);
3952 MinOffset = 0;
3953 MaxOffset = 4095;
3954 break;
3955 // post/pre inc
3956 case AArch64::STRQpre:
3957 case AArch64::LDRQpost:
3958 Scale = TypeSize::getFixed(1);
3959 Width = TypeSize::getFixed(16);
3960 MinOffset = -256;
3961 MaxOffset = 255;
3962 break;
3963 case AArch64::LDRDpost:
3964 case AArch64::LDRDpre:
3965 case AArch64::LDRXpost:
3966 case AArch64::LDRXpre:
3967 case AArch64::STRDpost:
3968 case AArch64::STRDpre:
3969 case AArch64::STRXpost:
3970 case AArch64::STRXpre:
3971 Scale = TypeSize::getFixed(1);
3972 Width = TypeSize::getFixed(8);
3973 MinOffset = -256;
3974 MaxOffset = 255;
3975 break;
3976 case AArch64::STRWpost:
3977 case AArch64::STRWpre:
3978 case AArch64::LDRWpost:
3979 case AArch64::LDRWpre:
3980 case AArch64::STRSpost:
3981 case AArch64::STRSpre:
3982 case AArch64::LDRSpost:
3983 case AArch64::LDRSpre:
3984 Scale = TypeSize::getFixed(1);
3985 Width = TypeSize::getFixed(4);
3986 MinOffset = -256;
3987 MaxOffset = 255;
3988 break;
3989 case AArch64::LDRHpost:
3990 case AArch64::LDRHpre:
3991 case AArch64::STRHpost:
3992 case AArch64::STRHpre:
3993 case AArch64::LDRHHpost:
3994 case AArch64::LDRHHpre:
3995 case AArch64::STRHHpost:
3996 case AArch64::STRHHpre:
3997 Scale = TypeSize::getFixed(1);
3998 Width = TypeSize::getFixed(2);
3999 MinOffset = -256;
4000 MaxOffset = 255;
4001 break;
4002 case AArch64::LDRBpost:
4003 case AArch64::LDRBpre:
4004 case AArch64::STRBpost:
4005 case AArch64::STRBpre:
4006 case AArch64::LDRBBpost:
4007 case AArch64::LDRBBpre:
4008 case AArch64::STRBBpost:
4009 case AArch64::STRBBpre:
4010 Scale = TypeSize::getFixed(1);
4011 Width = TypeSize::getFixed(1);
4012 MinOffset = -256;
4013 MaxOffset = 255;
4014 break;
4015 // Unscaled
4016 case AArch64::LDURQi:
4017 case AArch64::STURQi:
4018 Scale = TypeSize::getFixed(1);
4019 Width = TypeSize::getFixed(16);
4020 MinOffset = -256;
4021 MaxOffset = 255;
4022 break;
4023 case AArch64::LDURXi:
4024 case AArch64::LDURDi:
4025 case AArch64::LDAPURXi:
4026 case AArch64::STURXi:
4027 case AArch64::STURDi:
4028 case AArch64::STLURXi:
4029 case AArch64::PRFUMi:
4030 Scale = TypeSize::getFixed(1);
4031 Width = TypeSize::getFixed(8);
4032 MinOffset = -256;
4033 MaxOffset = 255;
4034 break;
4035 case AArch64::LDURWi:
4036 case AArch64::LDURSi:
4037 case AArch64::LDURSWi:
4038 case AArch64::LDAPURi:
4039 case AArch64::LDAPURSWi:
4040 case AArch64::STURWi:
4041 case AArch64::STURSi:
4042 case AArch64::STLURWi:
4043 Scale = TypeSize::getFixed(1);
4044 Width = TypeSize::getFixed(4);
4045 MinOffset = -256;
4046 MaxOffset = 255;
4047 break;
4048 case AArch64::LDURHi:
4049 case AArch64::LDURHHi:
4050 case AArch64::LDURSHXi:
4051 case AArch64::LDURSHWi:
4052 case AArch64::LDAPURHi:
4053 case AArch64::LDAPURSHWi:
4054 case AArch64::LDAPURSHXi:
4055 case AArch64::STURHi:
4056 case AArch64::STURHHi:
4057 case AArch64::STLURHi:
4058 Scale = TypeSize::getFixed(1);
4059 Width = TypeSize::getFixed(2);
4060 MinOffset = -256;
4061 MaxOffset = 255;
4062 break;
4063 case AArch64::LDURBi:
4064 case AArch64::LDURBBi:
4065 case AArch64::LDURSBXi:
4066 case AArch64::LDURSBWi:
4067 case AArch64::LDAPURBi:
4068 case AArch64::LDAPURSBWi:
4069 case AArch64::LDAPURSBXi:
4070 case AArch64::STURBi:
4071 case AArch64::STURBBi:
4072 case AArch64::STLURBi:
4073 Scale = TypeSize::getFixed(1);
4074 Width = TypeSize::getFixed(1);
4075 MinOffset = -256;
4076 MaxOffset = 255;
4077 break;
4078 // LDP / STP (including pre/post inc)
4079 case AArch64::LDPQi:
4080 case AArch64::LDNPQi:
4081 case AArch64::STPQi:
4082 case AArch64::STNPQi:
4083 case AArch64::LDPQpost:
4084 case AArch64::LDPQpre:
4085 case AArch64::STPQpost:
4086 case AArch64::STPQpre:
4087 Scale = TypeSize::getFixed(16);
4088 Width = TypeSize::getFixed(16 * 2);
4089 MinOffset = -64;
4090 MaxOffset = 63;
4091 break;
4092 case AArch64::LDPXi:
4093 case AArch64::LDPDi:
4094 case AArch64::LDNPXi:
4095 case AArch64::LDNPDi:
4096 case AArch64::STPXi:
4097 case AArch64::STPDi:
4098 case AArch64::STNPXi:
4099 case AArch64::STNPDi:
4100 case AArch64::LDPDpost:
4101 case AArch64::LDPDpre:
4102 case AArch64::LDPXpost:
4103 case AArch64::LDPXpre:
4104 case AArch64::STPDpost:
4105 case AArch64::STPDpre:
4106 case AArch64::STPXpost:
4107 case AArch64::STPXpre:
4108 Scale = TypeSize::getFixed(8);
4109 Width = TypeSize::getFixed(8 * 2);
4110 MinOffset = -64;
4111 MaxOffset = 63;
4112 break;
4113 case AArch64::LDPWi:
4114 case AArch64::LDPSi:
4115 case AArch64::LDNPWi:
4116 case AArch64::LDNPSi:
4117 case AArch64::STPWi:
4118 case AArch64::STPSi:
4119 case AArch64::STNPWi:
4120 case AArch64::STNPSi:
4121 case AArch64::LDPSpost:
4122 case AArch64::LDPSpre:
4123 case AArch64::LDPWpost:
4124 case AArch64::LDPWpre:
4125 case AArch64::STPSpost:
4126 case AArch64::STPSpre:
4127 case AArch64::STPWpost:
4128 case AArch64::STPWpre:
4129 Scale = TypeSize::getFixed(4);
4130 Width = TypeSize::getFixed(4 * 2);
4131 MinOffset = -64;
4132 MaxOffset = 63;
4133 break;
4134 case AArch64::StoreSwiftAsyncContext:
4135 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4136 Scale = TypeSize::getFixed(1);
4137 Width = TypeSize::getFixed(8);
4138 MinOffset = 0;
4139 MaxOffset = 4095;
4140 break;
4141 case AArch64::ADDG:
4142 Scale = TypeSize::getFixed(16);
4143 Width = TypeSize::getFixed(0);
4144 MinOffset = 0;
4145 MaxOffset = 63;
4146 break;
4147 case AArch64::TAGPstack:
4148 Scale = TypeSize::getFixed(16);
4149 Width = TypeSize::getFixed(0);
4150 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4151 // of 63 (not 64!).
4152 MinOffset = -63;
4153 MaxOffset = 63;
4154 break;
4155 case AArch64::LDG:
4156 case AArch64::STGi:
4157 case AArch64::STZGi:
4158 Scale = TypeSize::getFixed(16);
4159 Width = TypeSize::getFixed(16);
4160 MinOffset = -256;
4161 MaxOffset = 255;
4162 break;
4163 // SVE
4164 case AArch64::STR_ZZZZXI:
4165 case AArch64::LDR_ZZZZXI:
4166 Scale = TypeSize::getScalable(16);
4167 Width = TypeSize::getScalable(16 * 4);
4168 MinOffset = -256;
4169 MaxOffset = 252;
4170 break;
4171 case AArch64::STR_ZZZXI:
4172 case AArch64::LDR_ZZZXI:
4173 Scale = TypeSize::getScalable(16);
4174 Width = TypeSize::getScalable(16 * 3);
4175 MinOffset = -256;
4176 MaxOffset = 253;
4177 break;
4178 case AArch64::STR_ZZXI:
4179 case AArch64::LDR_ZZXI:
4180 Scale = TypeSize::getScalable(16);
4181 Width = TypeSize::getScalable(16 * 2);
4182 MinOffset = -256;
4183 MaxOffset = 254;
4184 break;
4185 case AArch64::LDR_PXI:
4186 case AArch64::STR_PXI:
4187 Scale = TypeSize::getScalable(2);
4188 Width = TypeSize::getScalable(2);
4189 MinOffset = -256;
4190 MaxOffset = 255;
4191 break;
4192 case AArch64::LDR_PPXI:
4193 case AArch64::STR_PPXI:
4194 Scale = TypeSize::getScalable(2);
4195 Width = TypeSize::getScalable(2 * 2);
4196 MinOffset = -256;
4197 MaxOffset = 254;
4198 break;
4199 case AArch64::LDR_ZXI:
4200 case AArch64::STR_ZXI:
4201 Scale = TypeSize::getScalable(16);
4202 Width = TypeSize::getScalable(16);
4203 MinOffset = -256;
4204 MaxOffset = 255;
4205 break;
4206 case AArch64::LD1B_IMM:
4207 case AArch64::LD1H_IMM:
4208 case AArch64::LD1W_IMM:
4209 case AArch64::LD1D_IMM:
4210 case AArch64::LDNT1B_ZRI:
4211 case AArch64::LDNT1H_ZRI:
4212 case AArch64::LDNT1W_ZRI:
4213 case AArch64::LDNT1D_ZRI:
4214 case AArch64::ST1B_IMM:
4215 case AArch64::ST1H_IMM:
4216 case AArch64::ST1W_IMM:
4217 case AArch64::ST1D_IMM:
4218 case AArch64::STNT1B_ZRI:
4219 case AArch64::STNT1H_ZRI:
4220 case AArch64::STNT1W_ZRI:
4221 case AArch64::STNT1D_ZRI:
4222 case AArch64::LDNF1B_IMM:
4223 case AArch64::LDNF1H_IMM:
4224 case AArch64::LDNF1W_IMM:
4225 case AArch64::LDNF1D_IMM:
4226 // A full vector's worth of data
4227 // Width = mbytes * elements
4228 Scale = TypeSize::getScalable(16);
4229 Width = TypeSize::getScalable(16);
4230 MinOffset = -8;
4231 MaxOffset = 7;
4232 break;
4233 case AArch64::LD2B_IMM:
4234 case AArch64::LD2H_IMM:
4235 case AArch64::LD2W_IMM:
4236 case AArch64::LD2D_IMM:
4237 case AArch64::ST2B_IMM:
4238 case AArch64::ST2H_IMM:
4239 case AArch64::ST2W_IMM:
4240 case AArch64::ST2D_IMM:
4241 Scale = TypeSize::getScalable(32);
4242 Width = TypeSize::getScalable(16 * 2);
4243 MinOffset = -8;
4244 MaxOffset = 7;
4245 break;
4246 case AArch64::LD3B_IMM:
4247 case AArch64::LD3H_IMM:
4248 case AArch64::LD3W_IMM:
4249 case AArch64::LD3D_IMM:
4250 case AArch64::ST3B_IMM:
4251 case AArch64::ST3H_IMM:
4252 case AArch64::ST3W_IMM:
4253 case AArch64::ST3D_IMM:
4254 Scale = TypeSize::getScalable(48);
4255 Width = TypeSize::getScalable(16 * 3);
4256 MinOffset = -8;
4257 MaxOffset = 7;
4258 break;
4259 case AArch64::LD4B_IMM:
4260 case AArch64::LD4H_IMM:
4261 case AArch64::LD4W_IMM:
4262 case AArch64::LD4D_IMM:
4263 case AArch64::ST4B_IMM:
4264 case AArch64::ST4H_IMM:
4265 case AArch64::ST4W_IMM:
4266 case AArch64::ST4D_IMM:
4267 Scale = TypeSize::getScalable(64);
4268 Width = TypeSize::getScalable(16 * 4);
4269 MinOffset = -8;
4270 MaxOffset = 7;
4271 break;
4272 case AArch64::LD1B_H_IMM:
4273 case AArch64::LD1SB_H_IMM:
4274 case AArch64::LD1H_S_IMM:
4275 case AArch64::LD1SH_S_IMM:
4276 case AArch64::LD1W_D_IMM:
4277 case AArch64::LD1SW_D_IMM:
4278 case AArch64::ST1B_H_IMM:
4279 case AArch64::ST1H_S_IMM:
4280 case AArch64::ST1W_D_IMM:
4281 case AArch64::LDNF1B_H_IMM:
4282 case AArch64::LDNF1SB_H_IMM:
4283 case AArch64::LDNF1H_S_IMM:
4284 case AArch64::LDNF1SH_S_IMM:
4285 case AArch64::LDNF1W_D_IMM:
4286 case AArch64::LDNF1SW_D_IMM:
4287 // A half vector's worth of data
4288 // Width = mbytes * elements
4289 Scale = TypeSize::getScalable(8);
4290 Width = TypeSize::getScalable(8);
4291 MinOffset = -8;
4292 MaxOffset = 7;
4293 break;
4294 case AArch64::LD1B_S_IMM:
4295 case AArch64::LD1SB_S_IMM:
4296 case AArch64::LD1H_D_IMM:
4297 case AArch64::LD1SH_D_IMM:
4298 case AArch64::ST1B_S_IMM:
4299 case AArch64::ST1H_D_IMM:
4300 case AArch64::LDNF1B_S_IMM:
4301 case AArch64::LDNF1SB_S_IMM:
4302 case AArch64::LDNF1H_D_IMM:
4303 case AArch64::LDNF1SH_D_IMM:
4304 // A quarter vector's worth of data
4305 // Width = mbytes * elements
4306 Scale = TypeSize::getScalable(4);
4307 Width = TypeSize::getScalable(4);
4308 MinOffset = -8;
4309 MaxOffset = 7;
4310 break;
4311 case AArch64::LD1B_D_IMM:
4312 case AArch64::LD1SB_D_IMM:
4313 case AArch64::ST1B_D_IMM:
4314 case AArch64::LDNF1B_D_IMM:
4315 case AArch64::LDNF1SB_D_IMM:
4316 // An eighth vector's worth of data
4317 // Width = mbytes * elements
4318 Scale = TypeSize::getScalable(2);
4319 Width = TypeSize::getScalable(2);
4320 MinOffset = -8;
4321 MaxOffset = 7;
4322 break;
4323 case AArch64::ST2Gi:
4324 case AArch64::STZ2Gi:
4325 Scale = TypeSize::getFixed(16);
4326 Width = TypeSize::getFixed(32);
4327 MinOffset = -256;
4328 MaxOffset = 255;
4329 break;
4330 case AArch64::STGPi:
4331 Scale = TypeSize::getFixed(16);
4332 Width = TypeSize::getFixed(16);
4333 MinOffset = -64;
4334 MaxOffset = 63;
4335 break;
4336 case AArch64::LD1RB_IMM:
4337 case AArch64::LD1RB_H_IMM:
4338 case AArch64::LD1RB_S_IMM:
4339 case AArch64::LD1RB_D_IMM:
4340 case AArch64::LD1RSB_H_IMM:
4341 case AArch64::LD1RSB_S_IMM:
4342 case AArch64::LD1RSB_D_IMM:
4343 Scale = TypeSize::getFixed(1);
4344 Width = TypeSize::getFixed(1);
4345 MinOffset = 0;
4346 MaxOffset = 63;
4347 break;
4348 case AArch64::LD1RH_IMM:
4349 case AArch64::LD1RH_S_IMM:
4350 case AArch64::LD1RH_D_IMM:
4351 case AArch64::LD1RSH_S_IMM:
4352 case AArch64::LD1RSH_D_IMM:
4353 Scale = TypeSize::getFixed(2);
4354 Width = TypeSize::getFixed(2);
4355 MinOffset = 0;
4356 MaxOffset = 63;
4357 break;
4358 case AArch64::LD1RW_IMM:
4359 case AArch64::LD1RW_D_IMM:
4360 case AArch64::LD1RSW_IMM:
4361 Scale = TypeSize::getFixed(4);
4362 Width = TypeSize::getFixed(4);
4363 MinOffset = 0;
4364 MaxOffset = 63;
4365 break;
4366 case AArch64::LD1RD_IMM:
4367 Scale = TypeSize::getFixed(8);
4368 Width = TypeSize::getFixed(8);
4369 MinOffset = 0;
4370 MaxOffset = 63;
4371 break;
4372 }
4373
4374 return true;
4375}
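// Illustrative note (annotation, not part of the upstream source): the
// Min/MaxOffset values are in units of Scale, not bytes. For example,
// LDRXui reports Scale = 8 and MaxOffset = 4095, so the largest encodable
// byte offset is 4095 * 8 = 32760, while the unscaled LDURXi reports
// Scale = 1 with a byte range of [-256, 255].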
4376
4377// Scaling factor for unscaled load or store.
4378int AArch64InstrInfo::getMemScale(unsigned Opc) {
4379 switch (Opc) {
4380 default:
4381 llvm_unreachable("Opcode has unknown scale!");
4382 case AArch64::LDRBBui:
4383 case AArch64::LDURBBi:
4384 case AArch64::LDRSBWui:
4385 case AArch64::LDURSBWi:
4386 case AArch64::STRBBui:
4387 case AArch64::STURBBi:
4388 return 1;
4389 case AArch64::LDRHHui:
4390 case AArch64::LDURHHi:
4391 case AArch64::LDRSHWui:
4392 case AArch64::LDURSHWi:
4393 case AArch64::STRHHui:
4394 case AArch64::STURHHi:
4395 return 2;
4396 case AArch64::LDRSui:
4397 case AArch64::LDURSi:
4398 case AArch64::LDRSpre:
4399 case AArch64::LDRSWui:
4400 case AArch64::LDURSWi:
4401 case AArch64::LDRSWpre:
4402 case AArch64::LDRWpre:
4403 case AArch64::LDRWui:
4404 case AArch64::LDURWi:
4405 case AArch64::STRSui:
4406 case AArch64::STURSi:
4407 case AArch64::STRSpre:
4408 case AArch64::STRWui:
4409 case AArch64::STURWi:
4410 case AArch64::STRWpre:
4411 case AArch64::LDPSi:
4412 case AArch64::LDPSWi:
4413 case AArch64::LDPWi:
4414 case AArch64::STPSi:
4415 case AArch64::STPWi:
4416 return 4;
4417 case AArch64::LDRDui:
4418 case AArch64::LDURDi:
4419 case AArch64::LDRDpre:
4420 case AArch64::LDRXui:
4421 case AArch64::LDURXi:
4422 case AArch64::LDRXpre:
4423 case AArch64::STRDui:
4424 case AArch64::STURDi:
4425 case AArch64::STRDpre:
4426 case AArch64::STRXui:
4427 case AArch64::STURXi:
4428 case AArch64::STRXpre:
4429 case AArch64::LDPDi:
4430 case AArch64::LDPXi:
4431 case AArch64::STPDi:
4432 case AArch64::STPXi:
4433 return 8;
4434 case AArch64::LDRQui:
4435 case AArch64::LDURQi:
4436 case AArch64::STRQui:
4437 case AArch64::STURQi:
4438 case AArch64::STRQpre:
4439 case AArch64::LDPQi:
4440 case AArch64::LDRQpre:
4441 case AArch64::STPQi:
4442 case AArch64::STGi:
4443 case AArch64::STZGi:
4444 case AArch64::ST2Gi:
4445 case AArch64::STZ2Gi:
4446 case AArch64::STGPi:
4447 return 16;
4448 }
4449}
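// Illustrative note (annotation, not part of the upstream source):
// getMemScale returns the access size in bytes that serves as the scale,
// e.g. 4 for LDRWui/STRWui, 8 for LDRXui/LDPXi and 16 for LDRQui. Callers
// such as scaleOffset below use it to convert byte offsets into element
// offsets.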
4450
4451bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4452 switch (MI.getOpcode()) {
4453 default:
4454 return false;
4455 case AArch64::LDRWpre:
4456 case AArch64::LDRXpre:
4457 case AArch64::LDRSWpre:
4458 case AArch64::LDRSpre:
4459 case AArch64::LDRDpre:
4460 case AArch64::LDRQpre:
4461 return true;
4462 }
4463}
4464
4465bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4466 switch (MI.getOpcode()) {
4467 default:
4468 return false;
4469 case AArch64::STRWpre:
4470 case AArch64::STRXpre:
4471 case AArch64::STRSpre:
4472 case AArch64::STRDpre:
4473 case AArch64::STRQpre:
4474 return true;
4475 }
4476}
4477
4478bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4479 return isPreLd(MI) || isPreSt(MI);
4480}
4481
4482bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4483 switch (MI.getOpcode()) {
4484 default:
4485 return false;
4486 case AArch64::LDPSi:
4487 case AArch64::LDPSWi:
4488 case AArch64::LDPDi:
4489 case AArch64::LDPQi:
4490 case AArch64::LDPWi:
4491 case AArch64::LDPXi:
4492 case AArch64::STPSi:
4493 case AArch64::STPDi:
4494 case AArch64::STPQi:
4495 case AArch64::STPWi:
4496 case AArch64::STPXi:
4497 case AArch64::STGPi:
4498 return true;
4499 }
4500}
4501
4502const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4503 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4504 unsigned Idx =
4505 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4506 : 1;
4507 return MI.getOperand(Idx);
4508}
4509
4510const MachineOperand &
4511AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4512 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4513 unsigned Idx =
4514 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4515 : 2;
4516 return MI.getOperand(Idx);
4517}
4518
4519const MachineOperand &
4520AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4521 switch (MI.getOpcode()) {
4522 default:
4523 llvm_unreachable("Unexpected opcode");
4524 case AArch64::LDRBroX:
4525 case AArch64::LDRBBroX:
4526 case AArch64::LDRSBXroX:
4527 case AArch64::LDRSBWroX:
4528 case AArch64::LDRHroX:
4529 case AArch64::LDRHHroX:
4530 case AArch64::LDRSHXroX:
4531 case AArch64::LDRSHWroX:
4532 case AArch64::LDRWroX:
4533 case AArch64::LDRSroX:
4534 case AArch64::LDRSWroX:
4535 case AArch64::LDRDroX:
4536 case AArch64::LDRXroX:
4537 case AArch64::LDRQroX:
4538 return MI.getOperand(4);
4539 }
4540}
4541
4542static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4543 Register Reg) {
4544 if (MI.getParent() == nullptr)
4545 return nullptr;
4546 const MachineFunction *MF = MI.getParent()->getParent();
4547 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4548}
4549
4550bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4551 auto IsHFPR = [&](const MachineOperand &Op) {
4552 if (!Op.isReg())
4553 return false;
4554 auto Reg = Op.getReg();
4555 if (Reg.isPhysical())
4556 return AArch64::FPR16RegClass.contains(Reg);
4557 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4558 return TRC == &AArch64::FPR16RegClass ||
4559 TRC == &AArch64::FPR16_loRegClass;
4560 };
4561 return llvm::any_of(MI.operands(), IsHFPR);
4562}
4563
4564bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4565 auto IsQFPR = [&](const MachineOperand &Op) {
4566 if (!Op.isReg())
4567 return false;
4568 auto Reg = Op.getReg();
4569 if (Reg.isPhysical())
4570 return AArch64::FPR128RegClass.contains(Reg);
4571 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4572 return TRC == &AArch64::FPR128RegClass ||
4573 TRC == &AArch64::FPR128_loRegClass;
4574 };
4575 return llvm::any_of(MI.operands(), IsQFPR);
4576}
4577
4578bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4579 switch (MI.getOpcode()) {
4580 case AArch64::BRK:
4581 case AArch64::HLT:
4582 case AArch64::PACIASP:
4583 case AArch64::PACIBSP:
4584 // Implicit BTI behavior.
4585 return true;
4586 case AArch64::PAUTH_PROLOGUE:
4587 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4588 return true;
4589 case AArch64::HINT: {
4590 unsigned Imm = MI.getOperand(0).getImm();
4591 // Explicit BTI instruction.
4592 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4593 return true;
4594 // PACI(A|B)SP instructions.
4595 if (Imm == 25 || Imm == 27)
4596 return true;
4597 return false;
4598 }
4599 default:
4600 return false;
4601 }
4602}
4603
4604bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4605 if (Reg == 0)
4606 return false;
4607 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4608 return AArch64::FPR128RegClass.contains(Reg) ||
4609 AArch64::FPR64RegClass.contains(Reg) ||
4610 AArch64::FPR32RegClass.contains(Reg) ||
4611 AArch64::FPR16RegClass.contains(Reg) ||
4612 AArch64::FPR8RegClass.contains(Reg);
4613}
4614
4615bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4616 auto IsFPR = [&](const MachineOperand &Op) {
4617 if (!Op.isReg())
4618 return false;
4619 auto Reg = Op.getReg();
4620 if (Reg.isPhysical())
4621 return isFpOrNEON(Reg);
4622
4623 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4624 return TRC == &AArch64::FPR128RegClass ||
4625 TRC == &AArch64::FPR128_loRegClass ||
4626 TRC == &AArch64::FPR64RegClass ||
4627 TRC == &AArch64::FPR64_loRegClass ||
4628 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4629 TRC == &AArch64::FPR8RegClass;
4630 };
4631 return llvm::any_of(MI.operands(), IsFPR);
4632}
4633
4634// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4635// scaled.
4636static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4637 int Scale = AArch64InstrInfo::getMemScale(Opc);
4638
4639 // If the byte-offset isn't a multiple of the stride, we can't scale this
4640 // offset.
4641 if (Offset % Scale != 0)
4642 return false;
4643
4644 // Convert the byte-offset used by unscaled into an "element" offset used
4645 // by the scaled pair load/store instructions.
4646 Offset /= Scale;
4647 return true;
4648}
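// Illustrative note (annotation, not part of the upstream source): for
// Opc = AArch64::STURXi (scale 8), a byte offset of 16 is rewritten to the
// element offset 2, whereas a byte offset of 12 is rejected because it is
// not a multiple of the 8-byte stride.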
4649
4650static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4651 if (FirstOpc == SecondOpc)
4652 return true;
4653 // We can also pair sign-ext and zero-ext instructions.
4654 switch (FirstOpc) {
4655 default:
4656 return false;
4657 case AArch64::STRSui:
4658 case AArch64::STURSi:
4659 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4660 case AArch64::STRDui:
4661 case AArch64::STURDi:
4662 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4663 case AArch64::STRQui:
4664 case AArch64::STURQi:
4665 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4666 case AArch64::STRWui:
4667 case AArch64::STURWi:
4668 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4669 case AArch64::STRXui:
4670 case AArch64::STURXi:
4671 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4672 case AArch64::LDRSui:
4673 case AArch64::LDURSi:
4674 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4675 case AArch64::LDRDui:
4676 case AArch64::LDURDi:
4677 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4678 case AArch64::LDRQui:
4679 case AArch64::LDURQi:
4680 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4681 case AArch64::LDRWui:
4682 case AArch64::LDURWi:
4683 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4684 case AArch64::LDRSWui:
4685 case AArch64::LDURSWi:
4686 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4687 case AArch64::LDRXui:
4688 case AArch64::LDURXi:
4689 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4690 }
4691 // These instructions can't be paired based on their opcodes.
4692 return false;
4693}
4694
4695static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4696 int64_t Offset1, unsigned Opcode1, int FI2,
4697 int64_t Offset2, unsigned Opcode2) {
4698 // Accesses through fixed stack object frame indices may access a different
4699 // fixed stack slot. Check that the object offsets + offsets match.
4700 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4701 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4702 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4703 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4704 // Convert to scaled object offsets.
4705 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4706 if (ObjectOffset1 % Scale1 != 0)
4707 return false;
4708 ObjectOffset1 /= Scale1;
4709 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4710 if (ObjectOffset2 % Scale2 != 0)
4711 return false;
4712 ObjectOffset2 /= Scale2;
4713 ObjectOffset1 += Offset1;
4714 ObjectOffset2 += Offset2;
4715 return ObjectOffset1 + 1 == ObjectOffset2;
4716 }
4717
4718 return FI1 == FI2;
4719}
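// Illustrative note (annotation, not part of the upstream source): for two
// fixed stack objects at byte offsets 0 and 8, each accessed by an 8-byte
// load with an immediate of 0, the scaled object offsets become 0 and 1,
// which are adjacent, so the accesses may be clustered even though the
// frame indices differ.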
4720
4721/// Detect opportunities for ldp/stp formation.
4722///
4723/// Only called for LdSt for which getMemOperandWithOffset returns true.
4724bool AArch64InstrInfo::shouldClusterMemOps(
4725 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4726 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4727 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4728 unsigned NumBytes) const {
4729 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4730 const MachineOperand &BaseOp1 = *BaseOps1.front();
4731 const MachineOperand &BaseOp2 = *BaseOps2.front();
4732 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4733 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4734 if (BaseOp1.getType() != BaseOp2.getType())
4735 return false;
4736
4737 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4738 "Only base registers and frame indices are supported.");
4739
4740 // Check for both base regs and base FI.
4741 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4742 return false;
4743
4744 // Only cluster up to a single pair.
4745 if (ClusterSize > 2)
4746 return false;
4747
4748 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4749 return false;
4750
4751 // Can we pair these instructions based on their opcodes?
4752 unsigned FirstOpc = FirstLdSt.getOpcode();
4753 unsigned SecondOpc = SecondLdSt.getOpcode();
4754 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4755 return false;
4756
4757 // Can't merge volatiles or load/stores that have a hint to avoid pair
4758 // formation, for example.
4759 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4760 !isCandidateToMergeOrPair(SecondLdSt))
4761 return false;
4762
4763 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4764 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4765 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4766 return false;
4767
4768 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4769 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4770 return false;
4771
4772 // Pairwise instructions have a 7-bit signed offset field.
4773 if (Offset1 > 63 || Offset1 < -64)
4774 return false;
4775
4776 // The caller should already have ordered FirstLdSt/SecondLdSt by offset,
4777 // except when the bases are unequal frame indices.
4778 if (BaseOp1.isFI()) {
4779 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4780 "Caller should have ordered offsets.");
4781
4782 const MachineFrameInfo &MFI =
4783 FirstLdSt.getParent()->getParent()->getFrameInfo();
4784 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4785 BaseOp2.getIndex(), Offset2, SecondOpc);
4786 }
4787
4788 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4789
4790 return Offset1 + 1 == Offset2;
4791}
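// Illustrative note (annotation, not part of the upstream source): two
// LDRXui loads from the same base with immediates 2 and 3 (byte offsets 16
// and 24) have consecutive scaled offsets within the 7-bit signed LDP range,
// so, assuming they are otherwise candidates to pair, they are reported as
// clusterable; a third access in the same cluster is rejected by the
// ClusterSize > 2 check.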
4792
4793static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4794 unsigned Reg, unsigned SubIdx,
4795 unsigned State,
4796 const TargetRegisterInfo *TRI) {
4797 if (!SubIdx)
4798 return MIB.addReg(Reg, State);
4799
4800 if (Register::isPhysicalRegister(Reg))
4801 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4802 return MIB.addReg(Reg, State, SubIdx);
4803}
4804
4805static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4806 unsigned NumRegs) {
4807 // We really want the positive remainder mod 32 here, which happens to be
4808 // easily obtainable with a mask.
4809 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4810}
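// Illustrative note (annotation, not part of the upstream source): this is a
// wrap-around range test on register encodings. For a 2-register tuple copy
// where the destination starts one encoding above the source,
// ((DestEncoding - SrcEncoding) & 0x1f) is 1, which is less than NumRegs = 2,
// so a forward sub-register copy would clobber the second source register
// before it is read; copyPhysRegTuple below therefore copies in reverse.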
4811
4812void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4813 MachineBasicBlock::iterator I,
4814 const DebugLoc &DL, MCRegister DestReg,
4815 MCRegister SrcReg, bool KillSrc,
4816 unsigned Opcode,
4817 ArrayRef<unsigned> Indices) const {
4818 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4819 const TargetRegisterInfo *TRI = &getRegisterInfo();
4820 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4821 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4822 unsigned NumRegs = Indices.size();
4823
4824 int SubReg = 0, End = NumRegs, Incr = 1;
4825 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4826 SubReg = NumRegs - 1;
4827 End = -1;
4828 Incr = -1;
4829 }
4830
4831 for (; SubReg != End; SubReg += Incr) {
4832 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4833 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4834 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4835 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4836 }
4837}
4838
4839void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4840 MachineBasicBlock::iterator I,
4841 DebugLoc DL, unsigned DestReg,
4842 unsigned SrcReg, bool KillSrc,
4843 unsigned Opcode, unsigned ZeroReg,
4844 llvm::ArrayRef<unsigned> Indices) const {
4845 const TargetRegisterInfo *TRI = &getRegisterInfo();
4846 unsigned NumRegs = Indices.size();
4847
4848#ifndef NDEBUG
4849 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4850 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4851 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4852 "GPR reg sequences should not be able to overlap");
4853#endif
4854
4855 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4856 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4857 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4858 MIB.addReg(ZeroReg);
4859 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4860 MIB.addImm(0);
4861 }
4862}
4863
4864void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4865 MachineBasicBlock::iterator I,
4866 const DebugLoc &DL, MCRegister DestReg,
4867 MCRegister SrcReg, bool KillSrc) const {
4868 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4869 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4870 const TargetRegisterInfo *TRI = &getRegisterInfo();
4871
4872 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4873 // If either operand is WSP, expand to ADD #0.
4874 if (Subtarget.hasZeroCycleRegMove()) {
4875 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4876 MCRegister DestRegX = TRI->getMatchingSuperReg(
4877 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4878 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4879 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4880 // This instruction is reading and writing X registers. This may upset
4881 // the register scavenger and machine verifier, so we need to indicate
4882 // that we are reading an undefined value from SrcRegX, but a proper
4883 // value from SrcReg.
4884 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4885 .addReg(SrcRegX, RegState::Undef)
4886 .addImm(0)
4887 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
4888 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4889 } else {
4890 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4891 .addReg(SrcReg, getKillRegState(KillSrc))
4892 .addImm(0)
4893 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4894 }
4895 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4896 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4897 .addImm(0)
4898 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4899 } else {
4900 if (Subtarget.hasZeroCycleRegMove()) {
4901 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4902 MCRegister DestRegX = TRI->getMatchingSuperReg(
4903 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4904 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4905 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4906 // This instruction is reading and writing X registers. This may upset
4907 // the register scavenger and machine verifier, so we need to indicate
4908 // that we are reading an undefined value from SrcRegX, but a proper
4909 // value from SrcReg.
4910 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4911 .addReg(AArch64::XZR)
4912 .addReg(SrcRegX, RegState::Undef)
4913 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4914 } else {
4915 // Otherwise, expand to ORR WZR.
4916 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4917 .addReg(AArch64::WZR)
4918 .addReg(SrcReg, getKillRegState(KillSrc));
4919 }
4920 }
4921 return;
4922 }
4923
4924 // Copy a Predicate register by ORRing with itself.
4925 if (AArch64::PPRRegClass.contains(DestReg) &&
4926 AArch64::PPRRegClass.contains(SrcReg)) {
4927 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
4928 "Unexpected SVE register.");
4929 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4930 .addReg(SrcReg) // Pg
4931 .addReg(SrcReg)
4932 .addReg(SrcReg, getKillRegState(KillSrc));
4933 return;
4934 }
4935
4936 // Copy a predicate-as-counter register by ORRing with itself as if it
4937 // were a regular predicate (mask) register.
4938 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4939 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4940 if (DestIsPNR || SrcIsPNR) {
4941 auto ToPPR = [](MCRegister R) -> MCRegister {
4942 return (R - AArch64::PN0) + AArch64::P0;
4943 };
4944 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4945 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4946
4947 if (PPRSrcReg != PPRDestReg) {
4948 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4949 .addReg(PPRSrcReg) // Pg
4950 .addReg(PPRSrcReg)
4951 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4952 if (DestIsPNR)
4953 NewMI.addDef(DestReg, RegState::Implicit);
4954 }
4955 return;
4956 }
4957
4958 // Copy a Z register by ORRing with itself.
4959 if (AArch64::ZPRRegClass.contains(DestReg) &&
4960 AArch64::ZPRRegClass.contains(SrcReg)) {
4961 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
4962 "Unexpected SVE register.");
4963 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4964 .addReg(SrcReg)
4965 .addReg(SrcReg, getKillRegState(KillSrc));
4966 return;
4967 }
4968
4969 // Copy a Z register pair by copying the individual sub-registers.
4970 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4971 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4972 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4973 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4974 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
4975 "Unexpected SVE register.");
4976 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4977 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4978 Indices);
4979 return;
4980 }
4981
4982 // Copy a Z register triple by copying the individual sub-registers.
4983 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4984 AArch64::ZPR3RegClass.contains(SrcReg)) {
4985 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
4986 "Unexpected SVE register.");
4987 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4988 AArch64::zsub2};
4989 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4990 Indices);
4991 return;
4992 }
4993
4994 // Copy a Z register quad by copying the individual sub-registers.
4995 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4996 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4997 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4998 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4999 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5000 "Unexpected SVE register.");
5001 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5002 AArch64::zsub2, AArch64::zsub3};
5003 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5004 Indices);
5005 return;
5006 }
5007
5008 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5009 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5010 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5011 // If either operand is SP, expand to ADD #0.
5012 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5013 .addReg(SrcReg, getKillRegState(KillSrc))
5014 .addImm(0)
5015 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5016 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5017 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5018 .addImm(0)
5019 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5020 } else {
5021 // Otherwise, expand to ORR XZR.
5022 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5023 .addReg(AArch64::XZR)
5024 .addReg(SrcReg, getKillRegState(KillSrc));
5025 }
5026 return;
5027 }
5028
5029 // Copy a DDDD register quad by copying the individual sub-registers.
5030 if (AArch64::DDDDRegClass.contains(DestReg) &&
5031 AArch64::DDDDRegClass.contains(SrcReg)) {
5032 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5033 AArch64::dsub2, AArch64::dsub3};
5034 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5035 Indices);
5036 return;
5037 }
5038
5039 // Copy a DDD register triple by copying the individual sub-registers.
5040 if (AArch64::DDDRegClass.contains(DestReg) &&
5041 AArch64::DDDRegClass.contains(SrcReg)) {
5042 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5043 AArch64::dsub2};
5044 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5045 Indices);
5046 return;
5047 }
5048
5049 // Copy a DD register pair by copying the individual sub-registers.
5050 if (AArch64::DDRegClass.contains(DestReg) &&
5051 AArch64::DDRegClass.contains(SrcReg)) {
5052 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5053 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5054 Indices);
5055 return;
5056 }
5057
5058 // Copy a QQQQ register quad by copying the individual sub-registers.
5059 if (AArch64::QQQQRegClass.contains(DestReg) &&
5060 AArch64::QQQQRegClass.contains(SrcReg)) {
5061 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5062 AArch64::qsub2, AArch64::qsub3};
5063 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5064 Indices);
5065 return;
5066 }
5067
5068 // Copy a QQQ register triple by copying the individual sub-registers.
5069 if (AArch64::QQQRegClass.contains(DestReg) &&
5070 AArch64::QQQRegClass.contains(SrcReg)) {
5071 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5072 AArch64::qsub2};
5073 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5074 Indices);
5075 return;
5076 }
5077
5078 // Copy a QQ register pair by copying the individual sub-registers.
5079 if (AArch64::QQRegClass.contains(DestReg) &&
5080 AArch64::QQRegClass.contains(SrcReg)) {
5081 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5082 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5083 Indices);
5084 return;
5085 }
5086
5087 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5088 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5089 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5090 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5091 AArch64::XZR, Indices);
5092 return;
5093 }
5094
5095 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5096 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5097 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5098 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5099 AArch64::WZR, Indices);
5100 return;
5101 }
5102
5103 if (AArch64::FPR128RegClass.contains(DestReg) &&
5104 AArch64::FPR128RegClass.contains(SrcReg)) {
5105 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5106 !Subtarget.isNeonAvailable())
5107 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5108 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5109 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5110 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5111 else if (Subtarget.isNeonAvailable())
5112 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5113 .addReg(SrcReg)
5114 .addReg(SrcReg, getKillRegState(KillSrc));
5115 else {
5116 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5117 .addReg(AArch64::SP, RegState::Define)
5118 .addReg(SrcReg, getKillRegState(KillSrc))
5119 .addReg(AArch64::SP)
5120 .addImm(-16);
5121 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5122 .addReg(AArch64::SP, RegState::Define)
5123 .addReg(DestReg, RegState::Define)
5124 .addReg(AArch64::SP)
5125 .addImm(16);
5126 }
5127 return;
5128 }
5129
5130 if (AArch64::FPR64RegClass.contains(DestReg) &&
5131 AArch64::FPR64RegClass.contains(SrcReg)) {
5132 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5133 .addReg(SrcReg, getKillRegState(KillSrc));
5134 return;
5135 }
5136
5137 if (AArch64::FPR32RegClass.contains(DestReg) &&
5138 AArch64::FPR32RegClass.contains(SrcReg)) {
5139 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5140 .addReg(SrcReg, getKillRegState(KillSrc));
5141 return;
5142 }
5143
5144 if (AArch64::FPR16RegClass.contains(DestReg) &&
5145 AArch64::FPR16RegClass.contains(SrcReg)) {
5146 DestReg =
5147 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
5148 SrcReg =
5149 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
5150 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5151 .addReg(SrcReg, getKillRegState(KillSrc));
5152 return;
5153 }
5154
5155 if (AArch64::FPR8RegClass.contains(DestReg) &&
5156 AArch64::FPR8RegClass.contains(SrcReg)) {
5157 DestReg =
5158 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
5159 SrcReg =
5160 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
5161 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5162 .addReg(SrcReg, getKillRegState(KillSrc));
5163 return;
5164 }
5165
5166 // Copies between GPR64 and FPR64.
5167 if (AArch64::FPR64RegClass.contains(DestReg) &&
5168 AArch64::GPR64RegClass.contains(SrcReg)) {
5169 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5170 .addReg(SrcReg, getKillRegState(KillSrc));
5171 return;
5172 }
5173 if (AArch64::GPR64RegClass.contains(DestReg) &&
5174 AArch64::FPR64RegClass.contains(SrcReg)) {
5175 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5176 .addReg(SrcReg, getKillRegState(KillSrc));
5177 return;
5178 }
5179 // Copies between GPR32 and FPR32.
5180 if (AArch64::FPR32RegClass.contains(DestReg) &&
5181 AArch64::GPR32RegClass.contains(SrcReg)) {
5182 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5183 .addReg(SrcReg, getKillRegState(KillSrc));
5184 return;
5185 }
5186 if (AArch64::GPR32RegClass.contains(DestReg) &&
5187 AArch64::FPR32RegClass.contains(SrcReg)) {
5188 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5189 .addReg(SrcReg, getKillRegState(KillSrc));
5190 return;
5191 }
5192
5193 if (DestReg == AArch64::NZCV) {
5194 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5195 BuildMI(MBB, I, DL, get(AArch64::MSR))
5196 .addImm(AArch64SysReg::NZCV)
5197 .addReg(SrcReg, getKillRegState(KillSrc))
5198 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5199 return;
5200 }
5201
5202 if (SrcReg == AArch64::NZCV) {
5203 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5204 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5205 .addImm(AArch64SysReg::NZCV)
5206 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5207 return;
5208 }
5209
5210#ifndef NDEBUG
5211 const TargetRegisterInfo &TRI = getRegisterInfo();
5212 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5213 << TRI.getRegAsmName(SrcReg) << "\n";
5214#endif
5215 llvm_unreachable("unimplemented reg-to-reg copy");
5216}
5217
5218static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5219 MachineBasicBlock &MBB,
5220 MachineBasicBlock::iterator InsertBefore,
5221 const MCInstrDesc &MCID,
5222 Register SrcReg, bool IsKill,
5223 unsigned SubIdx0, unsigned SubIdx1, int FI,
5224 MachineMemOperand *MMO) {
5225 Register SrcReg0 = SrcReg;