AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/IR/Module.h"
43#include "llvm/MC/MCAsmInfo.h"
44#include "llvm/MC/MCInst.h"
46#include "llvm/MC/MCInstrDesc.h"
51#include "llvm/Support/LEB128.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58#include <utility>
59
60using namespace llvm;
61
62#define GET_INSTRINFO_CTOR_DTOR
63#include "AArch64GenInstrInfo.inc"
64
66 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
67 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
68
70 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
71 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
72
73static cl::opt<unsigned>
74 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
75 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
76
77static cl::opt<unsigned>
78 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
79 cl::desc("Restrict range of B instructions (DEBUG)"));
80
81AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
82 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
83 AArch64::CATCHRET),
84 RI(STI.getTargetTriple()), Subtarget(STI) {}
85
86/// GetInstSize - Return the number of bytes of code the specified
87/// instruction may occupy. This returns the maximum number of bytes.
88unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
89 const MachineBasicBlock &MBB = *MI.getParent();
90 const MachineFunction *MF = MBB.getParent();
91 const Function &F = MF->getFunction();
92 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
93
94 {
95 auto Op = MI.getOpcode();
96 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
97 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
98 }
99
100 // Meta-instructions emit no code.
101 if (MI.isMetaInstruction())
102 return 0;
103
104 // FIXME: We currently only handle pseudoinstructions that don't get expanded
105 // before the assembly printer.
106 unsigned NumBytes = 0;
107 const MCInstrDesc &Desc = MI.getDesc();
108
109 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
110 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
111
112 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
113 if (!MFI->shouldSignReturnAddress(MF))
114 return NumBytes;
115
116 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
117 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
118 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
119 return NumBytes;
120 }
121
122 // Size should be preferably set in
123 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
124 // Specific cases handle instructions of variable sizes
125 switch (Desc.getOpcode()) {
126 default:
127 if (Desc.getSize())
128 return Desc.getSize();
129
130 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
131 // with fixed constant size but not specified in .td file) is a normal
132 // 4-byte insn.
133 NumBytes = 4;
134 break;
135 case TargetOpcode::STACKMAP:
136 // The upper bound for a stackmap intrinsic is the full length of its shadow
137 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
138 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
139 break;
140 case TargetOpcode::PATCHPOINT:
141 // The size of the patchpoint intrinsic is the number of bytes requested
142 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
143 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
144 break;
145 case TargetOpcode::STATEPOINT:
146 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
147 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
148 // No patch bytes means a normal call inst is emitted
149 if (NumBytes == 0)
150 NumBytes = 4;
151 break;
152 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
153 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
154 // instructions are expanded to the specified number of NOPs. Otherwise,
155 // they are expanded to 36-byte XRay sleds.
156 NumBytes =
157 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
158 break;
159 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
160 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
161 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
162 NumBytes = 36;
163 break;
164 case TargetOpcode::PATCHABLE_EVENT_CALL:
165 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
166 NumBytes = 24;
167 break;
168
169 case AArch64::SPACE:
170 NumBytes = MI.getOperand(1).getImm();
171 break;
172 case TargetOpcode::BUNDLE:
173 NumBytes = getInstBundleLength(MI);
174 break;
175 }
176
177 return NumBytes;
178}
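// Added note (illustrative, not from the upstream source): a PATCHPOINT that
// requests 16 patch bytes reports 16 here, while an ordinary AArch64
// instruction reports 4, since the base ISA uses fixed 4-byte encodings.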
179
180unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
181 unsigned Size = 0;
182 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
183 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
184 while (++I != E && I->isInsideBundle()) {
185 assert(!I->isBundle() && "No nested bundle!");
186 Size += getInstSizeInBytes(*I);
187 }
188 return Size;
189}
190
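// Expository note (not in the upstream source): analyzeBranch and friends
// encode a conditional branch in the Cond operand vector as
//   [CondCode]                    for Bcc,
//   [-1, Opcode, Reg]             for CB(N)Z{W,X}, and
//   [-1, Opcode, Reg, BitNumber]  for TB(N)Z{W,X};
// parseCondBranch below produces this form and insertBranch consumes it.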
191static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
192 SmallVectorImpl<MachineOperand> &Cond) {
193 // Block ends with fall-through condbranch.
194 switch (LastInst->getOpcode()) {
195 default:
196 llvm_unreachable("Unknown branch instruction?");
197 case AArch64::Bcc:
198 Target = LastInst->getOperand(1).getMBB();
199 Cond.push_back(LastInst->getOperand(0));
200 break;
201 case AArch64::CBZW:
202 case AArch64::CBZX:
203 case AArch64::CBNZW:
204 case AArch64::CBNZX:
205 Target = LastInst->getOperand(1).getMBB();
206 Cond.push_back(MachineOperand::CreateImm(-1));
207 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
208 Cond.push_back(LastInst->getOperand(0));
209 break;
210 case AArch64::TBZW:
211 case AArch64::TBZX:
212 case AArch64::TBNZW:
213 case AArch64::TBNZX:
214 Target = LastInst->getOperand(2).getMBB();
215 Cond.push_back(MachineOperand::CreateImm(-1));
216 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
217 Cond.push_back(LastInst->getOperand(0));
218 Cond.push_back(LastInst->getOperand(1));
219 }
220}
221
222static unsigned getBranchDisplacementBits(unsigned Opc) {
223 switch (Opc) {
224 default:
225 llvm_unreachable("unexpected opcode!");
226 case AArch64::B:
227 return BDisplacementBits;
228 case AArch64::TBNZW:
229 case AArch64::TBZW:
230 case AArch64::TBNZX:
231 case AArch64::TBZX:
232 return TBZDisplacementBits;
233 case AArch64::CBNZW:
234 case AArch64::CBZW:
235 case AArch64::CBNZX:
236 case AArch64::CBZX:
237 return CBZDisplacementBits;
238 case AArch64::Bcc:
239 return BCCDisplacementBits;
240 }
241}
242
243bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
244 int64_t BrOffset) const {
245 unsigned Bits = getBranchDisplacementBits(BranchOp);
246 assert(Bits >= 3 && "max branch displacement must be enough to jump "
247 "over conditional branch expansion");
248 return isIntN(Bits, BrOffset / 4);
249}
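// Illustrative ranges implied by the default displacement widths above (the
// encodings scale the byte offset by 4):
//   B      : 26 bits -> +/-128 MiB
//   Bcc    : 19 bits -> +/-1 MiB
//   CB(N)Z : 19 bits -> +/-1 MiB
//   TB(N)Z : 14 bits -> +/-32 KiB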
250
251MachineBasicBlock *
252AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
253 switch (MI.getOpcode()) {
254 default:
255 llvm_unreachable("unexpected opcode!");
256 case AArch64::B:
257 return MI.getOperand(0).getMBB();
258 case AArch64::TBZW:
259 case AArch64::TBNZW:
260 case AArch64::TBZX:
261 case AArch64::TBNZX:
262 return MI.getOperand(2).getMBB();
263 case AArch64::CBZW:
264 case AArch64::CBNZW:
265 case AArch64::CBZX:
266 case AArch64::CBNZX:
267 case AArch64::Bcc:
268 return MI.getOperand(1).getMBB();
269 }
270}
271
272void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
273 MachineBasicBlock &NewDestBB,
274 MachineBasicBlock &RestoreBB,
275 const DebugLoc &DL,
276 int64_t BrOffset,
277 RegScavenger *RS) const {
278 assert(RS && "RegScavenger required for long branching");
279 assert(MBB.empty() &&
280 "new block should be inserted for expanding unconditional branch");
281 assert(MBB.pred_size() == 1);
282 assert(RestoreBB.empty() &&
283 "restore block should be inserted for restoring clobbered registers");
284
285 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
286 // Offsets outside of the signed 33-bit range are not supported for ADRP +
287 // ADD.
288 if (!isInt<33>(BrOffset))
290 "Branch offsets outside of the signed 33-bit range not supported");
291
292 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
293 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
294 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
295 .addReg(Reg)
296 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
297 .addImm(0);
298 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
299 };
300
301 RS->enterBasicBlockEnd(MBB);
302 // If X16 is unused, we can rely on the linker to insert a range extension
303 // thunk if NewDestBB is out of range of a single B instruction.
304 constexpr Register Reg = AArch64::X16;
305 if (!RS->isRegUsed(Reg)) {
306 insertUnconditionalBranch(MBB, &NewDestBB, DL);
307 RS->setRegUsed(Reg);
308 return;
309 }
310
311 // If there's a free register and it's worth inflating the code size,
312 // manually insert the indirect branch.
313 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
314 if (Scavenged != AArch64::NoRegister &&
315 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
316 buildIndirectBranch(Scavenged, NewDestBB);
317 RS->setRegUsed(Scavenged);
318 return;
319 }
320
321 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
322 // with red zones.
323 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
324 if (!AFI || AFI->hasRedZone().value_or(true))
325 report_fatal_error(
326 "Unable to insert indirect branch inside function that has red zone");
327
328 // Otherwise, spill X16 and defer range extension to the linker.
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
330 .addReg(AArch64::SP, RegState::Define)
331 .addReg(Reg)
332 .addReg(AArch64::SP)
333 .addImm(-16);
334
335 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
336
337 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
338 .addReg(AArch64::SP, RegState::Define)
339 .addReg(Reg, RegState::Define)
340 .addReg(AArch64::SP)
341 .addImm(16);
342}
343
344// Branch analysis.
345bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
346 MachineBasicBlock *&TBB,
347 MachineBasicBlock *&FBB,
348 SmallVectorImpl<MachineOperand> &Cond,
349 bool AllowModify) const {
350 // If the block has no terminators, it just falls into the block after it.
351 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
352 if (I == MBB.end())
353 return false;
354
355 // Skip over SpeculationBarrierEndBB terminators
356 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
357 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
358 --I;
359 }
360
361 if (!isUnpredicatedTerminator(*I))
362 return false;
363
364 // Get the last instruction in the block.
365 MachineInstr *LastInst = &*I;
366
367 // If there is only one terminator instruction, process it.
368 unsigned LastOpc = LastInst->getOpcode();
369 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
370 if (isUncondBranchOpcode(LastOpc)) {
371 TBB = LastInst->getOperand(0).getMBB();
372 return false;
373 }
374 if (isCondBranchOpcode(LastOpc)) {
375 // Block ends with fall-through condbranch.
376 parseCondBranch(LastInst, TBB, Cond);
377 return false;
378 }
379 return true; // Can't handle indirect branch.
380 }
381
382 // Get the instruction before it if it is a terminator.
383 MachineInstr *SecondLastInst = &*I;
384 unsigned SecondLastOpc = SecondLastInst->getOpcode();
385
386 // If AllowModify is true and the block ends with two or more unconditional
387 // branches, delete all but the first unconditional branch.
388 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
389 while (isUncondBranchOpcode(SecondLastOpc)) {
390 LastInst->eraseFromParent();
391 LastInst = SecondLastInst;
392 LastOpc = LastInst->getOpcode();
393 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
394 // Return now that the only terminator is an unconditional branch.
395 TBB = LastInst->getOperand(0).getMBB();
396 return false;
397 }
398 SecondLastInst = &*I;
399 SecondLastOpc = SecondLastInst->getOpcode();
400 }
401 }
402
403 // If we're allowed to modify and the block ends in an unconditional branch
404 // which could simply fallthrough, remove the branch. (Note: This case only
405 // matters when we can't understand the whole sequence, otherwise it's also
406 // handled by BranchFolding.cpp.)
407 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
408 MBB.isLayoutSuccessor(LastInst->getOperand(0).getMBB())) {
409 LastInst->eraseFromParent();
410 LastInst = SecondLastInst;
411 LastOpc = LastInst->getOpcode();
412 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
413 assert(!isUncondBranchOpcode(LastOpc) &&
414 "unreachable unconditional branches removed above");
415
416 if (isCondBranchOpcode(LastOpc)) {
417 // Block ends with fall-through condbranch.
418 parseCondBranch(LastInst, TBB, Cond);
419 return false;
420 }
421 return true; // Can't handle indirect branch.
422 }
423 SecondLastInst = &*I;
424 SecondLastOpc = SecondLastInst->getOpcode();
425 }
426
427 // If there are three terminators, we don't know what sort of block this is.
428 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
429 return true;
430
431 // If the block ends with a B and a Bcc, handle it.
432 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
433 parseCondBranch(SecondLastInst, TBB, Cond);
434 FBB = LastInst->getOperand(0).getMBB();
435 return false;
436 }
437
438 // If the block ends with two unconditional branches, handle it. The second
439 // one is not executed, so remove it.
440 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
441 TBB = SecondLastInst->getOperand(0).getMBB();
442 I = LastInst;
443 if (AllowModify)
444 I->eraseFromParent();
445 return false;
446 }
447
448 // ...likewise if it ends with an indirect branch followed by an unconditional
449 // branch.
450 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
451 I = LastInst;
452 if (AllowModify)
453 I->eraseFromParent();
454 return true;
455 }
456
457 // Otherwise, can't handle this.
458 return true;
459}
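// Illustrative example (not from the upstream source): for a block ending in
//   cbz w0, %bb.2
//   b   %bb.3
// analyzeBranch returns false with TBB = %bb.2, FBB = %bb.3 and
// Cond = [-1, CBZW, %w0]; an indirect terminator makes it return true,
// meaning the block cannot be analyzed.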
460
461bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
462 MachineBranchPredicate &MBP,
463 bool AllowModify) const {
464 // For the moment, handle only a block which ends with a cb(n)zx followed by
465 // a fallthrough. Why this? Because it is a common form.
466 // TODO: Should we handle b.cc?
467
468 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
469 if (I == MBB.end())
470 return true;
471
472 // Skip over SpeculationBarrierEndBB terminators
473 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
474 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
475 --I;
476 }
477
478 if (!isUnpredicatedTerminator(*I))
479 return true;
480
481 // Get the last instruction in the block.
482 MachineInstr *LastInst = &*I;
483 unsigned LastOpc = LastInst->getOpcode();
484 if (!isCondBranchOpcode(LastOpc))
485 return true;
486
487 switch (LastOpc) {
488 default:
489 return true;
490 case AArch64::CBZW:
491 case AArch64::CBZX:
492 case AArch64::CBNZW:
493 case AArch64::CBNZX:
494 break;
495 };
496
497 MBP.TrueDest = LastInst->getOperand(1).getMBB();
498 assert(MBP.TrueDest && "expected!");
499 MBP.FalseDest = MBB.getNextNode();
500
501 MBP.ConditionDef = nullptr;
502 MBP.SingleUseCondition = false;
503
504 MBP.LHS = LastInst->getOperand(0);
505 MBP.RHS = MachineOperand::CreateImm(0);
506 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
507 : MachineBranchPredicate::PRED_EQ;
508 return false;
509}
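// Illustrative example: a block ending in "cbnz x0, %bb.3" followed by a
// fallthrough yields LHS = x0, RHS = 0, Predicate = PRED_NE,
// TrueDest = %bb.3 and FalseDest = the next block in layout order.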
510
511bool AArch64InstrInfo::reverseBranchCondition(
512 SmallVectorImpl<MachineOperand> &Cond) const {
513 if (Cond[0].getImm() != -1) {
514 // Regular Bcc
515 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
516 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
517 } else {
518 // Folded compare-and-branch
519 switch (Cond[1].getImm()) {
520 default:
521 llvm_unreachable("Unknown conditional branch!");
522 case AArch64::CBZW:
523 Cond[1].setImm(AArch64::CBNZW);
524 break;
525 case AArch64::CBNZW:
526 Cond[1].setImm(AArch64::CBZW);
527 break;
528 case AArch64::CBZX:
529 Cond[1].setImm(AArch64::CBNZX);
530 break;
531 case AArch64::CBNZX:
532 Cond[1].setImm(AArch64::CBZX);
533 break;
534 case AArch64::TBZW:
535 Cond[1].setImm(AArch64::TBNZW);
536 break;
537 case AArch64::TBNZW:
538 Cond[1].setImm(AArch64::TBZW);
539 break;
540 case AArch64::TBZX:
541 Cond[1].setImm(AArch64::TBNZX);
542 break;
543 case AArch64::TBNZX:
544 Cond[1].setImm(AArch64::TBZX);
545 break;
546 }
547 }
548
549 return false;
550}
551
552unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
553 int *BytesRemoved) const {
554 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
555 if (I == MBB.end())
556 return 0;
557
558 if (!isUncondBranchOpcode(I->getOpcode()) &&
559 !isCondBranchOpcode(I->getOpcode()))
560 return 0;
561
562 // Remove the branch.
563 I->eraseFromParent();
564
565 I = MBB.end();
566
567 if (I == MBB.begin()) {
568 if (BytesRemoved)
569 *BytesRemoved = 4;
570 return 1;
571 }
572 --I;
573 if (!isCondBranchOpcode(I->getOpcode())) {
574 if (BytesRemoved)
575 *BytesRemoved = 4;
576 return 1;
577 }
578
579 // Remove the branch.
580 I->eraseFromParent();
581 if (BytesRemoved)
582 *BytesRemoved = 8;
583
584 return 2;
585}
586
587void AArch64InstrInfo::instantiateCondBranch(
588 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
589 ArrayRef<MachineOperand> Cond) const {
590 if (Cond[0].getImm() != -1) {
591 // Regular Bcc
592 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
593 } else {
594 // Folded compare-and-branch
595 // Note that we use addOperand instead of addReg to keep the flags.
596 const MachineInstrBuilder MIB =
597 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
598 if (Cond.size() > 3)
599 MIB.addImm(Cond[3].getImm());
600 MIB.addMBB(TBB);
601 }
602}
603
604unsigned AArch64InstrInfo::insertBranch(
605 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
606 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
607 // Shouldn't be a fall through.
608 assert(TBB && "insertBranch must not be told to insert a fallthrough");
609
610 if (!FBB) {
611 if (Cond.empty()) // Unconditional branch?
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
613 else
614 instantiateCondBranch(MBB, DL, TBB, Cond);
615
616 if (BytesAdded)
617 *BytesAdded = 4;
618
619 return 1;
620 }
621
622 // Two-way conditional branch.
623 instantiateCondBranch(MBB, DL, TBB, Cond);
624 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
625
626 if (BytesAdded)
627 *BytesAdded = 8;
628
629 return 2;
630}
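// Illustrative example: a two-way branch with Cond = [-1, CBZW, %w0] and
// targets TBB/FBB is emitted as
//   cbz w0, TBB
//   b   FBB
// and reported as 8 bytes added.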
631
632// Find the original register that VReg is copied from.
633static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
634 while (Register::isVirtualRegister(VReg)) {
635 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
636 if (!DefMI->isFullCopy())
637 return VReg;
638 VReg = DefMI->getOperand(1).getReg();
639 }
640 return VReg;
641}
642
643// Determine if VReg is defined by an instruction that can be folded into a
644// csel instruction. If so, return the folded opcode, and the replacement
645// register.
646static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
647 unsigned *NewVReg = nullptr) {
648 VReg = removeCopies(MRI, VReg);
649 if (!Register::isVirtualRegister(VReg))
650 return 0;
651
652 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
653 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
654 unsigned Opc = 0;
655 unsigned SrcOpNum = 0;
656 switch (DefMI->getOpcode()) {
657 case AArch64::ADDSXri:
658 case AArch64::ADDSWri:
659 // if NZCV is used, do not fold.
660 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
661 true) == -1)
662 return 0;
663 // fall-through to ADDXri and ADDWri.
664 [[fallthrough]];
665 case AArch64::ADDXri:
666 case AArch64::ADDWri:
667 // add x, 1 -> csinc.
668 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
669 DefMI->getOperand(3).getImm() != 0)
670 return 0;
671 SrcOpNum = 1;
672 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
673 break;
674
675 case AArch64::ORNXrr:
676 case AArch64::ORNWrr: {
677 // not x -> csinv, represented as orn dst, xzr, src.
678 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
679 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
680 return 0;
681 SrcOpNum = 2;
682 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
683 break;
684 }
685
686 case AArch64::SUBSXrr:
687 case AArch64::SUBSWrr:
688 // if NZCV is used, do not fold.
689 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
690 true) == -1)
691 return 0;
692 // fall-through to SUBXrr and SUBWrr.
693 [[fallthrough]];
694 case AArch64::SUBXrr:
695 case AArch64::SUBWrr: {
696 // neg x -> csneg, represented as sub dst, xzr, src.
697 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
698 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
699 return 0;
700 SrcOpNum = 2;
701 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
702 break;
703 }
704 default:
705 return 0;
706 }
707 assert(Opc && SrcOpNum && "Missing parameters");
708
709 if (NewVReg)
710 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
711 return Opc;
712}
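// Illustrative summary of the folding above: an "add x, 1" feeding a select
// can be absorbed into CSINC, a "not x" (ORN from the zero register) into
// CSINV, and a "neg x" (SUB from the zero register) into CSNEG, so the select
// operand needs no separate instruction.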
713
714bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
715 ArrayRef<MachineOperand> Cond,
716 Register DstReg, Register TrueReg,
717 Register FalseReg, int &CondCycles,
718 int &TrueCycles,
719 int &FalseCycles) const {
720 // Check register classes.
721 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
722 const TargetRegisterClass *RC =
723 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
724 if (!RC)
725 return false;
726
727 // Also need to check the dest regclass, in case we're trying to optimize
728 // something like:
729 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
730 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
731 return false;
732
733 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
734 unsigned ExtraCondLat = Cond.size() != 1;
735
736 // GPRs are handled by csel.
737 // FIXME: Fold in x+1, -x, and ~x when applicable.
738 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
739 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
740 // Single-cycle csel, csinc, csinv, and csneg.
741 CondCycles = 1 + ExtraCondLat;
742 TrueCycles = FalseCycles = 1;
743 if (canFoldIntoCSel(MRI, TrueReg))
744 TrueCycles = 0;
745 else if (canFoldIntoCSel(MRI, FalseReg))
746 FalseCycles = 0;
747 return true;
748 }
749
750 // Scalar floating point is handled by fcsel.
751 // FIXME: Form fabs, fmin, and fmax when applicable.
752 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
753 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
754 CondCycles = 5 + ExtraCondLat;
755 TrueCycles = FalseCycles = 2;
756 return true;
757 }
758
759 // Can't do vectors.
760 return false;
761}
762
763void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
764 MachineBasicBlock::iterator I,
765 const DebugLoc &DL, Register DstReg,
766 ArrayRef<MachineOperand> Cond,
767 Register TrueReg, Register FalseReg) const {
768 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
769
770 // Parse the condition code, see parseCondBranch() above.
771 AArch64CC::CondCode CC;
772 switch (Cond.size()) {
773 default:
774 llvm_unreachable("Unknown condition opcode in Cond");
775 case 1: // b.cc
776 CC = AArch64CC::CondCode(Cond[0].getImm());
777 break;
778 case 3: { // cbz/cbnz
779 // We must insert a compare against 0.
780 bool Is64Bit;
781 switch (Cond[1].getImm()) {
782 default:
783 llvm_unreachable("Unknown branch opcode in Cond");
784 case AArch64::CBZW:
785 Is64Bit = false;
786 CC = AArch64CC::EQ;
787 break;
788 case AArch64::CBZX:
789 Is64Bit = true;
790 CC = AArch64CC::EQ;
791 break;
792 case AArch64::CBNZW:
793 Is64Bit = false;
794 CC = AArch64CC::NE;
795 break;
796 case AArch64::CBNZX:
797 Is64Bit = true;
798 CC = AArch64CC::NE;
799 break;
800 }
801 Register SrcReg = Cond[2].getReg();
802 if (Is64Bit) {
803 // cmp reg, #0 is actually subs xzr, reg, #0.
804 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
805 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
806 .addReg(SrcReg)
807 .addImm(0)
808 .addImm(0);
809 } else {
810 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
811 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
812 .addReg(SrcReg)
813 .addImm(0)
814 .addImm(0);
815 }
816 break;
817 }
818 case 4: { // tbz/tbnz
819 // We must insert a tst instruction.
820 switch (Cond[1].getImm()) {
821 default:
822 llvm_unreachable("Unknown branch opcode in Cond");
823 case AArch64::TBZW:
824 case AArch64::TBZX:
825 CC = AArch64CC::EQ;
826 break;
827 case AArch64::TBNZW:
828 case AArch64::TBNZX:
829 CC = AArch64CC::NE;
830 break;
831 }
832 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
833 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
834 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
835 .addReg(Cond[2].getReg())
836 .addImm(
837 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
838 else
839 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
840 .addReg(Cond[2].getReg())
841 .addImm(
842 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
843 break;
844 }
845 }
846
847 unsigned Opc = 0;
848 const TargetRegisterClass *RC = nullptr;
849 bool TryFold = false;
850 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
851 RC = &AArch64::GPR64RegClass;
852 Opc = AArch64::CSELXr;
853 TryFold = true;
854 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
855 RC = &AArch64::GPR32RegClass;
856 Opc = AArch64::CSELWr;
857 TryFold = true;
858 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
859 RC = &AArch64::FPR64RegClass;
860 Opc = AArch64::FCSELDrrr;
861 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
862 RC = &AArch64::FPR32RegClass;
863 Opc = AArch64::FCSELSrrr;
864 }
865 assert(RC && "Unsupported regclass");
866
867 // Try folding simple instructions into the csel.
868 if (TryFold) {
869 unsigned NewVReg = 0;
870 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
871 if (FoldedOpc) {
872 // The folded opcodes csinc, csinv and csneg apply the operation to
873 // FalseReg, so we need to invert the condition.
874 CC = AArch64CC::getInvertedCondCode(CC);
875 TrueReg = FalseReg;
876 } else
877 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
878
879 // Fold the operation. Leave any dead instructions for DCE to clean up.
880 if (FoldedOpc) {
881 FalseReg = NewVReg;
882 Opc = FoldedOpc;
883 // This extends the live range of NewVReg.
884 MRI.clearKillFlags(NewVReg);
885 }
886 }
887
888 // Pull all virtual registers into the appropriate class.
889 MRI.constrainRegClass(TrueReg, RC);
890 MRI.constrainRegClass(FalseReg, RC);
891
892 // Insert the csel.
893 BuildMI(MBB, I, DL, get(Opc), DstReg)
894 .addReg(TrueReg)
895 .addReg(FalseReg)
896 .addImm(CC);
897}
898
899// Return true if Imm can be loaded into a register by a "cheap" sequence of
900// instructions. For now, "cheap" means at most two instructions.
901static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
902 if (BitSize == 32)
903 return true;
904
905 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
906 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
907 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
908 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
909
910 return Is.size() <= 2;
911}
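// Illustrative examples: 0x0000ffff0000ffff is a valid logical immediate and
// expands to a single ORR, and 0x12340000 to a single MOVZ, so both are
// cheap; a constant needing MOVZ plus three MOVKs expands to four
// instructions and is not.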
912
913// FIXME: this implementation should be micro-architecture dependent, so a
914// micro-architecture target hook should be introduced here in future.
915bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
916 if (Subtarget.hasExynosCheapAsMoveHandling()) {
917 if (isExynosCheapAsMove(MI))
918 return true;
919 return MI.isAsCheapAsAMove();
920 }
921
922 switch (MI.getOpcode()) {
923 default:
924 return MI.isAsCheapAsAMove();
925
926 case AArch64::ADDWrs:
927 case AArch64::ADDXrs:
928 case AArch64::SUBWrs:
929 case AArch64::SUBXrs:
930 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
931
932 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
933 // ORRXri, it is as cheap as MOV.
934 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
935 case AArch64::MOVi32imm:
936 return isCheapImmediate(MI, 32);
937 case AArch64::MOVi64imm:
938 return isCheapImmediate(MI, 64);
939 }
940}
941
942bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
943 switch (MI.getOpcode()) {
944 default:
945 return false;
946
947 case AArch64::ADDWrs:
948 case AArch64::ADDXrs:
949 case AArch64::ADDSWrs:
950 case AArch64::ADDSXrs: {
951 unsigned Imm = MI.getOperand(3).getImm();
952 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
953 if (ShiftVal == 0)
954 return true;
955 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
956 }
957
958 case AArch64::ADDWrx:
959 case AArch64::ADDXrx:
960 case AArch64::ADDXrx64:
961 case AArch64::ADDSWrx:
962 case AArch64::ADDSXrx:
963 case AArch64::ADDSXrx64: {
964 unsigned Imm = MI.getOperand(3).getImm();
965 switch (AArch64_AM::getArithExtendType(Imm)) {
966 default:
967 return false;
968 case AArch64_AM::UXTB:
969 case AArch64_AM::UXTH:
970 case AArch64_AM::UXTW:
971 case AArch64_AM::UXTX:
972 return AArch64_AM::getArithShiftValue(Imm) <= 4;
973 }
974 }
975
976 case AArch64::SUBWrs:
977 case AArch64::SUBSWrs: {
978 unsigned Imm = MI.getOperand(3).getImm();
979 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
980 return ShiftVal == 0 ||
981 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
982 }
983
984 case AArch64::SUBXrs:
985 case AArch64::SUBSXrs: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
988 return ShiftVal == 0 ||
989 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
990 }
991
992 case AArch64::SUBWrx:
993 case AArch64::SUBXrx:
994 case AArch64::SUBXrx64:
995 case AArch64::SUBSWrx:
996 case AArch64::SUBSXrx:
997 case AArch64::SUBSXrx64: {
998 unsigned Imm = MI.getOperand(3).getImm();
999 switch (AArch64_AM::getArithExtendType(Imm)) {
1000 default:
1001 return false;
1002 case AArch64_AM::UXTB:
1003 case AArch64_AM::UXTH:
1004 case AArch64_AM::UXTW:
1005 case AArch64_AM::UXTX:
1006 return AArch64_AM::getArithShiftValue(Imm) == 0;
1007 }
1008 }
1009
1010 case AArch64::LDRBBroW:
1011 case AArch64::LDRBBroX:
1012 case AArch64::LDRBroW:
1013 case AArch64::LDRBroX:
1014 case AArch64::LDRDroW:
1015 case AArch64::LDRDroX:
1016 case AArch64::LDRHHroW:
1017 case AArch64::LDRHHroX:
1018 case AArch64::LDRHroW:
1019 case AArch64::LDRHroX:
1020 case AArch64::LDRQroW:
1021 case AArch64::LDRQroX:
1022 case AArch64::LDRSBWroW:
1023 case AArch64::LDRSBWroX:
1024 case AArch64::LDRSBXroW:
1025 case AArch64::LDRSBXroX:
1026 case AArch64::LDRSHWroW:
1027 case AArch64::LDRSHWroX:
1028 case AArch64::LDRSHXroW:
1029 case AArch64::LDRSHXroX:
1030 case AArch64::LDRSWroW:
1031 case AArch64::LDRSWroX:
1032 case AArch64::LDRSroW:
1033 case AArch64::LDRSroX:
1034 case AArch64::LDRWroW:
1035 case AArch64::LDRWroX:
1036 case AArch64::LDRXroW:
1037 case AArch64::LDRXroX:
1038 case AArch64::PRFMroW:
1039 case AArch64::PRFMroX:
1040 case AArch64::STRBBroW:
1041 case AArch64::STRBBroX:
1042 case AArch64::STRBroW:
1043 case AArch64::STRBroX:
1044 case AArch64::STRDroW:
1045 case AArch64::STRDroX:
1046 case AArch64::STRHHroW:
1047 case AArch64::STRHHroX:
1048 case AArch64::STRHroW:
1049 case AArch64::STRHroX:
1050 case AArch64::STRQroW:
1051 case AArch64::STRQroX:
1052 case AArch64::STRSroW:
1053 case AArch64::STRSroX:
1054 case AArch64::STRWroW:
1055 case AArch64::STRWroX:
1056 case AArch64::STRXroW:
1057 case AArch64::STRXroX: {
1058 unsigned IsSigned = MI.getOperand(3).getImm();
1059 return !IsSigned;
1060 }
1061 }
1062}
1063
1064bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1065 unsigned Opc = MI.getOpcode();
1066 switch (Opc) {
1067 default:
1068 return false;
1069 case AArch64::SEH_StackAlloc:
1070 case AArch64::SEH_SaveFPLR:
1071 case AArch64::SEH_SaveFPLR_X:
1072 case AArch64::SEH_SaveReg:
1073 case AArch64::SEH_SaveReg_X:
1074 case AArch64::SEH_SaveRegP:
1075 case AArch64::SEH_SaveRegP_X:
1076 case AArch64::SEH_SaveFReg:
1077 case AArch64::SEH_SaveFReg_X:
1078 case AArch64::SEH_SaveFRegP:
1079 case AArch64::SEH_SaveFRegP_X:
1080 case AArch64::SEH_SetFP:
1081 case AArch64::SEH_AddFP:
1082 case AArch64::SEH_Nop:
1083 case AArch64::SEH_PrologEnd:
1084 case AArch64::SEH_EpilogStart:
1085 case AArch64::SEH_EpilogEnd:
1086 case AArch64::SEH_PACSignLR:
1087 case AArch64::SEH_SaveAnyRegQP:
1088 case AArch64::SEH_SaveAnyRegQPX:
1089 return true;
1090 }
1091}
1092
1093bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1094 Register &SrcReg, Register &DstReg,
1095 unsigned &SubIdx) const {
1096 switch (MI.getOpcode()) {
1097 default:
1098 return false;
1099 case AArch64::SBFMXri: // aka sxtw
1100 case AArch64::UBFMXri: // aka uxtw
1101 // Check for the 32 -> 64 bit extension case, these instructions can do
1102 // much more.
1103 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1104 return false;
1105 // This is a signed or unsigned 32 -> 64 bit extension.
1106 SrcReg = MI.getOperand(1).getReg();
1107 DstReg = MI.getOperand(0).getReg();
1108 SubIdx = AArch64::sub_32;
1109 return true;
1110 }
1111}
1112
1113bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1114 const MachineInstr &MIa, const MachineInstr &MIb) const {
1115 const TargetRegisterInfo *TRI = &getRegisterInfo();
1116 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1117 int64_t OffsetA = 0, OffsetB = 0;
1118 TypeSize WidthA(0, false), WidthB(0, false);
1119 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1120
1121 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1122 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1123
1124 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1125 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1126 return false;
1127
1128 // Retrieve the base, offset from the base and width. Width
1129 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If the
1130 // bases are identical, and the offset of a lower memory access +
1131 // the width doesn't overlap the offset of a higher memory access,
1132 // then the memory accesses are different.
1133 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1134 // are assumed to have the same scale (vscale).
1135 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1136 WidthA, TRI) &&
1137 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1138 WidthB, TRI)) {
1139 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1140 OffsetAIsScalable == OffsetBIsScalable) {
1141 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1142 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1143 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1144 if (LowWidth.isScalable() == OffsetAIsScalable &&
1145 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1146 return true;
1147 }
1148 }
1149 return false;
1150}
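// Illustrative example: "ldr x0, [x1]" and "str x2, [x1, #8]" share the base
// x1, the lower access is 8 bytes wide, and 0 + 8 <= 8, so the two accesses
// are reported as trivially disjoint. Accesses via different base registers
// are not, since nothing is known about their relative addresses.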
1151
1152bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1153 const MachineBasicBlock *MBB,
1154 const MachineFunction &MF) const {
1155 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1156 return true;
1157
1158 // Do not move an instruction that can be recognized as a branch target.
1159 if (hasBTISemantics(MI))
1160 return true;
1161
1162 switch (MI.getOpcode()) {
1163 case AArch64::HINT:
1164 // CSDB hints are scheduling barriers.
1165 if (MI.getOperand(0).getImm() == 0x14)
1166 return true;
1167 break;
1168 case AArch64::DSB:
1169 case AArch64::ISB:
1170 // DSB and ISB also are scheduling barriers.
1171 return true;
1172 case AArch64::MSRpstatesvcrImm1:
1173 // SMSTART and SMSTOP are also scheduling barriers.
1174 return true;
1175 default:;
1176 }
1177 if (isSEHInstruction(MI))
1178 return true;
1179 auto Next = std::next(MI.getIterator());
1180 return Next != MBB->end() && Next->isCFIInstruction();
1181}
1182
1183/// analyzeCompare - For a comparison instruction, return the source registers
1184/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1185/// Return true if the comparison instruction can be analyzed.
1186bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1187 Register &SrcReg2, int64_t &CmpMask,
1188 int64_t &CmpValue) const {
1189 // The first operand can be a frame index where we'd normally expect a
1190 // register.
1191 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1192 if (!MI.getOperand(1).isReg())
1193 return false;
1194
1195 switch (MI.getOpcode()) {
1196 default:
1197 break;
1198 case AArch64::PTEST_PP:
1199 case AArch64::PTEST_PP_ANY:
1200 SrcReg = MI.getOperand(0).getReg();
1201 SrcReg2 = MI.getOperand(1).getReg();
1202 // Not sure about the mask and value for now...
1203 CmpMask = ~0;
1204 CmpValue = 0;
1205 return true;
1206 case AArch64::SUBSWrr:
1207 case AArch64::SUBSWrs:
1208 case AArch64::SUBSWrx:
1209 case AArch64::SUBSXrr:
1210 case AArch64::SUBSXrs:
1211 case AArch64::SUBSXrx:
1212 case AArch64::ADDSWrr:
1213 case AArch64::ADDSWrs:
1214 case AArch64::ADDSWrx:
1215 case AArch64::ADDSXrr:
1216 case AArch64::ADDSXrs:
1217 case AArch64::ADDSXrx:
1218 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1219 SrcReg = MI.getOperand(1).getReg();
1220 SrcReg2 = MI.getOperand(2).getReg();
1221 CmpMask = ~0;
1222 CmpValue = 0;
1223 return true;
1224 case AArch64::SUBSWri:
1225 case AArch64::ADDSWri:
1226 case AArch64::SUBSXri:
1227 case AArch64::ADDSXri:
1228 SrcReg = MI.getOperand(1).getReg();
1229 SrcReg2 = 0;
1230 CmpMask = ~0;
1231 CmpValue = MI.getOperand(2).getImm();
1232 return true;
1233 case AArch64::ANDSWri:
1234 case AArch64::ANDSXri:
1235 // ANDS does not use the same encoding scheme as the other xxxS
1236 // instructions.
1237 SrcReg = MI.getOperand(1).getReg();
1238 SrcReg2 = 0;
1239 CmpMask = ~0;
1240 CmpValue = AArch64_AM::decodeLogicalImmediate(
1241 MI.getOperand(2).getImm(),
1242 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1243 return true;
1244 }
1245
1246 return false;
1247}
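// Illustrative example: for "%2 = SUBSWri %0, 42, 0" analyzeCompare reports
// SrcReg = %0, SrcReg2 = 0 and CmpValue = 42; optimizeCompareInstr below only
// acts when CmpValue is 0 or 1.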
1248
1249static bool UpdateOperandRegClass(MachineInstr &Instr) {
1250 MachineBasicBlock *MBB = Instr.getParent();
1251 assert(MBB && "Can't get MachineBasicBlock here");
1252 MachineFunction *MF = MBB->getParent();
1253 assert(MF && "Can't get MachineFunction here");
1254 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1255 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1256 MachineRegisterInfo *MRI = &MF->getRegInfo();
1257
1258 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1259 ++OpIdx) {
1260 MachineOperand &MO = Instr.getOperand(OpIdx);
1261 const TargetRegisterClass *OpRegCstraints =
1262 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1263
1264 // If there's no constraint, there's nothing to do.
1265 if (!OpRegCstraints)
1266 continue;
1267 // If the operand is a frame index, there's nothing to do here.
1268 // A frame index operand will resolve correctly during PEI.
1269 if (MO.isFI())
1270 continue;
1271
1272 assert(MO.isReg() &&
1273 "Operand has register constraints without being a register!");
1274
1275 Register Reg = MO.getReg();
1276 if (Reg.isPhysical()) {
1277 if (!OpRegCstraints->contains(Reg))
1278 return false;
1279 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1280 !MRI->constrainRegClass(Reg, OpRegCstraints))
1281 return false;
1282 }
1283
1284 return true;
1285}
1286
1287/// Return the opcode that does not set flags when possible - otherwise
1288/// return the original opcode. The caller is responsible to do the actual
1289/// substitution and legality checking.
1290unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1291 // Don't convert all compare instructions, because for some the zero register
1292 // encoding becomes the sp register.
1293 bool MIDefinesZeroReg = false;
1294 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1295 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1296 MIDefinesZeroReg = true;
1297
1298 switch (MI.getOpcode()) {
1299 default:
1300 return MI.getOpcode();
1301 case AArch64::ADDSWrr:
1302 return AArch64::ADDWrr;
1303 case AArch64::ADDSWri:
1304 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1305 case AArch64::ADDSWrs:
1306 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1307 case AArch64::ADDSWrx:
1308 return AArch64::ADDWrx;
1309 case AArch64::ADDSXrr:
1310 return AArch64::ADDXrr;
1311 case AArch64::ADDSXri:
1312 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1313 case AArch64::ADDSXrs:
1314 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1315 case AArch64::ADDSXrx:
1316 return AArch64::ADDXrx;
1317 case AArch64::SUBSWrr:
1318 return AArch64::SUBWrr;
1319 case AArch64::SUBSWri:
1320 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1321 case AArch64::SUBSWrs:
1322 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1323 case AArch64::SUBSWrx:
1324 return AArch64::SUBWrx;
1325 case AArch64::SUBSXrr:
1326 return AArch64::SUBXrr;
1327 case AArch64::SUBSXri:
1328 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1329 case AArch64::SUBSXrs:
1330 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1331 case AArch64::SUBSXrx:
1332 return AArch64::SUBXrx;
1333 }
1334}
1335
1336enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1337
1338/// True when condition flags are accessed (either by writing or reading)
1339/// on the instruction trace starting at From and ending at To.
1340///
1341/// Note: If From and To are from different blocks it's assumed CC are accessed
1342/// on the path.
1343static bool areCFlagsAccessedBetweenInstrs(
1344 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1345 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1346 // Early exit if To is at the beginning of the BB.
1347 if (To == To->getParent()->begin())
1348 return true;
1349
1350 // Check whether the instructions are in the same basic block
1351 // If not, assume the condition flags might get modified somewhere.
1352 if (To->getParent() != From->getParent())
1353 return true;
1354
1355 // From must be above To.
1356 assert(std::any_of(
1357 ++To.getReverse(), To->getParent()->rend(),
1358 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1359
1360 // We iterate backward starting at \p To until we hit \p From.
1361 for (const MachineInstr &Instr :
1362 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1363 if (((AccessToCheck & AK_Write) &&
1364 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1365 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1366 return true;
1367 }
1368 return false;
1369}
1370
1371std::optional<unsigned>
1372AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1373 MachineInstr *Pred,
1374 const MachineRegisterInfo *MRI) const {
1375 unsigned MaskOpcode = Mask->getOpcode();
1376 unsigned PredOpcode = Pred->getOpcode();
1377 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1378 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1379
1380 if (PredIsWhileLike) {
1381 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1382 // instruction and the condition is "any" since WHILEcc does an implicit
1383 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1384 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1385 return PredOpcode;
1386
1387 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1388 // redundant since WHILE performs an implicit PTEST with an all active
1389 // mask.
1390 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1391 getElementSizeForOpcode(MaskOpcode) ==
1392 getElementSizeForOpcode(PredOpcode))
1393 return PredOpcode;
1394
1395 return {};
1396 }
1397
1398 if (PredIsPTestLike) {
1399 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1400 // instruction that sets the flags as PTEST would and the condition is
1401 // "any" since PG is always a subset of the governing predicate of the
1402 // ptest-like instruction.
1403 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1404 return PredOpcode;
1405
1406 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1407 // element size matches and either the PTEST_LIKE instruction uses
1408 // the same all active mask or the condition is "any".
1409 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1410 getElementSizeForOpcode(MaskOpcode) ==
1411 getElementSizeForOpcode(PredOpcode)) {
1412 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1413 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1414 return PredOpcode;
1415 }
1416
1417 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1418 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1419 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1420 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1421 // performed by the compare could consider fewer lanes for these element
1422 // sizes.
1423 //
1424 // For example, consider
1425 //
1426 // ptrue p0.b ; P0=1111-1111-1111-1111
1427 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1428 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1429 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1430 // ; ^ last active
1431 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1432 // ; ^ last active
1433 //
1434 // where the compare generates a canonical all active 32-bit predicate
1435 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1436 // active flag, whereas the PTEST instruction with the same mask doesn't.
1437 // For PTEST_ANY this doesn't apply as the flags in this case would be
1438 // identical regardless of element size.
1439 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1440 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1441 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1442 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1443 return PredOpcode;
1444
1445 return {};
1446 }
1447
1448 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1449 // opcode so the PTEST becomes redundant.
1450 switch (PredOpcode) {
1451 case AArch64::AND_PPzPP:
1452 case AArch64::BIC_PPzPP:
1453 case AArch64::EOR_PPzPP:
1454 case AArch64::NAND_PPzPP:
1455 case AArch64::NOR_PPzPP:
1456 case AArch64::ORN_PPzPP:
1457 case AArch64::ORR_PPzPP:
1458 case AArch64::BRKA_PPzP:
1459 case AArch64::BRKPA_PPzPP:
1460 case AArch64::BRKB_PPzP:
1461 case AArch64::BRKPB_PPzPP:
1462 case AArch64::RDFFR_PPz: {
1463 // Check to see if our mask is the same. If not the resulting flag bits
1464 // may be different and we can't remove the ptest.
1465 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1466 if (Mask != PredMask)
1467 return {};
1468 break;
1469 }
1470 case AArch64::BRKN_PPzP: {
1471 // BRKN uses an all active implicit mask to set flags unlike the other
1472 // flag-setting instructions.
1473 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1474 if ((MaskOpcode != AArch64::PTRUE_B) ||
1475 (Mask->getOperand(1).getImm() != 31))
1476 return {};
1477 break;
1478 }
1479 case AArch64::PTRUE_B:
1480 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1481 break;
1482 default:
1483 // Bail out if we don't recognize the input
1484 return {};
1485 }
1486
1487 return convertToFlagSettingOpc(PredOpcode);
1488}
1489
1490/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1491/// operation which could set the flags in an identical manner
1492bool AArch64InstrInfo::optimizePTestInstr(
1493 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1494 const MachineRegisterInfo *MRI) const {
1495 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1496 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1497 unsigned PredOpcode = Pred->getOpcode();
1498 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1499 if (!NewOp)
1500 return false;
1501
1502 const TargetRegisterInfo *TRI = &getRegisterInfo();
1503
1504 // If another instruction between Pred and PTest accesses flags, don't remove
1505 // the ptest or update the earlier instruction to modify them.
1506 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1507 return false;
1508
1509 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1510 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1511 // operand to be replaced with an equivalent instruction that also sets the
1512 // flags.
1513 PTest->eraseFromParent();
1514 if (*NewOp != PredOpcode) {
1515 Pred->setDesc(get(*NewOp));
1516 bool succeeded = UpdateOperandRegClass(*Pred);
1517 (void)succeeded;
1518 assert(succeeded && "Operands have incompatible register classes!");
1519 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1520 }
1521
1522 // Ensure that the flags def is live.
1523 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1524 unsigned i = 0, e = Pred->getNumOperands();
1525 for (; i != e; ++i) {
1526 MachineOperand &MO = Pred->getOperand(i);
1527 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1528 MO.setIsDead(false);
1529 break;
1530 }
1531 }
1532 }
1533 return true;
1534}
1535
1536/// Try to optimize a compare instruction. A compare instruction is an
1537/// instruction which produces AArch64::NZCV. It is a true compare
1538/// instruction
1539/// only when there are no uses of its destination register.
1540///
1541/// The following steps are tried in order:
1542/// 1. Convert CmpInstr into an unconditional version.
1543/// 2. Remove CmpInstr if above there is an instruction producing a needed
1544/// condition code or an instruction which can be converted into such an
1545/// instruction.
1546/// Only comparison with zero is supported.
1547bool AArch64InstrInfo::optimizeCompareInstr(
1548 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1549 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1550 assert(CmpInstr.getParent());
1551 assert(MRI);
1552
1553 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1554 int DeadNZCVIdx =
1555 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1556 if (DeadNZCVIdx != -1) {
1557 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1558 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1559 CmpInstr.eraseFromParent();
1560 return true;
1561 }
1562 unsigned Opc = CmpInstr.getOpcode();
1563 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1564 if (NewOpc == Opc)
1565 return false;
1566 const MCInstrDesc &MCID = get(NewOpc);
1567 CmpInstr.setDesc(MCID);
1568 CmpInstr.removeOperand(DeadNZCVIdx);
1569 bool succeeded = UpdateOperandRegClass(CmpInstr);
1570 (void)succeeded;
1571 assert(succeeded && "Some operands reg class are incompatible!");
1572 return true;
1573 }
1574
1575 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1576 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1577 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1578
1579 if (SrcReg2 != 0)
1580 return false;
1581
1582 // CmpInstr is a Compare instruction if destination register is not used.
1583 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1584 return false;
1585
1586 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1587 return true;
1588 return (CmpValue == 0 || CmpValue == 1) &&
1589 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1590}
1591
1592/// Get opcode of S version of Instr.
1593/// If Instr is S version its opcode is returned.
1594/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1595/// or we are not interested in it.
1596static unsigned sForm(MachineInstr &Instr) {
1597 switch (Instr.getOpcode()) {
1598 default:
1599 return AArch64::INSTRUCTION_LIST_END;
1600
1601 case AArch64::ADDSWrr:
1602 case AArch64::ADDSWri:
1603 case AArch64::ADDSXrr:
1604 case AArch64::ADDSXri:
1605 case AArch64::SUBSWrr:
1606 case AArch64::SUBSWri:
1607 case AArch64::SUBSXrr:
1608 case AArch64::SUBSXri:
1609 return Instr.getOpcode();
1610
1611 case AArch64::ADDWrr:
1612 return AArch64::ADDSWrr;
1613 case AArch64::ADDWri:
1614 return AArch64::ADDSWri;
1615 case AArch64::ADDXrr:
1616 return AArch64::ADDSXrr;
1617 case AArch64::ADDXri:
1618 return AArch64::ADDSXri;
1619 case AArch64::ADCWr:
1620 return AArch64::ADCSWr;
1621 case AArch64::ADCXr:
1622 return AArch64::ADCSXr;
1623 case AArch64::SUBWrr:
1624 return AArch64::SUBSWrr;
1625 case AArch64::SUBWri:
1626 return AArch64::SUBSWri;
1627 case AArch64::SUBXrr:
1628 return AArch64::SUBSXrr;
1629 case AArch64::SUBXri:
1630 return AArch64::SUBSXri;
1631 case AArch64::SBCWr:
1632 return AArch64::SBCSWr;
1633 case AArch64::SBCXr:
1634 return AArch64::SBCSXr;
1635 case AArch64::ANDWri:
1636 return AArch64::ANDSWri;
1637 case AArch64::ANDXri:
1638 return AArch64::ANDSXri;
1639 }
1640}
1641
1642/// Check if AArch64::NZCV should be alive in successors of MBB.
1643static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1644 for (auto *BB : MBB->successors())
1645 if (BB->isLiveIn(AArch64::NZCV))
1646 return true;
1647 return false;
1648}
1649
1650/// \returns The condition code operand index for \p Instr if it is a branch
1651/// or select and -1 otherwise.
1652static int
1653findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1654 switch (Instr.getOpcode()) {
1655 default:
1656 return -1;
1657
1658 case AArch64::Bcc: {
1659 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1660 assert(Idx >= 2);
1661 return Idx - 2;
1662 }
1663
1664 case AArch64::CSINVWr:
1665 case AArch64::CSINVXr:
1666 case AArch64::CSINCWr:
1667 case AArch64::CSINCXr:
1668 case AArch64::CSELWr:
1669 case AArch64::CSELXr:
1670 case AArch64::CSNEGWr:
1671 case AArch64::CSNEGXr:
1672 case AArch64::FCSELSrrr:
1673 case AArch64::FCSELDrrr: {
1674 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1675 assert(Idx >= 1);
1676 return Idx - 1;
1677 }
1678 }
1679}
1680
1681/// Find a condition code used by the instruction.
1682/// Returns AArch64CC::Invalid if either the instruction does not use condition
1683/// codes or we don't optimize CmpInstr in the presence of such instructions.
1684static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1685 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1686 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1687 Instr.getOperand(CCIdx).getImm())
1688 : AArch64CC::Invalid;
1689}
1690
1692static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1693 UsedNZCV UsedFlags;
1694 switch (CC) {
1695 default:
1696 break;
1697
1698 case AArch64CC::EQ: // Z set
1699 case AArch64CC::NE: // Z clear
1700 UsedFlags.Z = true;
1701 break;
1702
1703 case AArch64CC::HI: // Z clear and C set
1704 case AArch64CC::LS: // Z set or C clear
1705 UsedFlags.Z = true;
1706 [[fallthrough]];
1707 case AArch64CC::HS: // C set
1708 case AArch64CC::LO: // C clear
1709 UsedFlags.C = true;
1710 break;
1711
1712 case AArch64CC::MI: // N set
1713 case AArch64CC::PL: // N clear
1714 UsedFlags.N = true;
1715 break;
1716
1717 case AArch64CC::VS: // V set
1718 case AArch64CC::VC: // V clear
1719 UsedFlags.V = true;
1720 break;
1721
1722 case AArch64CC::GT: // Z clear, N and V the same
1723 case AArch64CC::LE: // Z set, N and V differ
1724 UsedFlags.Z = true;
1725 [[fallthrough]];
1726 case AArch64CC::GE: // N and V the same
1727 case AArch64CC::LT: // N and V differ
1728 UsedFlags.N = true;
1729 UsedFlags.V = true;
1730 break;
1731 }
1732 return UsedFlags;
1733}
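// Illustrative example: getUsedNZCV(AArch64CC::HI) reports Z and C as used,
// so a HI consumer after a compare blocks rewrites such as substituteCmpToZero
// below, which bails out whenever the C or V flag is needed.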
1734
1735/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1736/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1737/// \returns std::nullopt otherwise.
1738///
1739/// Collect instructions using those flags in \p CCUseInstrs if provided.
1740std::optional<UsedNZCV>
1742 const TargetRegisterInfo &TRI,
1743 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1744 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1745 if (MI.getParent() != CmpParent)
1746 return std::nullopt;
1747
1748 if (areCFlagsAliveInSuccessors(CmpParent))
1749 return std::nullopt;
1750
1751 UsedNZCV NZCVUsedAfterCmp;
1752 for (MachineInstr &Instr : instructionsWithoutDebug(
1753 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1754 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1755 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1756 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1757 return std::nullopt;
1758 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1759 if (CCUseInstrs)
1760 CCUseInstrs->push_back(&Instr);
1761 }
1762 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1763 break;
1764 }
1765 return NZCVUsedAfterCmp;
1766}
1767
1768static bool isADDSRegImm(unsigned Opcode) {
1769 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1770}
1771
1772static bool isSUBSRegImm(unsigned Opcode) {
1773 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1774}
1775
1776/// Check if CmpInstr can be substituted by MI.
1777///
1778/// CmpInstr can be substituted:
1779/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1780/// - and, MI and CmpInstr are from the same MachineBB
1781/// - and, condition flags are not alive in successors of the CmpInstr parent
1782/// - and, if MI opcode is the S form there must be no defs of flags between
1783/// MI and CmpInstr
1784/// or if MI opcode is not the S form there must be neither defs of flags
1785/// nor uses of flags between MI and CmpInstr.
1786/// - and, if C/V flags are not used after CmpInstr
1787/// or if N flag is used but MI produces poison value if signed overflow
1788/// occurs.
1789static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1790 const TargetRegisterInfo &TRI) {
1791 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1792 // that may or may not set flags.
1793 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1794
1795 const unsigned CmpOpcode = CmpInstr.getOpcode();
1796 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1797 return false;
1798
1799 assert((CmpInstr.getOperand(2).isImm() &&
1800 CmpInstr.getOperand(2).getImm() == 0) &&
1801 "Caller guarantees that CmpInstr compares with constant 0");
1802
1803 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1804 if (!NZVCUsed || NZVCUsed->C)
1805 return false;
1806
1807 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1808 // '%vreg = add ...' or '%vreg = sub ...'.
1809 // Condition flag V is used to indicate signed overflow.
1810 // 1) MI and CmpInstr set N and V to the same value.
1811 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1812 // signed overflow occurs, so CmpInstr could still be simplified away.
1813 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1814 return false;
1815
1816 AccessKind AccessToCheck = AK_Write;
1817 if (sForm(MI) != MI.getOpcode())
1818 AccessToCheck = AK_All;
1819 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1820}
1821
1822/// Substitute an instruction comparing to zero with another instruction
1823/// which produces needed condition flags.
1824///
1825/// Return true on success.
1826bool AArch64InstrInfo::substituteCmpToZero(
1827 MachineInstr &CmpInstr, unsigned SrcReg,
1828 const MachineRegisterInfo &MRI) const {
1829 // Get the unique definition of SrcReg.
1830 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1831 if (!MI)
1832 return false;
1833
1834 const TargetRegisterInfo &TRI = getRegisterInfo();
1835
1836 unsigned NewOpc = sForm(*MI);
1837 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1838 return false;
1839
1840 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1841 return false;
1842
1843 // Update the instruction to set NZCV.
1844 MI->setDesc(get(NewOpc));
1845 CmpInstr.eraseFromParent();
1846 bool succeeded = UpdateOperandRegClass(*MI);
1847 (void)succeeded;
1848 assert(succeeded && "Some operands reg class are incompatible!");
1849 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1850 return true;
1851}
1852
1853/// \returns True if \p CmpInstr can be removed.
1854///
1855/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1856/// codes used in \p CCUseInstrs must be inverted.
1857static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1858 int CmpValue, const TargetRegisterInfo &TRI,
1859 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1860 bool &IsInvertCC) {
1861 assert((CmpValue == 0 || CmpValue == 1) &&
1862 "Only comparisons to 0 or 1 considered for removal!");
1863
1864 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1865 unsigned MIOpc = MI.getOpcode();
1866 if (MIOpc == AArch64::CSINCWr) {
1867 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1868 MI.getOperand(2).getReg() != AArch64::WZR)
1869 return false;
1870 } else if (MIOpc == AArch64::CSINCXr) {
1871 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1872 MI.getOperand(2).getReg() != AArch64::XZR)
1873 return false;
1874 } else {
1875 return false;
1876 }
1877 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1878 if (MICC == AArch64CC::Invalid)
1879 return false;
1880
1881 // NZCV needs to be defined
1882 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1883 return false;
1884
1885 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1886 const unsigned CmpOpcode = CmpInstr.getOpcode();
1887 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1888 if (CmpValue && !IsSubsRegImm)
1889 return false;
1890 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1891 return false;
1892
1893 // MI conditions allowed: eq, ne, mi, pl
1894 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1895 if (MIUsedNZCV.C || MIUsedNZCV.V)
1896 return false;
1897
1898 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1899 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1900 // Condition flags are not used in CmpInstr's basic block successors, and only
1901 // Z or N flags are allowed to be used after CmpInstr within its basic block
1902 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1903 return false;
1904 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1905 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1906 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1907 return false;
1908 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
1909 if (MIUsedNZCV.N && !CmpValue)
1910 return false;
1911
1912 // There must be no defs of flags between MI and CmpInstr
1913 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1914 return false;
1915
1916 // Condition code is inverted in the following cases:
1917 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1918 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1919 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1920 (!CmpValue && MICC == AArch64CC::NE);
1921 return true;
1922}
1923
1924/// Remove comparison in csinc-cmp sequence
1925///
1926/// Examples:
1927/// 1. \code
1928/// csinc w9, wzr, wzr, ne
1929/// cmp w9, #0
1930/// b.eq
1931/// \endcode
1932/// to
1933/// \code
1934/// csinc w9, wzr, wzr, ne
1935/// b.ne
1936/// \endcode
1937///
1938/// 2. \code
1939/// csinc x2, xzr, xzr, mi
1940/// cmp x2, #1
1941/// b.pl
1942/// \endcode
1943/// to
1944/// \code
1945/// csinc x2, xzr, xzr, mi
1946/// b.pl
1947/// \endcode
1948///
1949/// \param CmpInstr comparison instruction
1950/// \return True when comparison removed
1951bool AArch64InstrInfo::removeCmpToZeroOrOne(
1952 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1953 const MachineRegisterInfo &MRI) const {
1954 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1955 if (!MI)
1956 return false;
1957 const TargetRegisterInfo &TRI = getRegisterInfo();
1958 SmallVector<MachineInstr *, 4> CCUseInstrs;
1959 bool IsInvertCC = false;
1960 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1961 IsInvertCC))
1962 return false;
1963 // Make transformation
1964 CmpInstr.eraseFromParent();
1965 if (IsInvertCC) {
1966 // Invert condition codes in CmpInstr CC users
1967 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1968 int Idx = CCUseInstr->findFirstPredOperandIdx();
1969 assert(Idx >= 0 && "Unexpected instruction using CC.");
1970 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1971 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1972 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1973 CCOperand.setImm(CCUse);
1974 }
1975 }
1976 return true;
1977}
1978
1979bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1980 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1981 MI.getOpcode() != AArch64::CATCHRET)
1982 return false;
1983
1984 MachineBasicBlock &MBB = *MI.getParent();
1985 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1986 auto TRI = Subtarget.getRegisterInfo();
1987 DebugLoc DL = MI.getDebugLoc();
1988
1989 if (MI.getOpcode() == AArch64::CATCHRET) {
1990 // Skip to the first instruction before the epilog.
1991 const TargetInstrInfo *TII =
1992 MBB.getParent()->getSubtarget().getInstrInfo();
1993 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1994 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
1995 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1996 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1997 FirstEpilogSEH != MBB.begin())
1998 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1999 if (FirstEpilogSEH != MBB.begin())
2000 FirstEpilogSEH = std::next(FirstEpilogSEH);
2001 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2002 .addReg(AArch64::X0, RegState::Define)
2003 .addMBB(TargetMBB);
2004 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2005 .addReg(AArch64::X0, RegState::Define)
2006 .addReg(AArch64::X0)
2007 .addMBB(TargetMBB)
2008 .addImm(0);
2009 TargetMBB->setMachineBlockAddressTaken();
2010 return true;
2011 }
2012
2013 Register Reg = MI.getOperand(0).getReg();
2014 Module &M = *MBB.getParent()->getFunction().getParent();
2015 if (M.getStackProtectorGuard() == "sysreg") {
2016 const AArch64SysReg::SysReg *SrcReg =
2017 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2018 if (!SrcReg)
2019 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2020
2021 // mrs xN, sysreg
2022 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2023 .addDef(Reg)
2024 .addImm(SrcReg->Encoding);
2025 int Offset = M.getStackProtectorGuardOffset();
2026 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2027 // ldr xN, [xN, #offset]
2028 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset / 8);
2032 } else if (Offset >= -256 && Offset <= 255) {
2033 // ldur xN, [xN, #offset]
2034 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2035 .addDef(Reg)
2036 .addUse(Reg, RegState::Kill)
2037 .addImm(Offset);
2038 } else if (Offset >= -4095 && Offset <= 4095) {
2039 if (Offset > 0) {
2040 // add xN, xN, #offset
2041 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2042 .addDef(Reg)
2043 .addUse(Reg, RegState::Kill)
2044 .addImm(Offset)
2045 .addImm(0);
2046 } else {
2047 // sub xN, xN, #offset
2048 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2049 .addDef(Reg)
2050 .addUse(Reg, RegState::Kill)
2051 .addImm(-Offset)
2052 .addImm(0);
2053 }
2054 // ldr xN, [xN]
2055 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2056 .addDef(Reg)
2057 .addUse(Reg, RegState::Kill)
2058 .addImm(0);
2059 } else {
2060 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2061 // than 32760.
2062 // It might be nice to use AArch64::MOVi32imm here, which would get
2063 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2064 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2065 // AArch64FrameLowering might help us find such a scratch register
2066 // though. If we failed to find a scratch register, we could emit a
2067 // stream of add instructions to build up the immediate. Or, we could try
2068 // to insert a AArch64::MOVi32imm before register allocation so that we
2069 // didn't need to scavenge for a scratch register.
2070 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2071 }
2072 MBB.erase(MI);
2073 return true;
2074 }
2075
2076 const GlobalValue *GV =
2077 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2078 const TargetMachine &TM = MBB.getParent()->getTarget();
2079 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2080 const unsigned char MO_NC = AArch64II::MO_NC;
2081
2082 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2083 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2084 .addGlobalAddress(GV, 0, OpFlags);
2085 if (Subtarget.isTargetILP32()) {
2086 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2087 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2088 .addDef(Reg32, RegState::Dead)
2089 .addUse(Reg, RegState::Kill)
2090 .addImm(0)
2091 .addMemOperand(*MI.memoperands_begin())
2092 .addDef(Reg, RegState::Implicit);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2095 .addReg(Reg, RegState::Kill)
2096 .addImm(0)
2097 .addMemOperand(*MI.memoperands_begin());
2098 }
2099 } else if (TM.getCodeModel() == CodeModel::Large) {
2100 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2101 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2102 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2103 .addImm(0);
2104 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2105 .addReg(Reg, RegState::Kill)
2106 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2107 .addImm(16);
2108 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2109 .addReg(Reg, RegState::Kill)
2110 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2111 .addImm(32);
2112 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2113 .addReg(Reg, RegState::Kill)
2114 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2115 .addImm(48);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2117 .addReg(Reg, RegState::Kill)
2118 .addImm(0)
2119 .addMemOperand(*MI.memoperands_begin());
2120 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2121 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2122 .addGlobalAddress(GV, 0, OpFlags);
2123 } else {
2124 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2125 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2126 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2127 if (Subtarget.isTargetILP32()) {
2128 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2129 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2130 .addDef(Reg32, RegState::Dead)
2131 .addUse(Reg, RegState::Kill)
2132 .addGlobalAddress(GV, 0, LoFlags)
2133 .addMemOperand(*MI.memoperands_begin())
2134 .addDef(Reg, RegState::Implicit);
2135 } else {
2136 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2137 .addReg(Reg, RegState::Kill)
2138 .addGlobalAddress(GV, 0, LoFlags)
2139 .addMemOperand(*MI.memoperands_begin());
2140 }
2141 }
2142
2143 MBB.erase(MI);
2144
2145 return true;
2146}
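// Editorial sketch (not part of the upstream source): with a sysreg-based
// stack protector (e.g. -mstack-protector-guard=sysreg,
// -mstack-protector-guard-reg=sp_el0, -mstack-protector-guard-offset=16) the
// LOAD_STACK_GUARD pseudo expands along the lines of
//   mrs x8, SP_EL0
//   ldr x8, [x8, #16]
// while an offset such as 257, which is neither LDR- nor LDUR-encodable,
// takes the add/sub fallback:
//   mrs x8, SP_EL0
//   add x8, x8, #257
//   ldr x8, [x8]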
2147
2148// Return true if this instruction simply sets its single destination register
2149// to zero. This is equivalent to a register rename of the zero-register.
2150bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2151 switch (MI.getOpcode()) {
2152 default:
2153 break;
2154 case AArch64::MOVZWi:
2155 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2156 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2157 assert(MI.getDesc().getNumOperands() == 3 &&
2158 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2159 return true;
2160 }
2161 break;
2162 case AArch64::ANDWri: // and Rd, Rzr, #imm
2163 return MI.getOperand(1).getReg() == AArch64::WZR;
2164 case AArch64::ANDXri:
2165 return MI.getOperand(1).getReg() == AArch64::XZR;
2166 case TargetOpcode::COPY:
2167 return MI.getOperand(1).getReg() == AArch64::WZR;
2168 }
2169 return false;
2170}
2171
2172// Return true if this instruction simply renames a general register without
2173// modifying bits.
2174bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2175 switch (MI.getOpcode()) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 // GPR32 copies will be lowered to ORRXrs
2180 Register DstReg = MI.getOperand(0).getReg();
2181 return (AArch64::GPR32RegClass.contains(DstReg) ||
2182 AArch64::GPR64RegClass.contains(DstReg));
2183 }
2184 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2185 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2186 assert(MI.getDesc().getNumOperands() == 4 &&
2187 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2188 return true;
2189 }
2190 break;
2191 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2192 if (MI.getOperand(2).getImm() == 0) {
2193 assert(MI.getDesc().getNumOperands() == 4 &&
2194 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2195 return true;
2196 }
2197 break;
2198 }
2199 return false;
2200}
2201
2202// Return true if this instruction simply renames an FP register without
2203// modifying bits.
2204bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2205 switch (MI.getOpcode()) {
2206 default:
2207 break;
2208 case TargetOpcode::COPY: {
2209 Register DstReg = MI.getOperand(0).getReg();
2210 return AArch64::FPR128RegClass.contains(DstReg);
2211 }
2212 case AArch64::ORRv16i8:
2213 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2214 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2215 "invalid ORRv16i8 operands");
2216 return true;
2217 }
2218 break;
2219 }
2220 return false;
2221}
2222
2223Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2224 int &FrameIndex) const {
2225 switch (MI.getOpcode()) {
2226 default:
2227 break;
2228 case AArch64::LDRWui:
2229 case AArch64::LDRXui:
2230 case AArch64::LDRBui:
2231 case AArch64::LDRHui:
2232 case AArch64::LDRSui:
2233 case AArch64::LDRDui:
2234 case AArch64::LDRQui:
2235 case AArch64::LDR_PXI:
2236 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2237 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2238 FrameIndex = MI.getOperand(1).getIndex();
2239 return MI.getOperand(0).getReg();
2240 }
2241 break;
2242 }
2243
2244 return 0;
2245}
2246
2247Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2248 int &FrameIndex) const {
2249 switch (MI.getOpcode()) {
2250 default:
2251 break;
2252 case AArch64::STRWui:
2253 case AArch64::STRXui:
2254 case AArch64::STRBui:
2255 case AArch64::STRHui:
2256 case AArch64::STRSui:
2257 case AArch64::STRDui:
2258 case AArch64::STRQui:
2259 case AArch64::STR_PXI:
2260 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2261 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2262 FrameIndex = MI.getOperand(1).getIndex();
2263 return MI.getOperand(0).getReg();
2264 }
2265 break;
2266 }
2267 return 0;
2268}
2269
2270/// Check all MachineMemOperands for a hint to suppress pairing.
2271bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2272 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2273 return MMO->getFlags() & MOSuppressPair;
2274 });
2275}
2276
2277/// Set a flag on the first MachineMemOperand to suppress pairing.
2278void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2279 if (MI.memoperands_empty())
2280 return;
2281 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2282}
2283
2284/// Check all MachineMemOperands for a hint that the load/store is strided.
2285bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2286 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2287 return MMO->getFlags() & MOStridedAccess;
2288 });
2289}
2290
2291bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2292 switch (Opc) {
2293 default:
2294 return false;
2295 case AArch64::STURSi:
2296 case AArch64::STRSpre:
2297 case AArch64::STURDi:
2298 case AArch64::STRDpre:
2299 case AArch64::STURQi:
2300 case AArch64::STRQpre:
2301 case AArch64::STURBBi:
2302 case AArch64::STURHHi:
2303 case AArch64::STURWi:
2304 case AArch64::STRWpre:
2305 case AArch64::STURXi:
2306 case AArch64::STRXpre:
2307 case AArch64::LDURSi:
2308 case AArch64::LDRSpre:
2309 case AArch64::LDURDi:
2310 case AArch64::LDRDpre:
2311 case AArch64::LDURQi:
2312 case AArch64::LDRQpre:
2313 case AArch64::LDURWi:
2314 case AArch64::LDRWpre:
2315 case AArch64::LDURXi:
2316 case AArch64::LDRXpre:
2317 case AArch64::LDRSWpre:
2318 case AArch64::LDURSWi:
2319 case AArch64::LDURHHi:
2320 case AArch64::LDURBBi:
2321 case AArch64::LDURSBWi:
2322 case AArch64::LDURSHWi:
2323 return true;
2324 }
2325}
2326
2327std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2328 switch (Opc) {
2329 default: return {};
2330 case AArch64::PRFMui: return AArch64::PRFUMi;
2331 case AArch64::LDRXui: return AArch64::LDURXi;
2332 case AArch64::LDRWui: return AArch64::LDURWi;
2333 case AArch64::LDRBui: return AArch64::LDURBi;
2334 case AArch64::LDRHui: return AArch64::LDURHi;
2335 case AArch64::LDRSui: return AArch64::LDURSi;
2336 case AArch64::LDRDui: return AArch64::LDURDi;
2337 case AArch64::LDRQui: return AArch64::LDURQi;
2338 case AArch64::LDRBBui: return AArch64::LDURBBi;
2339 case AArch64::LDRHHui: return AArch64::LDURHHi;
2340 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2341 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2342 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2343 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2344 case AArch64::LDRSWui: return AArch64::LDURSWi;
2345 case AArch64::STRXui: return AArch64::STURXi;
2346 case AArch64::STRWui: return AArch64::STURWi;
2347 case AArch64::STRBui: return AArch64::STURBi;
2348 case AArch64::STRHui: return AArch64::STURHi;
2349 case AArch64::STRSui: return AArch64::STURSi;
2350 case AArch64::STRDui: return AArch64::STURDi;
2351 case AArch64::STRQui: return AArch64::STURQi;
2352 case AArch64::STRBBui: return AArch64::STURBBi;
2353 case AArch64::STRHHui: return AArch64::STURHHi;
2354 }
2355}
2356
2357unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2358 switch (Opc) {
2359 default:
2360 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2361 case AArch64::ADDG:
2362 case AArch64::LDAPURBi:
2363 case AArch64::LDAPURHi:
2364 case AArch64::LDAPURi:
2365 case AArch64::LDAPURSBWi:
2366 case AArch64::LDAPURSBXi:
2367 case AArch64::LDAPURSHWi:
2368 case AArch64::LDAPURSHXi:
2369 case AArch64::LDAPURSWi:
2370 case AArch64::LDAPURXi:
2371 case AArch64::LDR_PPXI:
2372 case AArch64::LDR_PXI:
2373 case AArch64::LDR_ZXI:
2374 case AArch64::LDR_ZZXI:
2375 case AArch64::LDR_ZZZXI:
2376 case AArch64::LDR_ZZZZXI:
2377 case AArch64::LDRBBui:
2378 case AArch64::LDRBui:
2379 case AArch64::LDRDui:
2380 case AArch64::LDRHHui:
2381 case AArch64::LDRHui:
2382 case AArch64::LDRQui:
2383 case AArch64::LDRSBWui:
2384 case AArch64::LDRSBXui:
2385 case AArch64::LDRSHWui:
2386 case AArch64::LDRSHXui:
2387 case AArch64::LDRSui:
2388 case AArch64::LDRSWui:
2389 case AArch64::LDRWui:
2390 case AArch64::LDRXui:
2391 case AArch64::LDURBBi:
2392 case AArch64::LDURBi:
2393 case AArch64::LDURDi:
2394 case AArch64::LDURHHi:
2395 case AArch64::LDURHi:
2396 case AArch64::LDURQi:
2397 case AArch64::LDURSBWi:
2398 case AArch64::LDURSBXi:
2399 case AArch64::LDURSHWi:
2400 case AArch64::LDURSHXi:
2401 case AArch64::LDURSi:
2402 case AArch64::LDURSWi:
2403 case AArch64::LDURWi:
2404 case AArch64::LDURXi:
2405 case AArch64::PRFMui:
2406 case AArch64::PRFUMi:
2407 case AArch64::ST2Gi:
2408 case AArch64::STGi:
2409 case AArch64::STLURBi:
2410 case AArch64::STLURHi:
2411 case AArch64::STLURWi:
2412 case AArch64::STLURXi:
2413 case AArch64::StoreSwiftAsyncContext:
2414 case AArch64::STR_PPXI:
2415 case AArch64::STR_PXI:
2416 case AArch64::STR_ZXI:
2417 case AArch64::STR_ZZXI:
2418 case AArch64::STR_ZZZXI:
2419 case AArch64::STR_ZZZZXI:
2420 case AArch64::STRBBui:
2421 case AArch64::STRBui:
2422 case AArch64::STRDui:
2423 case AArch64::STRHHui:
2424 case AArch64::STRHui:
2425 case AArch64::STRQui:
2426 case AArch64::STRSui:
2427 case AArch64::STRWui:
2428 case AArch64::STRXui:
2429 case AArch64::STURBBi:
2430 case AArch64::STURBi:
2431 case AArch64::STURDi:
2432 case AArch64::STURHHi:
2433 case AArch64::STURHi:
2434 case AArch64::STURQi:
2435 case AArch64::STURSi:
2436 case AArch64::STURWi:
2437 case AArch64::STURXi:
2438 case AArch64::STZ2Gi:
2439 case AArch64::STZGi:
2440 case AArch64::TAGPstack:
2441 return 2;
2442 case AArch64::LD1B_D_IMM:
2443 case AArch64::LD1B_H_IMM:
2444 case AArch64::LD1B_IMM:
2445 case AArch64::LD1B_S_IMM:
2446 case AArch64::LD1D_IMM:
2447 case AArch64::LD1H_D_IMM:
2448 case AArch64::LD1H_IMM:
2449 case AArch64::LD1H_S_IMM:
2450 case AArch64::LD1RB_D_IMM:
2451 case AArch64::LD1RB_H_IMM:
2452 case AArch64::LD1RB_IMM:
2453 case AArch64::LD1RB_S_IMM:
2454 case AArch64::LD1RD_IMM:
2455 case AArch64::LD1RH_D_IMM:
2456 case AArch64::LD1RH_IMM:
2457 case AArch64::LD1RH_S_IMM:
2458 case AArch64::LD1RSB_D_IMM:
2459 case AArch64::LD1RSB_H_IMM:
2460 case AArch64::LD1RSB_S_IMM:
2461 case AArch64::LD1RSH_D_IMM:
2462 case AArch64::LD1RSH_S_IMM:
2463 case AArch64::LD1RSW_IMM:
2464 case AArch64::LD1RW_D_IMM:
2465 case AArch64::LD1RW_IMM:
2466 case AArch64::LD1SB_D_IMM:
2467 case AArch64::LD1SB_H_IMM:
2468 case AArch64::LD1SB_S_IMM:
2469 case AArch64::LD1SH_D_IMM:
2470 case AArch64::LD1SH_S_IMM:
2471 case AArch64::LD1SW_D_IMM:
2472 case AArch64::LD1W_D_IMM:
2473 case AArch64::LD1W_IMM:
2474 case AArch64::LD2B_IMM:
2475 case AArch64::LD2D_IMM:
2476 case AArch64::LD2H_IMM:
2477 case AArch64::LD2W_IMM:
2478 case AArch64::LD3B_IMM:
2479 case AArch64::LD3D_IMM:
2480 case AArch64::LD3H_IMM:
2481 case AArch64::LD3W_IMM:
2482 case AArch64::LD4B_IMM:
2483 case AArch64::LD4D_IMM:
2484 case AArch64::LD4H_IMM:
2485 case AArch64::LD4W_IMM:
2486 case AArch64::LDG:
2487 case AArch64::LDNF1B_D_IMM:
2488 case AArch64::LDNF1B_H_IMM:
2489 case AArch64::LDNF1B_IMM:
2490 case AArch64::LDNF1B_S_IMM:
2491 case AArch64::LDNF1D_IMM:
2492 case AArch64::LDNF1H_D_IMM:
2493 case AArch64::LDNF1H_IMM:
2494 case AArch64::LDNF1H_S_IMM:
2495 case AArch64::LDNF1SB_D_IMM:
2496 case AArch64::LDNF1SB_H_IMM:
2497 case AArch64::LDNF1SB_S_IMM:
2498 case AArch64::LDNF1SH_D_IMM:
2499 case AArch64::LDNF1SH_S_IMM:
2500 case AArch64::LDNF1SW_D_IMM:
2501 case AArch64::LDNF1W_D_IMM:
2502 case AArch64::LDNF1W_IMM:
2503 case AArch64::LDNPDi:
2504 case AArch64::LDNPQi:
2505 case AArch64::LDNPSi:
2506 case AArch64::LDNPWi:
2507 case AArch64::LDNPXi:
2508 case AArch64::LDNT1B_ZRI:
2509 case AArch64::LDNT1D_ZRI:
2510 case AArch64::LDNT1H_ZRI:
2511 case AArch64::LDNT1W_ZRI:
2512 case AArch64::LDPDi:
2513 case AArch64::LDPQi:
2514 case AArch64::LDPSi:
2515 case AArch64::LDPWi:
2516 case AArch64::LDPXi:
2517 case AArch64::LDRBBpost:
2518 case AArch64::LDRBBpre:
2519 case AArch64::LDRBpost:
2520 case AArch64::LDRBpre:
2521 case AArch64::LDRDpost:
2522 case AArch64::LDRDpre:
2523 case AArch64::LDRHHpost:
2524 case AArch64::LDRHHpre:
2525 case AArch64::LDRHpost:
2526 case AArch64::LDRHpre:
2527 case AArch64::LDRQpost:
2528 case AArch64::LDRQpre:
2529 case AArch64::LDRSpost:
2530 case AArch64::LDRSpre:
2531 case AArch64::LDRWpost:
2532 case AArch64::LDRWpre:
2533 case AArch64::LDRXpost:
2534 case AArch64::LDRXpre:
2535 case AArch64::ST1B_D_IMM:
2536 case AArch64::ST1B_H_IMM:
2537 case AArch64::ST1B_IMM:
2538 case AArch64::ST1B_S_IMM:
2539 case AArch64::ST1D_IMM:
2540 case AArch64::ST1H_D_IMM:
2541 case AArch64::ST1H_IMM:
2542 case AArch64::ST1H_S_IMM:
2543 case AArch64::ST1W_D_IMM:
2544 case AArch64::ST1W_IMM:
2545 case AArch64::ST2B_IMM:
2546 case AArch64::ST2D_IMM:
2547 case AArch64::ST2H_IMM:
2548 case AArch64::ST2W_IMM:
2549 case AArch64::ST3B_IMM:
2550 case AArch64::ST3D_IMM:
2551 case AArch64::ST3H_IMM:
2552 case AArch64::ST3W_IMM:
2553 case AArch64::ST4B_IMM:
2554 case AArch64::ST4D_IMM:
2555 case AArch64::ST4H_IMM:
2556 case AArch64::ST4W_IMM:
2557 case AArch64::STGPi:
2558 case AArch64::STGPreIndex:
2559 case AArch64::STZGPreIndex:
2560 case AArch64::ST2GPreIndex:
2561 case AArch64::STZ2GPreIndex:
2562 case AArch64::STGPostIndex:
2563 case AArch64::STZGPostIndex:
2564 case AArch64::ST2GPostIndex:
2565 case AArch64::STZ2GPostIndex:
2566 case AArch64::STNPDi:
2567 case AArch64::STNPQi:
2568 case AArch64::STNPSi:
2569 case AArch64::STNPWi:
2570 case AArch64::STNPXi:
2571 case AArch64::STNT1B_ZRI:
2572 case AArch64::STNT1D_ZRI:
2573 case AArch64::STNT1H_ZRI:
2574 case AArch64::STNT1W_ZRI:
2575 case AArch64::STPDi:
2576 case AArch64::STPQi:
2577 case AArch64::STPSi:
2578 case AArch64::STPWi:
2579 case AArch64::STPXi:
2580 case AArch64::STRBBpost:
2581 case AArch64::STRBBpre:
2582 case AArch64::STRBpost:
2583 case AArch64::STRBpre:
2584 case AArch64::STRDpost:
2585 case AArch64::STRDpre:
2586 case AArch64::STRHHpost:
2587 case AArch64::STRHHpre:
2588 case AArch64::STRHpost:
2589 case AArch64::STRHpre:
2590 case AArch64::STRQpost:
2591 case AArch64::STRQpre:
2592 case AArch64::STRSpost:
2593 case AArch64::STRSpre:
2594 case AArch64::STRWpost:
2595 case AArch64::STRWpre:
2596 case AArch64::STRXpost:
2597 case AArch64::STRXpre:
2598 return 3;
2599 case AArch64::LDPDpost:
2600 case AArch64::LDPDpre:
2601 case AArch64::LDPQpost:
2602 case AArch64::LDPQpre:
2603 case AArch64::LDPSpost:
2604 case AArch64::LDPSpre:
2605 case AArch64::LDPWpost:
2606 case AArch64::LDPWpre:
2607 case AArch64::LDPXpost:
2608 case AArch64::LDPXpre:
2609 case AArch64::STGPpre:
2610 case AArch64::STGPpost:
2611 case AArch64::STPDpost:
2612 case AArch64::STPDpre:
2613 case AArch64::STPQpost:
2614 case AArch64::STPQpre:
2615 case AArch64::STPSpost:
2616 case AArch64::STPSpre:
2617 case AArch64::STPWpost:
2618 case AArch64::STPWpre:
2619 case AArch64::STPXpost:
2620 case AArch64::STPXpre:
2621 return 4;
2622 }
2623}
2624
2625bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2626 switch (MI.getOpcode()) {
2627 default:
2628 return false;
2629 // Scaled instructions.
2630 case AArch64::STRSui:
2631 case AArch64::STRDui:
2632 case AArch64::STRQui:
2633 case AArch64::STRXui:
2634 case AArch64::STRWui:
2635 case AArch64::LDRSui:
2636 case AArch64::LDRDui:
2637 case AArch64::LDRQui:
2638 case AArch64::LDRXui:
2639 case AArch64::LDRWui:
2640 case AArch64::LDRSWui:
2641 // Unscaled instructions.
2642 case AArch64::STURSi:
2643 case AArch64::STRSpre:
2644 case AArch64::STURDi:
2645 case AArch64::STRDpre:
2646 case AArch64::STURQi:
2647 case AArch64::STRQpre:
2648 case AArch64::STURWi:
2649 case AArch64::STRWpre:
2650 case AArch64::STURXi:
2651 case AArch64::STRXpre:
2652 case AArch64::LDURSi:
2653 case AArch64::LDRSpre:
2654 case AArch64::LDURDi:
2655 case AArch64::LDRDpre:
2656 case AArch64::LDURQi:
2657 case AArch64::LDRQpre:
2658 case AArch64::LDURWi:
2659 case AArch64::LDRWpre:
2660 case AArch64::LDURXi:
2661 case AArch64::LDRXpre:
2662 case AArch64::LDURSWi:
2663 case AArch64::LDRSWpre:
2664 return true;
2665 }
2666}
2667
2668bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2669 switch (MI.getOpcode()) {
2670 default:
2671 assert((!MI.isCall() || !MI.isReturn()) &&
2672 "Unexpected instruction - was a new tail call opcode introduced?");
2673 return false;
2674 case AArch64::TCRETURNdi:
2675 case AArch64::TCRETURNri:
2676 case AArch64::TCRETURNrix16x17:
2677 case AArch64::TCRETURNrix17:
2678 case AArch64::TCRETURNrinotx16:
2679 case AArch64::TCRETURNriALL:
2680 case AArch64::AUTH_TCRETURN:
2681 case AArch64::AUTH_TCRETURN_BTI:
2682 return true;
2683 }
2684}
2685
2686unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2687 switch (Opc) {
2688 default:
2689 llvm_unreachable("Opcode has no flag setting equivalent!");
2690 // 32-bit cases:
2691 case AArch64::ADDWri:
2692 return AArch64::ADDSWri;
2693 case AArch64::ADDWrr:
2694 return AArch64::ADDSWrr;
2695 case AArch64::ADDWrs:
2696 return AArch64::ADDSWrs;
2697 case AArch64::ADDWrx:
2698 return AArch64::ADDSWrx;
2699 case AArch64::ANDWri:
2700 return AArch64::ANDSWri;
2701 case AArch64::ANDWrr:
2702 return AArch64::ANDSWrr;
2703 case AArch64::ANDWrs:
2704 return AArch64::ANDSWrs;
2705 case AArch64::BICWrr:
2706 return AArch64::BICSWrr;
2707 case AArch64::BICWrs:
2708 return AArch64::BICSWrs;
2709 case AArch64::SUBWri:
2710 return AArch64::SUBSWri;
2711 case AArch64::SUBWrr:
2712 return AArch64::SUBSWrr;
2713 case AArch64::SUBWrs:
2714 return AArch64::SUBSWrs;
2715 case AArch64::SUBWrx:
2716 return AArch64::SUBSWrx;
2717 // 64-bit cases:
2718 case AArch64::ADDXri:
2719 return AArch64::ADDSXri;
2720 case AArch64::ADDXrr:
2721 return AArch64::ADDSXrr;
2722 case AArch64::ADDXrs:
2723 return AArch64::ADDSXrs;
2724 case AArch64::ADDXrx:
2725 return AArch64::ADDSXrx;
2726 case AArch64::ANDXri:
2727 return AArch64::ANDSXri;
2728 case AArch64::ANDXrr:
2729 return AArch64::ANDSXrr;
2730 case AArch64::ANDXrs:
2731 return AArch64::ANDSXrs;
2732 case AArch64::BICXrr:
2733 return AArch64::BICSXrr;
2734 case AArch64::BICXrs:
2735 return AArch64::BICSXrs;
2736 case AArch64::SUBXri:
2737 return AArch64::SUBSXri;
2738 case AArch64::SUBXrr:
2739 return AArch64::SUBSXrr;
2740 case AArch64::SUBXrs:
2741 return AArch64::SUBSXrs;
2742 case AArch64::SUBXrx:
2743 return AArch64::SUBSXrx;
2744 // SVE instructions:
2745 case AArch64::AND_PPzPP:
2746 return AArch64::ANDS_PPzPP;
2747 case AArch64::BIC_PPzPP:
2748 return AArch64::BICS_PPzPP;
2749 case AArch64::EOR_PPzPP:
2750 return AArch64::EORS_PPzPP;
2751 case AArch64::NAND_PPzPP:
2752 return AArch64::NANDS_PPzPP;
2753 case AArch64::NOR_PPzPP:
2754 return AArch64::NORS_PPzPP;
2755 case AArch64::ORN_PPzPP:
2756 return AArch64::ORNS_PPzPP;
2757 case AArch64::ORR_PPzPP:
2758 return AArch64::ORRS_PPzPP;
2759 case AArch64::BRKA_PPzP:
2760 return AArch64::BRKAS_PPzP;
2761 case AArch64::BRKPA_PPzPP:
2762 return AArch64::BRKPAS_PPzPP;
2763 case AArch64::BRKB_PPzP:
2764 return AArch64::BRKBS_PPzP;
2765 case AArch64::BRKPB_PPzPP:
2766 return AArch64::BRKPBS_PPzPP;
2767 case AArch64::BRKN_PPzP:
2768 return AArch64::BRKNS_PPzP;
2769 case AArch64::RDFFR_PPz:
2770 return AArch64::RDFFRS_PPz;
2771 case AArch64::PTRUE_B:
2772 return AArch64::PTRUES_B;
2773 }
2774}
2775
2776// Is this a candidate for ld/st merging or pairing? For example, we don't
2777// touch volatiles or load/stores that have a hint to avoid pair formation.
2778bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2779
2780 bool IsPreLdSt = isPreLdSt(MI);
2781
2782 // If this is a volatile load/store, don't mess with it.
2783 if (MI.hasOrderedMemoryRef())
2784 return false;
2785
2786 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2787 // For Pre-inc LD/ST, the operand is shifted by one.
2788 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2789 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2790 "Expected a reg or frame index operand.");
2791
2792 // For Pre-indexed addressing quadword instructions, the third operand is the
2793 // immediate value.
2794 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2795
2796 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2797 return false;
2798
2799 // Can't merge/pair if the instruction modifies the base register.
2800 // e.g., ldr x0, [x0]
2801 // This case will never occur with an FI base.
2802 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2803 // STR<S,D,Q,W,X>pre, it can be merged.
2804 // For example:
2805 // ldr q0, [x11, #32]!
2806 // ldr q1, [x11, #16]
2807 // to
2808 // ldp q0, q1, [x11, #32]!
2809 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2810 Register BaseReg = MI.getOperand(1).getReg();
2811 const TargetRegisterInfo *TRI = &getRegisterInfo();
2812 if (MI.modifiesRegister(BaseReg, TRI))
2813 return false;
2814 }
2815
2816 // Check if this load/store has a hint to avoid pair formation.
2817 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2818 if (isLdStPairSuppressed(MI))
2819 return false;
2820
2821 // Do not pair any callee-save store/reload instructions in the
2822 // prologue/epilogue if the CFI information encoded the operations as separate
2823 // instructions, as that will cause the size of the actual prologue to mismatch
2824 // with the prologue size recorded in the Windows CFI.
2825 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2826 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2827 MI.getMF()->getFunction().needsUnwindTableEntry();
2828 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2829 MI.getFlag(MachineInstr::FrameDestroy)))
2830 return false;
2831
2832 // On some CPUs quad load/store pairs are slower than two single load/stores.
2833 if (Subtarget.isPaired128Slow()) {
2834 switch (MI.getOpcode()) {
2835 default:
2836 break;
2837 case AArch64::LDURQi:
2838 case AArch64::STURQi:
2839 case AArch64::LDRQui:
2840 case AArch64::STRQui:
2841 return false;
2842 }
2843 }
2844
2845 return true;
2846}
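// Editorial sketch (not part of the upstream source): a typical pair the
// checks above are guarding, which the load/store optimizer may later form:
//   ldr x0, [x2]
//   ldr x1, [x2, #8]
// can become
//   ldp x0, x1, [x2]
// unless the access is volatile, carries a pair-suppression hint, belongs to
// a WinCFI prologue/epilogue, or 128-bit pairing is slow on the subtarget.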
2847
2848bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2849 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2850 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2851 const TargetRegisterInfo *TRI) const {
2852 if (!LdSt.mayLoadOrStore())
2853 return false;
2854
2855 const MachineOperand *BaseOp;
2856 TypeSize WidthN(0, false);
2857 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2858 WidthN, TRI))
2859 return false;
2860 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2861 // vector.
2862 Width = LocationSize::precise(WidthN);
2863 BaseOps.push_back(BaseOp);
2864 return true;
2865}
2866
2867std::optional<ExtAddrMode>
2868AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2869 const TargetRegisterInfo *TRI) const {
2870 const MachineOperand *Base; // Filled with the base operand of MI.
2871 int64_t Offset; // Filled with the offset of MI.
2872 bool OffsetIsScalable;
2873 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2874 return std::nullopt;
2875
2876 if (!Base->isReg())
2877 return std::nullopt;
2878 ExtAddrMode AM;
2879 AM.BaseReg = Base->getReg();
2880 AM.Displacement = Offset;
2881 AM.ScaledReg = 0;
2882 AM.Scale = 0;
2883 return AM;
2884}
2885
2886bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2887 Register Reg,
2888 const MachineInstr &AddrI,
2889 ExtAddrMode &AM) const {
2890 // Filter out instructions into which we cannot fold.
2891 unsigned NumBytes;
2892 int64_t OffsetScale = 1;
2893 switch (MemI.getOpcode()) {
2894 default:
2895 return false;
2896
2897 case AArch64::LDURQi:
2898 case AArch64::STURQi:
2899 NumBytes = 16;
2900 break;
2901
2902 case AArch64::LDURDi:
2903 case AArch64::STURDi:
2904 case AArch64::LDURXi:
2905 case AArch64::STURXi:
2906 NumBytes = 8;
2907 break;
2908
2909 case AArch64::LDURWi:
2910 case AArch64::LDURSWi:
2911 case AArch64::STURWi:
2912 NumBytes = 4;
2913 break;
2914
2915 case AArch64::LDURHi:
2916 case AArch64::STURHi:
2917 case AArch64::LDURHHi:
2918 case AArch64::STURHHi:
2919 case AArch64::LDURSHXi:
2920 case AArch64::LDURSHWi:
2921 NumBytes = 2;
2922 break;
2923
2924 case AArch64::LDRBroX:
2925 case AArch64::LDRBBroX:
2926 case AArch64::LDRSBXroX:
2927 case AArch64::LDRSBWroX:
2928 case AArch64::STRBroX:
2929 case AArch64::STRBBroX:
2930 case AArch64::LDURBi:
2931 case AArch64::LDURBBi:
2932 case AArch64::LDURSBXi:
2933 case AArch64::LDURSBWi:
2934 case AArch64::STURBi:
2935 case AArch64::STURBBi:
2936 case AArch64::LDRBui:
2937 case AArch64::LDRBBui:
2938 case AArch64::LDRSBXui:
2939 case AArch64::LDRSBWui:
2940 case AArch64::STRBui:
2941 case AArch64::STRBBui:
2942 NumBytes = 1;
2943 break;
2944
2945 case AArch64::LDRQroX:
2946 case AArch64::STRQroX:
2947 case AArch64::LDRQui:
2948 case AArch64::STRQui:
2949 NumBytes = 16;
2950 OffsetScale = 16;
2951 break;
2952
2953 case AArch64::LDRDroX:
2954 case AArch64::STRDroX:
2955 case AArch64::LDRXroX:
2956 case AArch64::STRXroX:
2957 case AArch64::LDRDui:
2958 case AArch64::STRDui:
2959 case AArch64::LDRXui:
2960 case AArch64::STRXui:
2961 NumBytes = 8;
2962 OffsetScale = 8;
2963 break;
2964
2965 case AArch64::LDRWroX:
2966 case AArch64::LDRSWroX:
2967 case AArch64::STRWroX:
2968 case AArch64::LDRWui:
2969 case AArch64::LDRSWui:
2970 case AArch64::STRWui:
2971 NumBytes = 4;
2972 OffsetScale = 4;
2973 break;
2974
2975 case AArch64::LDRHroX:
2976 case AArch64::STRHroX:
2977 case AArch64::LDRHHroX:
2978 case AArch64::STRHHroX:
2979 case AArch64::LDRSHXroX:
2980 case AArch64::LDRSHWroX:
2981 case AArch64::LDRHui:
2982 case AArch64::STRHui:
2983 case AArch64::LDRHHui:
2984 case AArch64::STRHHui:
2985 case AArch64::LDRSHXui:
2986 case AArch64::LDRSHWui:
2987 NumBytes = 2;
2988 OffsetScale = 2;
2989 break;
2990 }
2991
2992 // Check the fold operand is not the loaded/stored value.
2993 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2994 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2995 return false;
2996
2997 // Handle memory instructions with a [Reg, Reg] addressing mode.
2998 if (MemI.getOperand(2).isReg()) {
2999 // Bail if the addressing mode already includes extension of the offset
3000 // register.
3001 if (MemI.getOperand(3).getImm())
3002 return false;
3003
3004 // Check if we actually have a scaled offset.
3005 if (MemI.getOperand(4).getImm() == 0)
3006 OffsetScale = 1;
3007
3008 // If the address instruction is folded into the base register, then the
3009 // addressing mode must not have a scale. Then we can swap the base and the
3010 // scaled registers.
3011 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3012 return false;
3013
3014 switch (AddrI.getOpcode()) {
3015 default:
3016 return false;
3017
3018 case AArch64::SBFMXri:
3019 // sxtw Xa, Wm
3020 // ldr Xd, [Xn, Xa, lsl #N]
3021 // ->
3022 // ldr Xd, [Xn, Wm, sxtw #N]
3023 if (AddrI.getOperand(2).getImm() != 0 ||
3024 AddrI.getOperand(3).getImm() != 31)
3025 return false;
3026
3027 AM.BaseReg = MemI.getOperand(1).getReg();
3028 if (AM.BaseReg == Reg)
3029 AM.BaseReg = MemI.getOperand(2).getReg();
3030 AM.ScaledReg = AddrI.getOperand(1).getReg();
3031 AM.Scale = OffsetScale;
3032 AM.Displacement = 0;
3033 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3034 return true;
3035
3036 case TargetOpcode::SUBREG_TO_REG: {
3037 // mov Wa, Wm
3038 // ldr Xd, [Xn, Xa, lsl #N]
3039 // ->
3040 // ldr Xd, [Xn, Wm, uxtw #N]
3041
3042 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3043 if (AddrI.getOperand(1).getImm() != 0 ||
3044 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3045 return false;
3046
3047 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3048 Register OffsetReg = AddrI.getOperand(2).getReg();
3049 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3050 return false;
3051
3052 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3053 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3054 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3055 DefMI.getOperand(3).getImm() != 0)
3056 return false;
3057
3058 AM.BaseReg = MemI.getOperand(1).getReg();
3059 if (AM.BaseReg == Reg)
3060 AM.BaseReg = MemI.getOperand(2).getReg();
3061 AM.ScaledReg = DefMI.getOperand(2).getReg();
3062 AM.Scale = OffsetScale;
3063 AM.Displacement = 0;
3064 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3065 return true;
3066 }
3067 }
3068 }
3069
3070 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3071
3072 // Check we are not breaking a potential conversion to an LDP.
3073 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3074 int64_t NewOffset) -> bool {
3075 int64_t MinOffset, MaxOffset;
3076 switch (NumBytes) {
3077 default:
3078 return true;
3079 case 4:
3080 MinOffset = -256;
3081 MaxOffset = 252;
3082 break;
3083 case 8:
3084 MinOffset = -512;
3085 MaxOffset = 504;
3086 break;
3087 case 16:
3088 MinOffset = -1024;
3089 MaxOffset = 1008;
3090 break;
3091 }
3092 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3093 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3094 };
3095 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3096 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3097 int64_t NewOffset = OldOffset + Disp;
3098 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3099 return false;
3100 // If the old offset would fit into an LDP, but the new offset wouldn't,
3101 // bail out.
3102 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3103 return false;
3104 AM.BaseReg = AddrI.getOperand(1).getReg();
3105 AM.ScaledReg = 0;
3106 AM.Scale = 0;
3107 AM.Displacement = NewOffset;
3108 AM.Form = ExtAddrMode::Formula::Basic;
3109 return true;
3110 };
3111
3112 auto canFoldAddRegIntoAddrMode =
3113 [&](int64_t Scale,
3114 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3115 if (MemI.getOperand(2).getImm() != 0)
3116 return false;
3117 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3118 return false;
3119 AM.BaseReg = AddrI.getOperand(1).getReg();
3120 AM.ScaledReg = AddrI.getOperand(2).getReg();
3121 AM.Scale = Scale;
3122 AM.Displacement = 0;
3123 AM.Form = Form;
3124 return true;
3125 };
3126
3127 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3128 unsigned Opcode = MemI.getOpcode();
3129 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3130 Subtarget.isSTRQroSlow();
3131 };
3132
3133 int64_t Disp = 0;
3134 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3135 switch (AddrI.getOpcode()) {
3136 default:
3137 return false;
3138
3139 case AArch64::ADDXri:
3140 // add Xa, Xn, #N
3141 // ldr Xd, [Xa, #M]
3142 // ->
3143 // ldr Xd, [Xn, #N'+M]
3144 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3145 return canFoldAddSubImmIntoAddrMode(Disp);
3146
3147 case AArch64::SUBXri:
3148 // sub Xa, Xn, #N
3149 // ldr Xd, [Xa, #M]
3150 // ->
3151 // ldr Xd, [Xn, #N'+M]
3152 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3153 return canFoldAddSubImmIntoAddrMode(-Disp);
3154
3155 case AArch64::ADDXrs: {
3156 // add Xa, Xn, Xm, lsl #N
3157 // ldr Xd, [Xa]
3158 // ->
3159 // ldr Xd, [Xn, Xm, lsl #N]
3160
3161 // Don't fold the add if the result would be slower, unless optimising for
3162 // size.
3163 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3164 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
3165 return false;
3166 Shift = AArch64_AM::getShiftValue(Shift);
3167 if (!OptSize) {
3168 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3169 return false;
3170 if (avoidSlowSTRQ(MemI))
3171 return false;
3172 }
3173 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3174 }
3175
3176 case AArch64::ADDXrr:
3177 // add Xa, Xn, Xm
3178 // ldr Xd, [Xa]
3179 // ->
3180 // ldr Xd, [Xn, Xm, lsl #0]
3181
3182 // Don't fold the add if the result would be slower, unless optimising for
3183 // size.
3184 if (!OptSize && avoidSlowSTRQ(MemI))
3185 return false;
3186 return canFoldAddRegIntoAddrMode(1);
3187
3188 case AArch64::ADDXrx:
3189 // add Xa, Xn, Wm, {s,u}xtw #N
3190 // ldr Xd, [Xa]
3191 // ->
3192 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3193
3194 // Don't fold the add if the result would be slower, unless optimising for
3195 // size.
3196 if (!OptSize && avoidSlowSTRQ(MemI))
3197 return false;
3198
3199 // Can fold only sign-/zero-extend of a word.
3200 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3201 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3202 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3203 return false;
3204
3205 return canFoldAddRegIntoAddrMode(
3206 1ULL << AArch64_AM::getArithShiftValue(Imm),
3207 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3208 : ExtAddrMode::Formula::ZExtScaledReg);
3209 }
3210}
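// Editorial sketch (not part of the upstream source) of the folds this
// function validates. Register form:
//   add x8, x0, x1, lsl #3
//   ldr x9, [x8]
//      -> ldr x9, [x0, x1, lsl #3]
// Immediate form:
//   add x8, x0, #32
//   ldr x9, [x8, #8]
//      -> ldr x9, [x0, #40]
// both subject to the legality and LDP-friendliness checks above.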
3211
3212// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3213// return the opcode of an instruction performing the same operation, but using
3214// the [Reg, Reg] addressing mode.
3215static unsigned regOffsetOpcode(unsigned Opcode) {
3216 switch (Opcode) {
3217 default:
3218 llvm_unreachable("Address folding not implemented for instruction");
3219
3220 case AArch64::LDURQi:
3221 case AArch64::LDRQui:
3222 return AArch64::LDRQroX;
3223 case AArch64::STURQi:
3224 case AArch64::STRQui:
3225 return AArch64::STRQroX;
3226 case AArch64::LDURDi:
3227 case AArch64::LDRDui:
3228 return AArch64::LDRDroX;
3229 case AArch64::STURDi:
3230 case AArch64::STRDui:
3231 return AArch64::STRDroX;
3232 case AArch64::LDURXi:
3233 case AArch64::LDRXui:
3234 return AArch64::LDRXroX;
3235 case AArch64::STURXi:
3236 case AArch64::STRXui:
3237 return AArch64::STRXroX;
3238 case AArch64::LDURWi:
3239 case AArch64::LDRWui:
3240 return AArch64::LDRWroX;
3241 case AArch64::LDURSWi:
3242 case AArch64::LDRSWui:
3243 return AArch64::LDRSWroX;
3244 case AArch64::STURWi:
3245 case AArch64::STRWui:
3246 return AArch64::STRWroX;
3247 case AArch64::LDURHi:
3248 case AArch64::LDRHui:
3249 return AArch64::LDRHroX;
3250 case AArch64::STURHi:
3251 case AArch64::STRHui:
3252 return AArch64::STRHroX;
3253 case AArch64::LDURHHi:
3254 case AArch64::LDRHHui:
3255 return AArch64::LDRHHroX;
3256 case AArch64::STURHHi:
3257 case AArch64::STRHHui:
3258 return AArch64::STRHHroX;
3259 case AArch64::LDURSHXi:
3260 case AArch64::LDRSHXui:
3261 return AArch64::LDRSHXroX;
3262 case AArch64::LDURSHWi:
3263 case AArch64::LDRSHWui:
3264 return AArch64::LDRSHWroX;
3265 case AArch64::LDURBi:
3266 case AArch64::LDRBui:
3267 return AArch64::LDRBroX;
3268 case AArch64::LDURBBi:
3269 case AArch64::LDRBBui:
3270 return AArch64::LDRBBroX;
3271 case AArch64::LDURSBXi:
3272 case AArch64::LDRSBXui:
3273 return AArch64::LDRSBXroX;
3274 case AArch64::LDURSBWi:
3275 case AArch64::LDRSBWui:
3276 return AArch64::LDRSBWroX;
3277 case AArch64::STURBi:
3278 case AArch64::STRBui:
3279 return AArch64::STRBroX;
3280 case AArch64::STURBBi:
3281 case AArch64::STRBBui:
3282 return AArch64::STRBBroX;
3283 }
3284}
3285
3286// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3287// the opcode of an instruction performing the same operation, but using the
3288// [Reg, #Imm] addressing mode with scaled offset.
3289unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3290 switch (Opcode) {
3291 default:
3292 llvm_unreachable("Address folding not implemented for instruction");
3293
3294 case AArch64::LDURQi:
3295 Scale = 16;
3296 return AArch64::LDRQui;
3297 case AArch64::STURQi:
3298 Scale = 16;
3299 return AArch64::STRQui;
3300 case AArch64::LDURDi:
3301 Scale = 8;
3302 return AArch64::LDRDui;
3303 case AArch64::STURDi:
3304 Scale = 8;
3305 return AArch64::STRDui;
3306 case AArch64::LDURXi:
3307 Scale = 8;
3308 return AArch64::LDRXui;
3309 case AArch64::STURXi:
3310 Scale = 8;
3311 return AArch64::STRXui;
3312 case AArch64::LDURWi:
3313 Scale = 4;
3314 return AArch64::LDRWui;
3315 case AArch64::LDURSWi:
3316 Scale = 4;
3317 return AArch64::LDRSWui;
3318 case AArch64::STURWi:
3319 Scale = 4;
3320 return AArch64::STRWui;
3321 case AArch64::LDURHi:
3322 Scale = 2;
3323 return AArch64::LDRHui;
3324 case AArch64::STURHi:
3325 Scale = 2;
3326 return AArch64::STRHui;
3327 case AArch64::LDURHHi:
3328 Scale = 2;
3329 return AArch64::LDRHHui;
3330 case AArch64::STURHHi:
3331 Scale = 2;
3332 return AArch64::STRHHui;
3333 case AArch64::LDURSHXi:
3334 Scale = 2;
3335 return AArch64::LDRSHXui;
3336 case AArch64::LDURSHWi:
3337 Scale = 2;
3338 return AArch64::LDRSHWui;
3339 case AArch64::LDURBi:
3340 Scale = 1;
3341 return AArch64::LDRBui;
3342 case AArch64::LDURBBi:
3343 Scale = 1;
3344 return AArch64::LDRBBui;
3345 case AArch64::LDURSBXi:
3346 Scale = 1;
3347 return AArch64::LDRSBXui;
3348 case AArch64::LDURSBWi:
3349 Scale = 1;
3350 return AArch64::LDRSBWui;
3351 case AArch64::STURBi:
3352 Scale = 1;
3353 return AArch64::STRBui;
3354 case AArch64::STURBBi:
3355 Scale = 1;
3356 return AArch64::STRBBui;
3357 case AArch64::LDRQui:
3358 case AArch64::STRQui:
3359 Scale = 16;
3360 return Opcode;
3361 case AArch64::LDRDui:
3362 case AArch64::STRDui:
3363 case AArch64::LDRXui:
3364 case AArch64::STRXui:
3365 Scale = 8;
3366 return Opcode;
3367 case AArch64::LDRWui:
3368 case AArch64::LDRSWui:
3369 case AArch64::STRWui:
3370 Scale = 4;
3371 return Opcode;
3372 case AArch64::LDRHui:
3373 case AArch64::STRHui:
3374 case AArch64::LDRHHui:
3375 case AArch64::STRHHui:
3376 case AArch64::LDRSHXui:
3377 case AArch64::LDRSHWui:
3378 Scale = 2;
3379 return Opcode;
3380 case AArch64::LDRBui:
3381 case AArch64::LDRBBui:
3382 case AArch64::LDRSBXui:
3383 case AArch64::LDRSBWui:
3384 case AArch64::STRBui:
3385 case AArch64::STRBBui:
3386 Scale = 1;
3387 return Opcode;
3388 }
3389}
3390
3391// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3392// the opcode of an instruction performing the same operation, but using the
3393// [Reg, #Imm] addressing mode with unscaled offset.
3394unsigned unscaledOffsetOpcode(unsigned Opcode) {
3395 switch (Opcode) {
3396 default:
3397 llvm_unreachable("Address folding not implemented for instruction");
3398
3399 case AArch64::LDURQi:
3400 case AArch64::STURQi:
3401 case AArch64::LDURDi:
3402 case AArch64::STURDi:
3403 case AArch64::LDURXi:
3404 case AArch64::STURXi:
3405 case AArch64::LDURWi:
3406 case AArch64::LDURSWi:
3407 case AArch64::STURWi:
3408 case AArch64::LDURHi:
3409 case AArch64::STURHi:
3410 case AArch64::LDURHHi:
3411 case AArch64::STURHHi:
3412 case AArch64::LDURSHXi:
3413 case AArch64::LDURSHWi:
3414 case AArch64::LDURBi:
3415 case AArch64::STURBi:
3416 case AArch64::LDURBBi:
3417 case AArch64::STURBBi:
3418 case AArch64::LDURSBWi:
3419 case AArch64::LDURSBXi:
3420 return Opcode;
3421 case AArch64::LDRQui:
3422 return AArch64::LDURQi;
3423 case AArch64::STRQui:
3424 return AArch64::STURQi;
3425 case AArch64::LDRDui:
3426 return AArch64::LDURDi;
3427 case AArch64::STRDui:
3428 return AArch64::STURDi;
3429 case AArch64::LDRXui:
3430 return AArch64::LDURXi;
3431 case AArch64::STRXui:
3432 return AArch64::STURXi;
3433 case AArch64::LDRWui:
3434 return AArch64::LDURWi;
3435 case AArch64::LDRSWui:
3436 return AArch64::LDURSWi;
3437 case AArch64::STRWui:
3438 return AArch64::STURWi;
3439 case AArch64::LDRHui:
3440 return AArch64::LDURHi;
3441 case AArch64::STRHui:
3442 return AArch64::STURHi;
3443 case AArch64::LDRHHui:
3444 return AArch64::LDURHHi;
3445 case AArch64::STRHHui:
3446 return AArch64::STURHHi;
3447 case AArch64::LDRSHXui:
3448 return AArch64::LDURSHXi;
3449 case AArch64::LDRSHWui:
3450 return AArch64::LDURSHWi;
3451 case AArch64::LDRBBui:
3452 return AArch64::LDURBBi;
3453 case AArch64::LDRBui:
3454 return AArch64::LDURBi;
3455 case AArch64::STRBBui:
3456 return AArch64::STURBBi;
3457 case AArch64::STRBui:
3458 return AArch64::STURBi;
3459 case AArch64::LDRSBWui:
3460 return AArch64::LDURSBWi;
3461 case AArch64::LDRSBXui:
3462 return AArch64::LDURSBXi;
3463 }
3464}
3465
3466// Given the opcode of a memory load/store instruction, return the opcode of an
3467// instruction performing the same operation, but using
3468// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3469// offset register.
3470static unsigned offsetExtendOpcode(unsigned Opcode) {
3471 switch (Opcode) {
3472 default:
3473 llvm_unreachable("Address folding not implemented for instruction");
3474
3475 case AArch64::LDRQroX:
3476 case AArch64::LDURQi:
3477 case AArch64::LDRQui:
3478 return AArch64::LDRQroW;
3479 case AArch64::STRQroX:
3480 case AArch64::STURQi:
3481 case AArch64::STRQui:
3482 return AArch64::STRQroW;
3483 case AArch64::LDRDroX:
3484 case AArch64::LDURDi:
3485 case AArch64::LDRDui:
3486 return AArch64::LDRDroW;
3487 case AArch64::STRDroX:
3488 case AArch64::STURDi:
3489 case AArch64::STRDui:
3490 return AArch64::STRDroW;
3491 case AArch64::LDRXroX:
3492 case AArch64::LDURXi:
3493 case AArch64::LDRXui:
3494 return AArch64::LDRXroW;
3495 case AArch64::STRXroX:
3496 case AArch64::STURXi:
3497 case AArch64::STRXui:
3498 return AArch64::STRXroW;
3499 case AArch64::LDRWroX:
3500 case AArch64::LDURWi:
3501 case AArch64::LDRWui:
3502 return AArch64::LDRWroW;
3503 case AArch64::LDRSWroX:
3504 case AArch64::LDURSWi:
3505 case AArch64::LDRSWui:
3506 return AArch64::LDRSWroW;
3507 case AArch64::STRWroX:
3508 case AArch64::STURWi:
3509 case AArch64::STRWui:
3510 return AArch64::STRWroW;
3511 case AArch64::LDRHroX:
3512 case AArch64::LDURHi:
3513 case AArch64::LDRHui:
3514 return AArch64::LDRHroW;
3515 case AArch64::STRHroX:
3516 case AArch64::STURHi:
3517 case AArch64::STRHui:
3518 return AArch64::STRHroW;
3519 case AArch64::LDRHHroX:
3520 case AArch64::LDURHHi:
3521 case AArch64::LDRHHui:
3522 return AArch64::LDRHHroW;
3523 case AArch64::STRHHroX:
3524 case AArch64::STURHHi:
3525 case AArch64::STRHHui:
3526 return AArch64::STRHHroW;
3527 case AArch64::LDRSHXroX:
3528 case AArch64::LDURSHXi:
3529 case AArch64::LDRSHXui:
3530 return AArch64::LDRSHXroW;
3531 case AArch64::LDRSHWroX:
3532 case AArch64::LDURSHWi:
3533 case AArch64::LDRSHWui:
3534 return AArch64::LDRSHWroW;
3535 case AArch64::LDRBroX:
3536 case AArch64::LDURBi:
3537 case AArch64::LDRBui:
3538 return AArch64::LDRBroW;
3539 case AArch64::LDRBBroX:
3540 case AArch64::LDURBBi:
3541 case AArch64::LDRBBui:
3542 return AArch64::LDRBBroW;
3543 case AArch64::LDRSBXroX:
3544 case AArch64::LDURSBXi:
3545 case AArch64::LDRSBXui:
3546 return AArch64::LDRSBXroW;
3547 case AArch64::LDRSBWroX:
3548 case AArch64::LDURSBWi:
3549 case AArch64::LDRSBWui:
3550 return AArch64::LDRSBWroW;
3551 case AArch64::STRBroX:
3552 case AArch64::STURBi:
3553 case AArch64::STRBui:
3554 return AArch64::STRBroW;
3555 case AArch64::STRBBroX:
3556 case AArch64::STURBBi:
3557 case AArch64::STRBBui:
3558 return AArch64::STRBBroW;
3559 }
3560}
3561
3562MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3563 const ExtAddrMode &AM) const {
3564
3565 const DebugLoc &DL = MemI.getDebugLoc();
3566 MachineBasicBlock &MBB = *MemI.getParent();
3567 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3568
3569 if (AM.Form == ExtAddrMode::Formula::Basic) {
3570 if (AM.ScaledReg) {
3571 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3572 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3573 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3574 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3575 .addReg(MemI.getOperand(0).getReg(),
3576 MemI.mayLoad() ? RegState::Define : 0)
3577 .addReg(AM.BaseReg)
3578 .addReg(AM.ScaledReg)
3579 .addImm(0)
3580 .addImm(AM.Scale > 1)
3581 .setMemRefs(MemI.memoperands())
3582 .setMIFlags(MemI.getFlags());
3583 return B.getInstr();
3584 }
3585
3586 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3587 "Addressing mode not supported for folding");
3588
3589 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3590 unsigned Scale = 1;
3591 unsigned Opcode = MemI.getOpcode();
3592 if (isInt<9>(AM.Displacement))
3593 Opcode = unscaledOffsetOpcode(Opcode);
3594 else
3595 Opcode = scaledOffsetOpcode(Opcode, Scale);
3596
3597 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3598 .addReg(MemI.getOperand(0).getReg(),
3599 MemI.mayLoad() ? RegState::Define : 0)
3600 .addReg(AM.BaseReg)
3601 .addImm(AM.Displacement / Scale)
3602 .setMemRefs(MemI.memoperands())
3603 .setMIFlags(MemI.getFlags());
3604 return B.getInstr();
3605 }
3606
3607 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3608 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3609 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3610 assert(AM.ScaledReg && !AM.Displacement &&
3611 "Address offset can be a register or an immediate, but not both");
3612 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3613 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3614 // Make sure the offset register is in the correct register class.
3615 Register OffsetReg = AM.ScaledReg;
3616 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3617 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3618 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3619 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3620 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3621 }
3622 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3623 .addReg(MemI.getOperand(0).getReg(),
3624 MemI.mayLoad() ? RegState::Define : 0)
3625 .addReg(AM.BaseReg)
3626 .addReg(OffsetReg)
3627 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3628 .addImm(AM.Scale != 1)
3629 .setMemRefs(MemI.memoperands())
3630 .setMIFlags(MemI.getFlags());
3631
3632 return B.getInstr();
3633 }
3634
3635 llvm_unreachable(
3636 "Function must not be called with an addressing mode it can't handle");
3637}
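// Editorial sketch (not part of the upstream source): for a basic addressing
// mode with BaseReg = x0, ScaledReg = x1 and Scale = 8, an LDRXui rewritten by
// the code above becomes
//   ldr x9, [x0, x1, lsl #3]
// and for BaseReg = x0 with Displacement = 40 it becomes the scaled or
// unscaled immediate form, e.g.
//   ldr x9, [x0, #40]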
3638
3639/// Return true if the opcode is a post-index ld/st instruction, which really
3640/// loads from base+0.
3641static bool isPostIndexLdStOpcode(unsigned Opcode) {
3642 switch (Opcode) {
3643 default:
3644 return false;
3645 case AArch64::LD1Fourv16b_POST:
3646 case AArch64::LD1Fourv1d_POST:
3647 case AArch64::LD1Fourv2d_POST:
3648 case AArch64::LD1Fourv2s_POST:
3649 case AArch64::LD1Fourv4h_POST:
3650 case AArch64::LD1Fourv4s_POST:
3651 case AArch64::LD1Fourv8b_POST:
3652 case AArch64::LD1Fourv8h_POST:
3653 case AArch64::LD1Onev16b_POST:
3654 case AArch64::LD1Onev1d_POST:
3655 case AArch64::LD1Onev2d_POST:
3656 case AArch64::LD1Onev2s_POST:
3657 case AArch64::LD1Onev4h_POST:
3658 case AArch64::LD1Onev4s_POST:
3659 case AArch64::LD1Onev8b_POST:
3660 case AArch64::LD1Onev8h_POST:
3661 case AArch64::LD1Rv16b_POST:
3662 case AArch64::LD1Rv1d_POST:
3663 case AArch64::LD1Rv2d_POST:
3664 case AArch64::LD1Rv2s_POST:
3665 case AArch64::LD1Rv4h_POST:
3666 case AArch64::LD1Rv4s_POST:
3667 case AArch64::LD1Rv8b_POST:
3668 case AArch64::LD1Rv8h_POST:
3669 case AArch64::LD1Threev16b_POST:
3670 case AArch64::LD1Threev1d_POST:
3671 case AArch64::LD1Threev2d_POST:
3672 case AArch64::LD1Threev2s_POST:
3673 case AArch64::LD1Threev4h_POST:
3674 case AArch64::LD1Threev4s_POST:
3675 case AArch64::LD1Threev8b_POST:
3676 case AArch64::LD1Threev8h_POST:
3677 case AArch64::LD1Twov16b_POST:
3678 case AArch64::LD1Twov1d_POST:
3679 case AArch64::LD1Twov2d_POST:
3680 case AArch64::LD1Twov2s_POST:
3681 case AArch64::LD1Twov4h_POST:
3682 case AArch64::LD1Twov4s_POST:
3683 case AArch64::LD1Twov8b_POST:
3684 case AArch64::LD1Twov8h_POST:
3685 case AArch64::LD1i16_POST:
3686 case AArch64::LD1i32_POST:
3687 case AArch64::LD1i64_POST:
3688 case AArch64::LD1i8_POST:
3689 case AArch64::LD2Rv16b_POST:
3690 case AArch64::LD2Rv1d_POST:
3691 case AArch64::LD2Rv2d_POST:
3692 case AArch64::LD2Rv2s_POST:
3693 case AArch64::LD2Rv4h_POST:
3694 case AArch64::LD2Rv4s_POST:
3695 case AArch64::LD2Rv8b_POST:
3696 case AArch64::LD2Rv8h_POST:
3697 case AArch64::LD2Twov16b_POST:
3698 case AArch64::LD2Twov2d_POST:
3699 case AArch64::LD2Twov2s_POST:
3700 case AArch64::LD2Twov4h_POST:
3701 case AArch64::LD2Twov4s_POST:
3702 case AArch64::LD2Twov8b_POST:
3703 case AArch64::LD2Twov8h_POST:
3704 case AArch64::LD2i16_POST:
3705 case AArch64::LD2i32_POST:
3706 case AArch64::LD2i64_POST:
3707 case AArch64::LD2i8_POST:
3708 case AArch64::LD3Rv16b_POST:
3709 case AArch64::LD3Rv1d_POST:
3710 case AArch64::LD3Rv2d_POST:
3711 case AArch64::LD3Rv2s_POST:
3712 case AArch64::LD3Rv4h_POST:
3713 case AArch64::LD3Rv4s_POST:
3714 case AArch64::LD3Rv8b_POST:
3715 case AArch64::LD3Rv8h_POST:
3716 case AArch64::LD3Threev16b_POST:
3717 case AArch64::LD3Threev2d_POST:
3718 case AArch64::LD3Threev2s_POST:
3719 case AArch64::LD3Threev4h_POST:
3720 case AArch64::LD3Threev4s_POST:
3721 case AArch64::LD3Threev8b_POST:
3722 case AArch64::LD3Threev8h_POST:
3723 case AArch64::LD3i16_POST:
3724 case AArch64::LD3i32_POST:
3725 case AArch64::LD3i64_POST:
3726 case AArch64::LD3i8_POST:
3727 case AArch64::LD4Fourv16b_POST:
3728 case AArch64::LD4Fourv2d_POST:
3729 case AArch64::LD4Fourv2s_POST:
3730 case AArch64::LD4Fourv4h_POST:
3731 case AArch64::LD4Fourv4s_POST:
3732 case AArch64::LD4Fourv8b_POST:
3733 case AArch64::LD4Fourv8h_POST:
3734 case AArch64::LD4Rv16b_POST:
3735 case AArch64::LD4Rv1d_POST:
3736 case AArch64::LD4Rv2d_POST:
3737 case AArch64::LD4Rv2s_POST:
3738 case AArch64::LD4Rv4h_POST:
3739 case AArch64::LD4Rv4s_POST:
3740 case AArch64::LD4Rv8b_POST:
3741 case AArch64::LD4Rv8h_POST:
3742 case AArch64::LD4i16_POST:
3743 case AArch64::LD4i32_POST:
3744 case AArch64::LD4i64_POST:
3745 case AArch64::LD4i8_POST:
3746 case AArch64::LDAPRWpost:
3747 case AArch64::LDAPRXpost:
3748 case AArch64::LDIAPPWpost:
3749 case AArch64::LDIAPPXpost:
3750 case AArch64::LDPDpost:
3751 case AArch64::LDPQpost:
3752 case AArch64::LDPSWpost:
3753 case AArch64::LDPSpost:
3754 case AArch64::LDPWpost:
3755 case AArch64::LDPXpost:
3756 case AArch64::LDRBBpost:
3757 case AArch64::LDRBpost:
3758 case AArch64::LDRDpost:
3759 case AArch64::LDRHHpost:
3760 case AArch64::LDRHpost:
3761 case AArch64::LDRQpost:
3762 case AArch64::LDRSBWpost:
3763 case AArch64::LDRSBXpost:
3764 case AArch64::LDRSHWpost:
3765 case AArch64::LDRSHXpost:
3766 case AArch64::LDRSWpost:
3767 case AArch64::LDRSpost:
3768 case AArch64::LDRWpost:
3769 case AArch64::LDRXpost:
3770 case AArch64::ST1Fourv16b_POST:
3771 case AArch64::ST1Fourv1d_POST:
3772 case AArch64::ST1Fourv2d_POST:
3773 case AArch64::ST1Fourv2s_POST:
3774 case AArch64::ST1Fourv4h_POST:
3775 case AArch64::ST1Fourv4s_POST:
3776 case AArch64::ST1Fourv8b_POST:
3777 case AArch64::ST1Fourv8h_POST:
3778 case AArch64::ST1Onev16b_POST:
3779 case AArch64::ST1Onev1d_POST:
3780 case AArch64::ST1Onev2d_POST:
3781 case AArch64::ST1Onev2s_POST:
3782 case AArch64::ST1Onev4h_POST:
3783 case AArch64::ST1Onev4s_POST:
3784 case AArch64::ST1Onev8b_POST:
3785 case AArch64::ST1Onev8h_POST:
3786 case AArch64::ST1Threev16b_POST:
3787 case AArch64::ST1Threev1d_POST:
3788 case AArch64::ST1Threev2d_POST:
3789 case AArch64::ST1Threev2s_POST:
3790 case AArch64::ST1Threev4h_POST:
3791 case AArch64::ST1Threev4s_POST:
3792 case AArch64::ST1Threev8b_POST:
3793 case AArch64::ST1Threev8h_POST:
3794 case AArch64::ST1Twov16b_POST:
3795 case AArch64::ST1Twov1d_POST:
3796 case AArch64::ST1Twov2d_POST:
3797 case AArch64::ST1Twov2s_POST:
3798 case AArch64::ST1Twov4h_POST:
3799 case AArch64::ST1Twov4s_POST:
3800 case AArch64::ST1Twov8b_POST:
3801 case AArch64::ST1Twov8h_POST:
3802 case AArch64::ST1i16_POST:
3803 case AArch64::ST1i32_POST:
3804 case AArch64::ST1i64_POST:
3805 case AArch64::ST1i8_POST:
3806 case AArch64::ST2GPostIndex:
3807 case AArch64::ST2Twov16b_POST:
3808 case AArch64::ST2Twov2d_POST:
3809 case AArch64::ST2Twov2s_POST:
3810 case AArch64::ST2Twov4h_POST:
3811 case AArch64::ST2Twov4s_POST:
3812 case AArch64::ST2Twov8b_POST:
3813 case AArch64::ST2Twov8h_POST:
3814 case AArch64::ST2i16_POST:
3815 case AArch64::ST2i32_POST:
3816 case AArch64::ST2i64_POST:
3817 case AArch64::ST2i8_POST:
3818 case AArch64::ST3Threev16b_POST:
3819 case AArch64::ST3Threev2d_POST:
3820 case AArch64::ST3Threev2s_POST:
3821 case AArch64::ST3Threev4h_POST:
3822 case AArch64::ST3Threev4s_POST:
3823 case AArch64::ST3Threev8b_POST:
3824 case AArch64::ST3Threev8h_POST:
3825 case AArch64::ST3i16_POST:
3826 case AArch64::ST3i32_POST:
3827 case AArch64::ST3i64_POST:
3828 case AArch64::ST3i8_POST:
3829 case AArch64::ST4Fourv16b_POST:
3830 case AArch64::ST4Fourv2d_POST:
3831 case AArch64::ST4Fourv2s_POST:
3832 case AArch64::ST4Fourv4h_POST:
3833 case AArch64::ST4Fourv4s_POST:
3834 case AArch64::ST4Fourv8b_POST:
3835 case AArch64::ST4Fourv8h_POST:
3836 case AArch64::ST4i16_POST:
3837 case AArch64::ST4i32_POST:
3838 case AArch64::ST4i64_POST:
3839 case AArch64::ST4i8_POST:
3840 case AArch64::STGPostIndex:
3841 case AArch64::STGPpost:
3842 case AArch64::STPDpost:
3843 case AArch64::STPQpost:
3844 case AArch64::STPSpost:
3845 case AArch64::STPWpost:
3846 case AArch64::STPXpost:
3847 case AArch64::STRBBpost:
3848 case AArch64::STRBpost:
3849 case AArch64::STRDpost:
3850 case AArch64::STRHHpost:
3851 case AArch64::STRHpost:
3852 case AArch64::STRQpost:
3853 case AArch64::STRSpost:
3854 case AArch64::STRWpost:
3855 case AArch64::STRXpost:
3856 case AArch64::STZ2GPostIndex:
3857 case AArch64::STZGPostIndex:
3858 return true;
3859 }
3860}
3861
3862bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3863 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3864 bool &OffsetIsScalable, TypeSize &Width,
3865 const TargetRegisterInfo *TRI) const {
3866 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3867 // Handle only loads/stores with base register followed by immediate offset.
3868 if (LdSt.getNumExplicitOperands() == 3) {
3869 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3870 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3871 !LdSt.getOperand(2).isImm())
3872 return false;
3873 } else if (LdSt.getNumExplicitOperands() == 4) {
3874 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3875 if (!LdSt.getOperand(1).isReg() ||
3876 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3877 !LdSt.getOperand(3).isImm())
3878 return false;
3879 } else
3880 return false;
3881
3882 // Get the scaling factor for the instruction and set the width for the
3883 // instruction.
3884 TypeSize Scale(0U, false);
3885 int64_t Dummy1, Dummy2;
3886
3887 // If this returns false, then it's an instruction we don't want to handle.
3888 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3889 return false;
3890
3891 // Compute the offset. The offset is the immediate operand multiplied by the
3892 // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
3893 // instructions are a special case and have an offset of 0.
3894 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
3895 BaseOp = &LdSt.getOperand(2);
3896 Offset = 0;
3897 } else if (LdSt.getNumExplicitOperands() == 3) {
3898 BaseOp = &LdSt.getOperand(1);
3899 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3900 } else {
3901 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3902 BaseOp = &LdSt.getOperand(2);
3903 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3904 }
3905 OffsetIsScalable = Scale.isScalable();
3906
3907 return BaseOp->isReg() || BaseOp->isFI();
3908}
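
// Illustrative example (sketch, not part of the upstream file): for
// `ldp x1, x2, [x0, #16]` (LDPXi) the immediate operand is 2 and the scale
// reported by getMemOpInfo is 8, so this function reports BaseOp = x0,
// Offset = 2 * 8 = 16 bytes, and OffsetIsScalable = false. A post-index form
// such as LDRXpost instead reports Offset = 0, since the access itself is at
// base+0.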
3909
3912 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3913 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3914 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3915 return OfsOp;
3916}
3917
3918bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3919 TypeSize &Width, int64_t &MinOffset,
3920 int64_t &MaxOffset) {
3921 switch (Opcode) {
3922 // Not a memory operation or something we want to handle.
3923 default:
3924 Scale = TypeSize::getFixed(0);
3925 Width = TypeSize::getFixed(0);
3926 MinOffset = MaxOffset = 0;
3927 return false;
3928 // LDR / STR
3929 case AArch64::LDRQui:
3930 case AArch64::STRQui:
3931 Scale = TypeSize::getFixed(16);
3932 Width = TypeSize::getFixed(16);
3933 MinOffset = 0;
3934 MaxOffset = 4095;
3935 break;
3936 case AArch64::LDRXui:
3937 case AArch64::LDRDui:
3938 case AArch64::STRXui:
3939 case AArch64::STRDui:
3940 case AArch64::PRFMui:
3941 Scale = TypeSize::getFixed(8);
3942 Width = TypeSize::getFixed(8);
3943 MinOffset = 0;
3944 MaxOffset = 4095;
3945 break;
3946 case AArch64::LDRWui:
3947 case AArch64::LDRSui:
3948 case AArch64::LDRSWui:
3949 case AArch64::STRWui:
3950 case AArch64::STRSui:
3951 Scale = TypeSize::getFixed(4);
3952 Width = TypeSize::getFixed(4);
3953 MinOffset = 0;
3954 MaxOffset = 4095;
3955 break;
3956 case AArch64::LDRHui:
3957 case AArch64::LDRHHui:
3958 case AArch64::LDRSHWui:
3959 case AArch64::LDRSHXui:
3960 case AArch64::STRHui:
3961 case AArch64::STRHHui:
3962 Scale = TypeSize::getFixed(2);
3963 Width = TypeSize::getFixed(2);
3964 MinOffset = 0;
3965 MaxOffset = 4095;
3966 break;
3967 case AArch64::LDRBui:
3968 case AArch64::LDRBBui:
3969 case AArch64::LDRSBWui:
3970 case AArch64::LDRSBXui:
3971 case AArch64::STRBui:
3972 case AArch64::STRBBui:
3973 Scale = TypeSize::getFixed(1);
3974 Width = TypeSize::getFixed(1);
3975 MinOffset = 0;
3976 MaxOffset = 4095;
3977 break;
3978 // post/pre inc
3979 case AArch64::STRQpre:
3980 case AArch64::LDRQpost:
3981 Scale = TypeSize::getFixed(1);
3982 Width = TypeSize::getFixed(16);
3983 MinOffset = -256;
3984 MaxOffset = 255;
3985 break;
3986 case AArch64::LDRDpost:
3987 case AArch64::LDRDpre:
3988 case AArch64::LDRXpost:
3989 case AArch64::LDRXpre:
3990 case AArch64::STRDpost:
3991 case AArch64::STRDpre:
3992 case AArch64::STRXpost:
3993 case AArch64::STRXpre:
3994 Scale = TypeSize::getFixed(1);
3995 Width = TypeSize::getFixed(8);
3996 MinOffset = -256;
3997 MaxOffset = 255;
3998 break;
3999 case AArch64::STRWpost:
4000 case AArch64::STRWpre:
4001 case AArch64::LDRWpost:
4002 case AArch64::LDRWpre:
4003 case AArch64::STRSpost:
4004 case AArch64::STRSpre:
4005 case AArch64::LDRSpost:
4006 case AArch64::LDRSpre:
4007 Scale = TypeSize::getFixed(1);
4008 Width = TypeSize::getFixed(4);
4009 MinOffset = -256;
4010 MaxOffset = 255;
4011 break;
4012 case AArch64::LDRHpost:
4013 case AArch64::LDRHpre:
4014 case AArch64::STRHpost:
4015 case AArch64::STRHpre:
4016 case AArch64::LDRHHpost:
4017 case AArch64::LDRHHpre:
4018 case AArch64::STRHHpost:
4019 case AArch64::STRHHpre:
4020 Scale = TypeSize::getFixed(1);
4021 Width = TypeSize::getFixed(2);
4022 MinOffset = -256;
4023 MaxOffset = 255;
4024 break;
4025 case AArch64::LDRBpost:
4026 case AArch64::LDRBpre:
4027 case AArch64::STRBpost:
4028 case AArch64::STRBpre:
4029 case AArch64::LDRBBpost:
4030 case AArch64::LDRBBpre:
4031 case AArch64::STRBBpost:
4032 case AArch64::STRBBpre:
4033 Scale = TypeSize::getFixed(1);
4034 Width = TypeSize::getFixed(1);
4035 MinOffset = -256;
4036 MaxOffset = 255;
4037 break;
4038 // Unscaled
4039 case AArch64::LDURQi:
4040 case AArch64::STURQi:
4041 Scale = TypeSize::getFixed(1);
4042 Width = TypeSize::getFixed(16);
4043 MinOffset = -256;
4044 MaxOffset = 255;
4045 break;
4046 case AArch64::LDURXi:
4047 case AArch64::LDURDi:
4048 case AArch64::LDAPURXi:
4049 case AArch64::STURXi:
4050 case AArch64::STURDi:
4051 case AArch64::STLURXi:
4052 case AArch64::PRFUMi:
4053 Scale = TypeSize::getFixed(1);
4054 Width = TypeSize::getFixed(8);
4055 MinOffset = -256;
4056 MaxOffset = 255;
4057 break;
4058 case AArch64::LDURWi:
4059 case AArch64::LDURSi:
4060 case AArch64::LDURSWi:
4061 case AArch64::LDAPURi:
4062 case AArch64::LDAPURSWi:
4063 case AArch64::STURWi:
4064 case AArch64::STURSi:
4065 case AArch64::STLURWi:
4066 Scale = TypeSize::getFixed(1);
4067 Width = TypeSize::getFixed(4);
4068 MinOffset = -256;
4069 MaxOffset = 255;
4070 break;
4071 case AArch64::LDURHi:
4072 case AArch64::LDURHHi:
4073 case AArch64::LDURSHXi:
4074 case AArch64::LDURSHWi:
4075 case AArch64::LDAPURHi:
4076 case AArch64::LDAPURSHWi:
4077 case AArch64::LDAPURSHXi:
4078 case AArch64::STURHi:
4079 case AArch64::STURHHi:
4080 case AArch64::STLURHi:
4081 Scale = TypeSize::getFixed(1);
4082 Width = TypeSize::getFixed(2);
4083 MinOffset = -256;
4084 MaxOffset = 255;
4085 break;
4086 case AArch64::LDURBi:
4087 case AArch64::LDURBBi:
4088 case AArch64::LDURSBXi:
4089 case AArch64::LDURSBWi:
4090 case AArch64::LDAPURBi:
4091 case AArch64::LDAPURSBWi:
4092 case AArch64::LDAPURSBXi:
4093 case AArch64::STURBi:
4094 case AArch64::STURBBi:
4095 case AArch64::STLURBi:
4096 Scale = TypeSize::getFixed(1);
4097 Width = TypeSize::getFixed(1);
4098 MinOffset = -256;
4099 MaxOffset = 255;
4100 break;
4101 // LDP / STP (including pre/post inc)
4102 case AArch64::LDPQi:
4103 case AArch64::LDNPQi:
4104 case AArch64::STPQi:
4105 case AArch64::STNPQi:
4106 case AArch64::LDPQpost:
4107 case AArch64::LDPQpre:
4108 case AArch64::STPQpost:
4109 case AArch64::STPQpre:
4110 Scale = TypeSize::getFixed(16);
4111 Width = TypeSize::getFixed(16 * 2);
4112 MinOffset = -64;
4113 MaxOffset = 63;
4114 break;
4115 case AArch64::LDPXi:
4116 case AArch64::LDPDi:
4117 case AArch64::LDNPXi:
4118 case AArch64::LDNPDi:
4119 case AArch64::STPXi:
4120 case AArch64::STPDi:
4121 case AArch64::STNPXi:
4122 case AArch64::STNPDi:
4123 case AArch64::LDPDpost:
4124 case AArch64::LDPDpre:
4125 case AArch64::LDPXpost:
4126 case AArch64::LDPXpre:
4127 case AArch64::STPDpost:
4128 case AArch64::STPDpre:
4129 case AArch64::STPXpost:
4130 case AArch64::STPXpre:
4131 Scale = TypeSize::getFixed(8);
4132 Width = TypeSize::getFixed(8 * 2);
4133 MinOffset = -64;
4134 MaxOffset = 63;
4135 break;
4136 case AArch64::LDPWi:
4137 case AArch64::LDPSi:
4138 case AArch64::LDNPWi:
4139 case AArch64::LDNPSi:
4140 case AArch64::STPWi:
4141 case AArch64::STPSi:
4142 case AArch64::STNPWi:
4143 case AArch64::STNPSi:
4144 case AArch64::LDPSpost:
4145 case AArch64::LDPSpre:
4146 case AArch64::LDPWpost:
4147 case AArch64::LDPWpre:
4148 case AArch64::STPSpost:
4149 case AArch64::STPSpre:
4150 case AArch64::STPWpost:
4151 case AArch64::STPWpre:
4152 Scale = TypeSize::getFixed(4);
4153 Width = TypeSize::getFixed(4 * 2);
4154 MinOffset = -64;
4155 MaxOffset = 63;
4156 break;
4157 case AArch64::StoreSwiftAsyncContext:
4158 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4159 Scale = TypeSize::getFixed(1);
4160 Width = TypeSize::getFixed(8);
4161 MinOffset = 0;
4162 MaxOffset = 4095;
4163 break;
4164 case AArch64::ADDG:
4165 Scale = TypeSize::getFixed(16);
4166 Width = TypeSize::getFixed(0);
4167 MinOffset = 0;
4168 MaxOffset = 63;
4169 break;
4170 case AArch64::TAGPstack:
4171 Scale = TypeSize::getFixed(16);
4172 Width = TypeSize::getFixed(0);
4173 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4174 // of 63 (not 64!).
4175 MinOffset = -63;
4176 MaxOffset = 63;
4177 break;
4178 case AArch64::LDG:
4179 case AArch64::STGi:
4180 case AArch64::STGPreIndex:
4181 case AArch64::STGPostIndex:
4182 case AArch64::STZGi:
4183 case AArch64::STZGPreIndex:
4184 case AArch64::STZGPostIndex:
4185 Scale = TypeSize::getFixed(16);
4186 Width = TypeSize::getFixed(16);
4187 MinOffset = -256;
4188 MaxOffset = 255;
4189 break;
4190 // SVE
4191 case AArch64::STR_ZZZZXI:
4192 case AArch64::LDR_ZZZZXI:
4193 Scale = TypeSize::getScalable(16);
4194 Width = TypeSize::getScalable(16 * 4);
4195 MinOffset = -256;
4196 MaxOffset = 252;
4197 break;
4198 case AArch64::STR_ZZZXI:
4199 case AArch64::LDR_ZZZXI:
4200 Scale = TypeSize::getScalable(16);
4201 Width = TypeSize::getScalable(16 * 3);
4202 MinOffset = -256;
4203 MaxOffset = 253;
4204 break;
4205 case AArch64::STR_ZZXI:
4206 case AArch64::LDR_ZZXI:
4207 Scale = TypeSize::getScalable(16);
4208 Width = TypeSize::getScalable(16 * 2);
4209 MinOffset = -256;
4210 MaxOffset = 254;
4211 break;
4212 case AArch64::LDR_PXI:
4213 case AArch64::STR_PXI:
4214 Scale = TypeSize::getScalable(2);
4215 Width = TypeSize::getScalable(2);
4216 MinOffset = -256;
4217 MaxOffset = 255;
4218 break;
4219 case AArch64::LDR_PPXI:
4220 case AArch64::STR_PPXI:
4221 Scale = TypeSize::getScalable(2);
4222 Width = TypeSize::getScalable(2 * 2);
4223 MinOffset = -256;
4224 MaxOffset = 254;
4225 break;
4226 case AArch64::LDR_ZXI:
4227 case AArch64::STR_ZXI:
4228 Scale = TypeSize::getScalable(16);
4229 Width = TypeSize::getScalable(16);
4230 MinOffset = -256;
4231 MaxOffset = 255;
4232 break;
4233 case AArch64::LD1B_IMM:
4234 case AArch64::LD1H_IMM:
4235 case AArch64::LD1W_IMM:
4236 case AArch64::LD1D_IMM:
4237 case AArch64::LDNT1B_ZRI:
4238 case AArch64::LDNT1H_ZRI:
4239 case AArch64::LDNT1W_ZRI:
4240 case AArch64::LDNT1D_ZRI:
4241 case AArch64::ST1B_IMM:
4242 case AArch64::ST1H_IMM:
4243 case AArch64::ST1W_IMM:
4244 case AArch64::ST1D_IMM:
4245 case AArch64::STNT1B_ZRI:
4246 case AArch64::STNT1H_ZRI:
4247 case AArch64::STNT1W_ZRI:
4248 case AArch64::STNT1D_ZRI:
4249 case AArch64::LDNF1B_IMM:
4250 case AArch64::LDNF1H_IMM:
4251 case AArch64::LDNF1W_IMM:
4252 case AArch64::LDNF1D_IMM:
4253 // A full vector's worth of data
4254 // Width = mbytes * elements
4255 Scale = TypeSize::getScalable(16);
4256 Width = TypeSize::getScalable(16);
4257 MinOffset = -8;
4258 MaxOffset = 7;
4259 break;
4260 case AArch64::LD2B_IMM:
4261 case AArch64::LD2H_IMM:
4262 case AArch64::LD2W_IMM:
4263 case AArch64::LD2D_IMM:
4264 case AArch64::ST2B_IMM:
4265 case AArch64::ST2H_IMM:
4266 case AArch64::ST2W_IMM:
4267 case AArch64::ST2D_IMM:
4268 Scale = TypeSize::getScalable(32);
4269 Width = TypeSize::getScalable(16 * 2);
4270 MinOffset = -8;
4271 MaxOffset = 7;
4272 break;
4273 case AArch64::LD3B_IMM:
4274 case AArch64::LD3H_IMM:
4275 case AArch64::LD3W_IMM:
4276 case AArch64::LD3D_IMM:
4277 case AArch64::ST3B_IMM:
4278 case AArch64::ST3H_IMM:
4279 case AArch64::ST3W_IMM:
4280 case AArch64::ST3D_IMM:
4281 Scale = TypeSize::getScalable(48);
4282 Width = TypeSize::getScalable(16 * 3);
4283 MinOffset = -8;
4284 MaxOffset = 7;
4285 break;
4286 case AArch64::LD4B_IMM:
4287 case AArch64::LD4H_IMM:
4288 case AArch64::LD4W_IMM:
4289 case AArch64::LD4D_IMM:
4290 case AArch64::ST4B_IMM:
4291 case AArch64::ST4H_IMM:
4292 case AArch64::ST4W_IMM:
4293 case AArch64::ST4D_IMM:
4294 Scale = TypeSize::getScalable(64);
4295 Width = TypeSize::getScalable(16 * 4);
4296 MinOffset = -8;
4297 MaxOffset = 7;
4298 break;
4299 case AArch64::LD1B_H_IMM:
4300 case AArch64::LD1SB_H_IMM:
4301 case AArch64::LD1H_S_IMM:
4302 case AArch64::LD1SH_S_IMM:
4303 case AArch64::LD1W_D_IMM:
4304 case AArch64::LD1SW_D_IMM:
4305 case AArch64::ST1B_H_IMM:
4306 case AArch64::ST1H_S_IMM:
4307 case AArch64::ST1W_D_IMM:
4308 case AArch64::LDNF1B_H_IMM:
4309 case AArch64::LDNF1SB_H_IMM:
4310 case AArch64::LDNF1H_S_IMM:
4311 case AArch64::LDNF1SH_S_IMM:
4312 case AArch64::LDNF1W_D_IMM:
4313 case AArch64::LDNF1SW_D_IMM:
4314 // A half vector's worth of data
4315 // Width = mbytes * elements
4316 Scale = TypeSize::getScalable(8);
4317 Width = TypeSize::getScalable(8);
4318 MinOffset = -8;
4319 MaxOffset = 7;
4320 break;
4321 case AArch64::LD1B_S_IMM:
4322 case AArch64::LD1SB_S_IMM:
4323 case AArch64::LD1H_D_IMM:
4324 case AArch64::LD1SH_D_IMM:
4325 case AArch64::ST1B_S_IMM:
4326 case AArch64::ST1H_D_IMM:
4327 case AArch64::LDNF1B_S_IMM:
4328 case AArch64::LDNF1SB_S_IMM:
4329 case AArch64::LDNF1H_D_IMM:
4330 case AArch64::LDNF1SH_D_IMM:
4331 // A quarter vector's worth of data
4332 // Width = mbytes * elements
4333 Scale = TypeSize::getScalable(4);
4334 Width = TypeSize::getScalable(4);
4335 MinOffset = -8;
4336 MaxOffset = 7;
4337 break;
4338 case AArch64::LD1B_D_IMM:
4339 case AArch64::LD1SB_D_IMM:
4340 case AArch64::ST1B_D_IMM:
4341 case AArch64::LDNF1B_D_IMM:
4342 case AArch64::LDNF1SB_D_IMM:
4343 // An eighth vector's worth of data
4344 // Width = mbytes * elements
4345 Scale = TypeSize::getScalable(2);
4346 Width = TypeSize::getScalable(2);
4347 MinOffset = -8;
4348 MaxOffset = 7;
4349 break;
4350 case AArch64::ST2Gi:
4351 case AArch64::ST2GPreIndex:
4352 case AArch64::ST2GPostIndex:
4353 case AArch64::STZ2Gi:
4354 case AArch64::STZ2GPreIndex:
4355 case AArch64::STZ2GPostIndex:
4356 Scale = TypeSize::getFixed(16);
4357 Width = TypeSize::getFixed(32);
4358 MinOffset = -256;
4359 MaxOffset = 255;
4360 break;
4361 case AArch64::STGPi:
4362 case AArch64::STGPpost:
4363 case AArch64::STGPpre:
4364 Scale = TypeSize::getFixed(16);
4365 Width = TypeSize::getFixed(16);
4366 MinOffset = -64;
4367 MaxOffset = 63;
4368 break;
4369 case AArch64::LD1RB_IMM:
4370 case AArch64::LD1RB_H_IMM:
4371 case AArch64::LD1RB_S_IMM:
4372 case AArch64::LD1RB_D_IMM:
4373 case AArch64::LD1RSB_H_IMM:
4374 case AArch64::LD1RSB_S_IMM:
4375 case AArch64::LD1RSB_D_IMM:
4376 Scale = TypeSize::getFixed(1);
4377 Width = TypeSize::getFixed(1);
4378 MinOffset = 0;
4379 MaxOffset = 63;
4380 break;
4381 case AArch64::LD1RH_IMM:
4382 case AArch64::LD1RH_S_IMM:
4383 case AArch64::LD1RH_D_IMM:
4384 case AArch64::LD1RSH_S_IMM:
4385 case AArch64::LD1RSH_D_IMM:
4386 Scale = TypeSize::getFixed(2);
4387 Width = TypeSize::getFixed(2);
4388 MinOffset = 0;
4389 MaxOffset = 63;
4390 break;
4391 case AArch64::LD1RW_IMM:
4392 case AArch64::LD1RW_D_IMM:
4393 case AArch64::LD1RSW_IMM:
4394 Scale = TypeSize::getFixed(4);
4395 Width = TypeSize::getFixed(4);
4396 MinOffset = 0;
4397 MaxOffset = 63;
4398 break;
4399 case AArch64::LD1RD_IMM:
4400 Scale = TypeSize::getFixed(8);
4401 Width = TypeSize::getFixed(8);
4402 MinOffset = 0;
4403 MaxOffset = 63;
4404 break;
4405 }
4406
4407 return true;
4408}
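
// Illustrative sketch (not part of the upstream file): one way a caller could
// combine the Scale/MinOffset/MaxOffset values above to check whether a byte
// offset is encodable for a given opcode. The helper name is hypothetical.
static bool isEncodableByteOffset(unsigned Opcode, int64_t ByteOffset) {
  TypeSize Scale(0U, false), Width(0U, false);
  int64_t MinOffset, MaxOffset;
  if (!AArch64InstrInfo::getMemOpInfo(Opcode, Scale, Width, MinOffset,
                                      MaxOffset))
    return false;
  // The encoded immediate is ByteOffset / Scale; it must divide evenly and lie
  // in [MinOffset, MaxOffset]. E.g. LDRXui accepts 0..32760 in multiples of 8,
  // while LDURXi accepts any byte offset in [-256, 255].
  int64_t ScaleVal = static_cast<int64_t>(Scale.getKnownMinValue());
  if (ByteOffset % ScaleVal != 0)
    return false;
  int64_t Imm = ByteOffset / ScaleVal;
  return Imm >= MinOffset && Imm <= MaxOffset;
}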
4409
4410// Scaling factor for unscaled load or store.
4411int AArch64InstrInfo::getMemScale(unsigned Opc) {
4412 switch (Opc) {
4413 default:
4414 llvm_unreachable("Opcode has unknown scale!");
4415 case AArch64::LDRBBui:
4416 case AArch64::LDURBBi:
4417 case AArch64::LDRSBWui:
4418 case AArch64::LDURSBWi:
4419 case AArch64::STRBBui:
4420 case AArch64::STURBBi:
4421 return 1;
4422 case AArch64::LDRHHui:
4423 case AArch64::LDURHHi:
4424 case AArch64::LDRSHWui:
4425 case AArch64::LDURSHWi:
4426 case AArch64::STRHHui:
4427 case AArch64::STURHHi:
4428 return 2;
4429 case AArch64::LDRSui:
4430 case AArch64::LDURSi:
4431 case AArch64::LDRSpre:
4432 case AArch64::LDRSWui:
4433 case AArch64::LDURSWi:
4434 case AArch64::LDRSWpre:
4435 case AArch64::LDRWpre:
4436 case AArch64::LDRWui:
4437 case AArch64::LDURWi:
4438 case AArch64::STRSui:
4439 case AArch64::STURSi:
4440 case AArch64::STRSpre:
4441 case AArch64::STRWui:
4442 case AArch64::STURWi:
4443 case AArch64::STRWpre:
4444 case AArch64::LDPSi:
4445 case AArch64::LDPSWi:
4446 case AArch64::LDPWi:
4447 case AArch64::STPSi:
4448 case AArch64::STPWi:
4449 return 4;
4450 case AArch64::LDRDui:
4451 case AArch64::LDURDi:
4452 case AArch64::LDRDpre:
4453 case AArch64::LDRXui:
4454 case AArch64::LDURXi:
4455 case AArch64::LDRXpre:
4456 case AArch64::STRDui:
4457 case AArch64::STURDi:
4458 case AArch64::STRDpre:
4459 case AArch64::STRXui:
4460 case AArch64::STURXi:
4461 case AArch64::STRXpre:
4462 case AArch64::LDPDi:
4463 case AArch64::LDPXi:
4464 case AArch64::STPDi:
4465 case AArch64::STPXi:
4466 return 8;
4467 case AArch64::LDRQui:
4468 case AArch64::LDURQi:
4469 case AArch64::STRQui:
4470 case AArch64::STURQi:
4471 case AArch64::STRQpre:
4472 case AArch64::LDPQi:
4473 case AArch64::LDRQpre:
4474 case AArch64::STPQi:
4475 case AArch64::STGi:
4476 case AArch64::STZGi:
4477 case AArch64::ST2Gi:
4478 case AArch64::STZ2Gi:
4479 case AArch64::STGPi:
4480 return 16;
4481 }
4482}
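
// Illustrative note (sketch): getMemScale returns the access stride in bytes,
// e.g. getMemScale(AArch64::LDRWui) == 4 and getMemScale(AArch64::LDPQi) == 16.
// scaleOffset and shouldClusterFI below use it to convert byte offsets into
// element offsets.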
4483
4484bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4485 switch (MI.getOpcode()) {
4486 default:
4487 return false;
4488 case AArch64::LDRWpre:
4489 case AArch64::LDRXpre:
4490 case AArch64::LDRSWpre:
4491 case AArch64::LDRSpre:
4492 case AArch64::LDRDpre:
4493 case AArch64::LDRQpre:
4494 return true;
4495 }
4496}
4497
4498bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4499 switch (MI.getOpcode()) {
4500 default:
4501 return false;
4502 case AArch64::STRWpre:
4503 case AArch64::STRXpre:
4504 case AArch64::STRSpre:
4505 case AArch64::STRDpre:
4506 case AArch64::STRQpre:
4507 return true;
4508 }
4509}
4510
4511bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4512 return isPreLd(MI) || isPreSt(MI);
4513}
4514
4515bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4516 switch (MI.getOpcode()) {
4517 default:
4518 return false;
4519 case AArch64::LDPSi:
4520 case AArch64::LDPSWi:
4521 case AArch64::LDPDi:
4522 case AArch64::LDPQi:
4523 case AArch64::LDPWi:
4524 case AArch64::LDPXi:
4525 case AArch64::STPSi:
4526 case AArch64::STPDi:
4527 case AArch64::STPQi:
4528 case AArch64::STPWi:
4529 case AArch64::STPXi:
4530 case AArch64::STGPi:
4531 return true;
4532 }
4533}
4534
4534const MachineOperand &
4535AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4536 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4537 unsigned Idx =
4538 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4539 : 1;
4540 return MI.getOperand(Idx);
4541}
4542
4543const MachineOperand &
4544AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4545 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4546 unsigned Idx =
4547 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4548 : 2;
4549 return MI.getOperand(Idx);
4550}
4551
4552const MachineOperand &
4553AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4554 switch (MI.getOpcode()) {
4555 default:
4556 llvm_unreachable("Unexpected opcode");
4557 case AArch64::LDRBroX:
4558 case AArch64::LDRBBroX:
4559 case AArch64::LDRSBXroX:
4560 case AArch64::LDRSBWroX:
4561 case AArch64::LDRHroX:
4562 case AArch64::LDRHHroX:
4563 case AArch64::LDRSHXroX:
4564 case AArch64::LDRSHWroX:
4565 case AArch64::LDRWroX:
4566 case AArch64::LDRSroX:
4567 case AArch64::LDRSWroX:
4568 case AArch64::LDRDroX:
4569 case AArch64::LDRXroX:
4570 case AArch64::LDRQroX:
4571 return MI.getOperand(4);
4572 }
4573}
4574
4575static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4576 Register Reg) {
4577 if (MI.getParent() == nullptr)
4578 return nullptr;
4579 const MachineFunction *MF = MI.getParent()->getParent();
4580 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4581}
4582
4583bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4584 auto IsHFPR = [&](const MachineOperand &Op) {
4585 if (!Op.isReg())
4586 return false;
4587 auto Reg = Op.getReg();
4588 if (Reg.isPhysical())
4589 return AArch64::FPR16RegClass.contains(Reg);
4590 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4591 return TRC == &AArch64::FPR16RegClass ||
4592 TRC == &AArch64::FPR16_loRegClass;
4593 };
4594 return llvm::any_of(MI.operands(), IsHFPR);
4595}
4596
4597bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4598 auto IsQFPR = [&](const MachineOperand &Op) {
4599 if (!Op.isReg())
4600 return false;
4601 auto Reg = Op.getReg();
4602 if (Reg.isPhysical())
4603 return AArch64::FPR128RegClass.contains(Reg);
4604 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4605 return TRC == &AArch64::FPR128RegClass ||
4606 TRC == &AArch64::FPR128_loRegClass;
4607 };
4608 return llvm::any_of(MI.operands(), IsQFPR);
4609}
4610
4612 switch (MI.getOpcode()) {
4613 case AArch64::BRK:
4614 case AArch64::HLT:
4615 case AArch64::PACIASP:
4616 case AArch64::PACIBSP:
4617 // Implicit BTI behavior.
4618 return true;
4619 case AArch64::PAUTH_PROLOGUE:
4620 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4621 return true;
4622 case AArch64::HINT: {
4623 unsigned Imm = MI.getOperand(0).getImm();
4624 // Explicit BTI instruction.
4625 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4626 return true;
4627 // PACI(A|B)SP instructions.
4628 if (Imm == 25 || Imm == 27)
4629 return true;
4630 return false;
4631 }
4632 default:
4633 return false;
4634 }
4635}
4636
4637bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4638 if (Reg == 0)
4639 return false;
4640 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4641 return AArch64::FPR128RegClass.contains(Reg) ||
4642 AArch64::FPR64RegClass.contains(Reg) ||
4643 AArch64::FPR32RegClass.contains(Reg) ||
4644 AArch64::FPR16RegClass.contains(Reg) ||
4645 AArch64::FPR8RegClass.contains(Reg);
4646}
4647
4648bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4649 auto IsFPR = [&](const MachineOperand &Op) {
4650 if (!Op.isReg())
4651 return false;
4652 auto Reg = Op.getReg();
4653 if (Reg.isPhysical())
4654 return isFpOrNEON(Reg);
4655
4656 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4657 return TRC == &AArch64::FPR128RegClass ||
4658 TRC == &AArch64::FPR128_loRegClass ||
4659 TRC == &AArch64::FPR64RegClass ||
4660 TRC == &AArch64::FPR64_loRegClass ||
4661 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4662 TRC == &AArch64::FPR8RegClass;
4663 };
4664 return llvm::any_of(MI.operands(), IsFPR);
4665}
4666
4667// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4668// scaled.
4669static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4670 int Scale = AArch64InstrInfo::getMemScale(Opc);
4671
4672 // If the byte-offset isn't a multiple of the stride, we can't scale this
4673 // offset.
4674 if (Offset % Scale != 0)
4675 return false;
4676
4677 // Convert the byte-offset used by unscaled into an "element" offset used
4678 // by the scaled pair load/store instructions.
4679 Offset /= Scale;
4680 return true;
4681}
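
// Illustrative example (sketch): scaleOffset for an unscaled 64-bit store,
// whose stride from getMemScale is 8:
//   int64_t Off = 24;
//   scaleOffset(AArch64::STURXi, Off);   // returns true, Off is now 3
//   Off = 20;
//   scaleOffset(AArch64::STURXi, Off);   // returns false, 20 % 8 != 0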
4682
4683static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4684 if (FirstOpc == SecondOpc)
4685 return true;
4686 // We can also pair sign-ext and zero-ext instructions.
4687 switch (FirstOpc) {
4688 default:
4689 return false;
4690 case AArch64::STRSui:
4691 case AArch64::STURSi:
4692 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4693 case AArch64::STRDui:
4694 case AArch64::STURDi:
4695 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4696 case AArch64::STRQui:
4697 case AArch64::STURQi:
4698 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4699 case AArch64::STRWui:
4700 case AArch64::STURWi:
4701 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4702 case AArch64::STRXui:
4703 case AArch64::STURXi:
4704 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4705 case AArch64::LDRSui:
4706 case AArch64::LDURSi:
4707 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4708 case AArch64::LDRDui:
4709 case AArch64::LDURDi:
4710 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4711 case AArch64::LDRQui:
4712 case AArch64::LDURQi:
4713 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4714 case AArch64::LDRWui:
4715 case AArch64::LDURWi:
4716 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4717 case AArch64::LDRSWui:
4718 case AArch64::LDURSWi:
4719 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4720 case AArch64::LDRXui:
4721 case AArch64::LDURXi:
4722 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4723 }
4724 // These instructions can't be paired based on their opcodes.
4725 return false;
4726}
4727
4728static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4729 int64_t Offset1, unsigned Opcode1, int FI2,
4730 int64_t Offset2, unsigned Opcode2) {
4731 // Accesses through fixed stack object frame indices may access a different
4732 // fixed stack slot. Check that the object offsets + offsets match.
4733 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4734 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4735 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4736 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4737 // Convert to scaled object offsets.
4738 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4739 if (ObjectOffset1 % Scale1 != 0)
4740 return false;
4741 ObjectOffset1 /= Scale1;
4742 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4743 if (ObjectOffset2 % Scale2 != 0)
4744 return false;
4745 ObjectOffset2 /= Scale2;
4746 ObjectOffset1 += Offset1;
4747 ObjectOffset2 += Offset2;
4748 return ObjectOffset1 + 1 == ObjectOffset2;
4749 }
4750
4751 return FI1 == FI2;
4752}
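
// Illustrative example (sketch): with two fixed stack objects at byte offsets
// 16 and 24, each accessed by an LDRXui with immediate 0, the scaled object
// offsets become 16/8 = 2 and 24/8 = 3. These are consecutive, so
// shouldClusterFI allows the two accesses to be clustered.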
4753
4754/// Detect opportunities for ldp/stp formation.
4755///
4756/// Only called for LdSt for which getMemOperandWithOffset returns true.
4757bool AArch64InstrInfo::shouldClusterMemOps(
4758 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4759 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4760 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4761 unsigned NumBytes) const {
4762 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4763 const MachineOperand &BaseOp1 = *BaseOps1.front();
4764 const MachineOperand &BaseOp2 = *BaseOps2.front();
4765 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4766 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4767 if (BaseOp1.getType() != BaseOp2.getType())
4768 return false;
4769
4770 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4771 "Only base registers and frame indices are supported.");
4772
4773 // Check for both base regs and base FI.
4774 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4775 return false;
4776
4777 // Only cluster up to a single pair.
4778 if (ClusterSize > 2)
4779 return false;
4780
4781 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4782 return false;
4783
4784 // Can we pair these instructions based on their opcodes?
4785 unsigned FirstOpc = FirstLdSt.getOpcode();
4786 unsigned SecondOpc = SecondLdSt.getOpcode();
4787 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4788 return false;
4789
4790 // Can't merge volatiles or load/stores that have a hint to avoid pair
4791 // formation, for example.
4792 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4793 !isCandidateToMergeOrPair(SecondLdSt))
4794 return false;
4795
4796 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4797 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4798 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4799 return false;
4800
4801 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4802 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4803 return false;
4804
4805 // Pairwise instructions have a 7-bit signed offset field.
4806 if (Offset1 > 63 || Offset1 < -64)
4807 return false;
4808
4809 // The caller should already have ordered First/SecondLdSt by offset.
4810 // Note: except for non-equal frame index bases
4811 if (BaseOp1.isFI()) {
4812 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4813 "Caller should have ordered offsets.");
4814
4815 const MachineFrameInfo &MFI =
4816 FirstLdSt.getParent()->getParent()->getFrameInfo();
4817 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4818 BaseOp2.getIndex(), Offset2, SecondOpc);
4819 }
4820
4821 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4822
4823 return Offset1 + 1 == Offset2;
4824}
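
// Illustrative example (sketch): two LDRXui loads from [x0, #1] and [x0, #2]
// (element offsets, i.e. byte offsets 8 and 16) pass every check above,
// assuming neither is volatile or otherwise excluded by
// isCandidateToMergeOrPair, so the scheduler may keep them adjacent for later
// ldp formation. Offsets outside the signed 7-bit pairwise range, e.g. an
// element offset of 64, are rejected.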
4825
4826static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4827 MCRegister Reg, unsigned SubIdx,
4828 unsigned State,
4829 const TargetRegisterInfo *TRI) {
4830 if (!SubIdx)
4831 return MIB.addReg(Reg, State);
4832
4833 if (Register::isPhysicalRegister(Reg))
4834 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4835 return MIB.addReg(Reg, State, SubIdx);
4836}
4837
4838static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4839 unsigned NumRegs) {
4840 // We really want the positive remainder mod 32 here, which happens to be
4841 // easily obtainable with a mask.
4842 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4843}
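
// Illustrative example (sketch): the mod-32 check above flags overlap between
// register tuples. Copying D2_D3 from D1_D2 gives ((2 - 1) & 0x1f) == 1 < 2,
// so a forward sub-register copy would overwrite D2 before it is read as the
// second source register; copyPhysRegTuple therefore copies in reverse order.
// Copying D1_D2 from D3_D4 gives ((1 - 3) & 0x1f) == 30, which is not < 2, so
// the forward order is safe.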
4844
4845void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4846 MachineBasicBlock::iterator I,
4847 const DebugLoc &DL, MCRegister DestReg,
4848 MCRegister SrcReg, bool KillSrc,
4849 unsigned Opcode,
4850 ArrayRef<unsigned> Indices) const {
4851 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4852 const TargetRegisterInfo *TRI = &getRegisterInfo();
4853 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4854 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4855 unsigned NumRegs = Indices.size();
4856
4857 int SubReg = 0, End = NumRegs, Incr = 1;
4858 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4859 SubReg = NumRegs - 1;
4860 End = -1;
4861 Incr = -1;
4862 }
4863
4864 for (; SubReg != End; SubReg += Incr) {
4865 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4866 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4867 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4868 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4869 }
4870}
4871
4872void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4873 MachineBasicBlock::iterator I,
4874 const DebugLoc &DL, MCRegister DestReg,
4875 MCRegister SrcReg, bool KillSrc,
4876 unsigned Opcode, unsigned ZeroReg,
4877 llvm::ArrayRef<unsigned> Indices) const {
4878 const TargetRegisterInfo *TRI = &getRegisterInfo();
4879 unsigned NumRegs = Indices.size();
4880
4881#ifndef NDEBUG
4882 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4883 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4884 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4885 "GPR reg sequences should not be able to overlap");
4886#endif
4887
4888 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4889 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4890 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4891 MIB.addReg(ZeroReg);
4892 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4893 MIB.addImm(0);
4894 }
4895}
4896
4897void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4898 MachineBasicBlock::iterator I,
4899 const DebugLoc &DL, MCRegister DestReg,
4900 MCRegister SrcReg, bool KillSrc,
4901 bool RenamableDest,
4902 bool RenamableSrc) const {
4903 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4904 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4905 const TargetRegisterInfo *TRI = &getRegisterInfo();
4906
4907 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4908 // If either operand is WSP, expand to ADD #0.
4909 if (Subtarget.hasZeroCycleRegMove()) {
4910 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4911 MCRegister DestRegX = TRI->getMatchingSuperReg(
4912 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4913 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4914 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4915 // This instruction is reading and writing X registers. This may upset
4916 // the register scavenger and machine verifier, so we need to indicate
4917 // that we are reading an undefined value from SrcRegX, but a proper
4918 // value from SrcReg.
4919 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4920 .addReg(SrcRegX, RegState::Undef)
4921 .addImm(0)
4922 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
4923 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4924 } else {
4925 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4926 .addReg(SrcReg, getKillRegState(KillSrc))
4927 .addImm(0)
4928 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4929 }
4930 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4931 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4932 .addImm(0)
4933 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4934 } else {
4935 if (Subtarget.hasZeroCycleRegMove()) {
4936 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4937 MCRegister DestRegX = TRI->getMatchingSuperReg(
4938 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4939 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4940 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4941 // This instruction is reading and writing X registers. This may upset
4942 // the register scavenger and machine verifier, so we need to indicate
4943 // that we are reading an undefined value from SrcRegX, but a proper
4944 // value from SrcReg.
4945 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4946 .addReg(AArch64::XZR)
4947 .addReg(SrcRegX, RegState::Undef)
4948 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4949 } else {
4950 // Otherwise, expand to ORR WZR.
4951 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4952 .addReg(AArch64::WZR)
4953 .addReg(SrcReg, getKillRegState(KillSrc));
4954 }
4955 }
4956 return;
4957 }
4958
4959 // Copy a Predicate register by ORRing with itself.
4960 if (AArch64::PPRRegClass.contains(DestReg) &&
4961 AArch64::PPRRegClass.contains(SrcReg)) {
4963 "Unexpected SVE register.");
4964 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4965 .addReg(SrcReg) // Pg
4966 .addReg(SrcReg)
4967 .addReg(SrcReg, getKillRegState(KillSrc));
4968 return;
4969 }
4970
4971 // Copy a predicate-as-counter register by ORRing with itself as if it
4972 // were a regular predicate (mask) register.
4973 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4974 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4975 if (DestIsPNR || SrcIsPNR) {
4976 auto ToPPR = [](MCRegister R) -> MCRegister {
4977 return (R - AArch64::PN0) + AArch64::P0;
4978 };
4979 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4980 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4981
4982 if (PPRSrcReg != PPRDestReg) {
4983 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4984 .addReg(PPRSrcReg) // Pg
4985 .addReg(PPRSrcReg)
4986 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4987 if (DestIsPNR)
4988 NewMI.addDef(DestReg, RegState::Implicit);
4989 }
4990 return;
4991 }
4992
4993 // Copy a Z register by ORRing with itself.
4994 if (AArch64::ZPRRegClass.contains(DestReg) &&
4995 AArch64::ZPRRegClass.contains(SrcReg)) {
4997 "Unexpected SVE register.");
4998 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4999 .addReg(SrcReg)
5000 .addReg(SrcReg, getKillRegState(KillSrc));
5001 return;
5002 }
5003
5004 // Copy a Z register pair by copying the individual sub-registers.
5005 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5006 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5007 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5008 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5010 "Unexpected SVE register.");
5011 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5012 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5013 Indices);
5014 return;
5015 }
5016
5017 // Copy a Z register triple by copying the individual sub-registers.
5018 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5019 AArch64::ZPR3RegClass.contains(SrcReg)) {
5021 "Unexpected SVE register.");
5022 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5023 AArch64::zsub2};
5024 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5025 Indices);
5026 return;
5027 }
5028
5029 // Copy a Z register quad by copying the individual sub-registers.
5030 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5031 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5032 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5033 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5035 "Unexpected SVE register.");
5036 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5037 AArch64::zsub2, AArch64::zsub3};
5038 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5039 Indices);
5040 return;
5041 }
5042
5043 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5044 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5045 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5046 // If either operand is SP, expand to ADD #0.
5047 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5048 .addReg(SrcReg, getKillRegState(KillSrc))
5049 .addImm(0)
5050 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5051 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5052 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5053 .addImm(0)
5054 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5055 } else {
5056 // Otherwise, expand to ORR XZR.
5057 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5058 .addReg(AArch64::XZR)
5059 .addReg(SrcReg, getKillRegState(KillSrc));
5060 }
5061 return;
5062 }
5063
5064 // Copy a DDDD register quad by copying the individual sub-registers.
5065 if (AArch64::DDDDRegClass.contains(DestReg) &&
5066 AArch64::DDDDRegClass.contains(SrcReg)) {
5067 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5068 AArch64::dsub2, AArch64::dsub3};
5069 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5070 Indices);
5071 return;
5072 }
5073
5074 // Copy a DDD register triple by copying the individual sub-registers.
5075 if (AArch64::DDDRegClass.contains(DestReg) &&
5076 AArch64::DDDRegClass.contains(SrcReg)) {
5077 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5078 AArch64::dsub2};
5079 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5080 Indices);
5081 return;
5082 }
5083
5084 // Copy a DD register pair by copying the individual sub-registers.
5085 if (AArch64::DDRegClass.contains(DestReg) &&
5086 AArch64::DDRegClass.contains(SrcReg)) {
5087 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5088 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5089 Indices);
5090 return;
5091 }
5092
5093 // Copy a QQQQ register quad by copying the individual sub-registers.
5094 if (AArch64::QQQQRegClass.contains(DestReg) &&
5095 AArch64::QQQQRegClass.contains(SrcReg)) {
5096 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5097 AArch64::qsub2, AArch64::qsub3};
5098 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5099 Indices);
5100 return;
5101 }
5102
5103 // Copy a QQQ register triple by copying the individual sub-registers.
5104 if (AArch64::QQQRegClass.contains(DestReg) &&
5105 AArch64::QQQRegClass.contains(SrcReg)) {
5106 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5107 AArch64::qsub2};
5108 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5109 Indices);
5110 return;
5111 }
5112
5113 // Copy a QQ register pair by copying the individual sub-registers.
5114 if (AArch64::QQRegClass.contains(DestReg) &&
5115 AArch64::QQRegClass.contains(SrcReg)) {
5116 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5117 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5118 Indices);
5119 return;
5120 }
5121
5122 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5123 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5124 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5125 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5126 AArch64::XZR, Indices);
5127 return;
5128 }
5129
5130 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5131 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5132 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5133 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5134 AArch64::WZR, Indices);
5135 return;
5136 }
5137
5138 if (AArch64::FPR128RegClass.contains(DestReg) &&
5139 AArch64::FPR128RegClass.contains(SrcReg)) {
5140 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5141 !Subtarget.isNeonAvailable())
5142 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5143 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5144 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5145 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5146 else if (Subtarget.isNeonAvailable())
5147 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5148 .addReg(SrcReg)
5149 .addReg(SrcReg, getKillRegState(KillSrc));
5150 else {
5151 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5152 .addReg(AArch64::SP, RegState::Define)
5153 .addReg(SrcReg, getKillRegState(KillSrc))
5154 .addReg(AArch64::SP)
5155 .addImm(-16);
5156 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5157 .addReg(AArch64::SP, RegState::Define)
5158 .addReg(DestReg, RegState::Define)
5159 .addReg(AArch64::SP)
5160 .addImm(16);
5161 }
5162 return;
5163 }
5164
5165 if (AArch64::FPR64RegClass.contains(DestReg) &&
5166 AArch64::FPR64RegClass.contains(SrcReg)) {
5167 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5168 .addReg(SrcReg, getKillRegState(KillSrc));
5169 return;
5170 }
5171
5172 if (AArch64::FPR32RegClass.contains(DestReg) &&
5173 AArch64::FPR32RegClass.contains(SrcReg)) {
5174 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5175 .addReg(SrcReg, getKillRegState(KillSrc));
5176 return;
5177 }
5178
5179 if (AArch64::FPR16RegClass.contains(DestReg) &&
5180 AArch64::FPR16RegClass.contains(SrcReg)) {
5181 DestReg =
5182 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
5183 SrcReg =
5184 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
5185 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5186 .addReg(SrcReg, getKillRegState(KillSrc));
5187 return;
5188 }
5189
5190 if (AArch64::FPR8RegClass.contains(DestReg) &&
5191 AArch64::FPR8RegClass.contains(SrcReg)) {
5192 DestReg =
5193 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
5194 SrcReg =
5195 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
5196 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5197 .addReg(SrcReg, getKillRegState(KillSrc));
5198 return;
5199 }
5200
5201 // Copies between GPR64 and FPR64.
5202 if (AArch64::FPR64RegClass.contains(DestReg) &&
5203 AArch64::GPR64RegClass.contains(SrcReg)) {
5204 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5205 .addReg(SrcReg, getKillRegState(KillSrc));
5206 return;
5207 }
5208 if (AArch64::GPR64RegClass.contains(DestReg) &&
5209 AArch64::FPR64RegClass.contains(SrcReg)) {
5210 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5211 .addReg(SrcReg, getKillRegState(KillSrc));
5212 return;
5213 }
5214 // Copies between GPR32 and FPR32.
5215 if (AArch64::FPR32RegClass.contains(DestReg) &&
5216 AArch64::GPR32RegClass.contains(SrcReg)) {
5217 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5218 .addReg(SrcReg, getKillRegState(KillSrc));
5219 return;
5220 }
5221 if (AArch64::GPR32RegClass.contains(DestReg) &&
5222 AArch64::FPR32RegClass.contains(SrcReg)) {
5223 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5224 .addReg(SrcReg, getKillRegState(KillSrc));
5225 return;
5226 }
5227
5228 if (DestReg == AArch64::NZCV) {
5229 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5230 BuildMI(MBB, I, DL, get(AArch64::MSR))
5231 .addImm(AArch64SysReg::NZCV)
5232 .addReg(SrcReg, getKillRegState(KillSrc))
5233 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5234 return;
5235 }