LLVM 18.0.0git
AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64ExpandImm.h"
14#include "AArch64InstrInfo.h"
17#include "AArch64Subtarget.h"
20#include "llvm/ADT/ArrayRef.h"
21#include "llvm/ADT/STLExtras.h"
38#include "llvm/IR/DebugLoc.h"
39#include "llvm/IR/GlobalValue.h"
40#include "llvm/MC/MCAsmInfo.h"
41#include "llvm/MC/MCInst.h"
43#include "llvm/MC/MCInstrDesc.h"
48#include "llvm/Support/LEB128.h"
52#include <cassert>
53#include <cstdint>
54#include <iterator>
55#include <utility>
56
57using namespace llvm;
58
59#define GET_INSTRINFO_CTOR_DTOR
60#include "AArch64GenInstrInfo.inc"
61
62static cl::opt<unsigned> TBZDisplacementBits(
63 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
64 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
65
66static cl::opt<unsigned> CBZDisplacementBits(
67 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
68 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
69
70static cl::opt<unsigned>
71 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
72 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
73
74static cl::opt<unsigned>
75 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
76 cl::desc("Restrict range of B instructions (DEBUG)"));
77
78AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
79 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
80 AArch64::CATCHRET),
81 RI(STI.getTargetTriple()), Subtarget(STI) {}
82
83/// GetInstSize - Return the number of bytes of code the specified
84/// instruction may occupy. This returns the maximum number of bytes.
85unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
86 const MachineBasicBlock &MBB = *MI.getParent();
87 const MachineFunction *MF = MBB.getParent();
88 const Function &F = MF->getFunction();
89 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
90
91 {
92 auto Op = MI.getOpcode();
93 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
94 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
95 }
96
97 // Meta-instructions emit no code.
98 if (MI.isMetaInstruction())
99 return 0;
100
101 // FIXME: We currently only handle pseudoinstructions that don't get expanded
102 // before the assembly printer.
103 unsigned NumBytes = 0;
104 const MCInstrDesc &Desc = MI.getDesc();
105
106 // Size should be preferably set in
107 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
108 // Specific cases handle instructions of variable sizes
109 switch (Desc.getOpcode()) {
110 default:
111 if (Desc.getSize())
112 return Desc.getSize();
113
114 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
115 // with fixed constant size but not specified in .td file) is a normal
116 // 4-byte insn.
117 NumBytes = 4;
118 break;
119 case TargetOpcode::STACKMAP:
120 // The upper bound for a stackmap intrinsic is the full length of its shadow
121 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
122 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
123 break;
124 case TargetOpcode::PATCHPOINT:
125 // The size of the patchpoint intrinsic is the number of bytes requested
126 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
127 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
128 break;
129 case TargetOpcode::STATEPOINT:
130 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
131 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
132 // No patch bytes means a normal call inst is emitted
133 if (NumBytes == 0)
134 NumBytes = 4;
135 break;
136 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
137 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
138 // instructions are expanded to the specified number of NOPs. Otherwise,
139 // they are expanded to 36-byte XRay sleds.
140 NumBytes =
141 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
142 break;
143 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
144 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
145 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
146 NumBytes = 36;
147 break;
148 case TargetOpcode::PATCHABLE_EVENT_CALL:
149 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
150 NumBytes = 24;
151 break;
152
153 case AArch64::SPACE:
154 NumBytes = MI.getOperand(1).getImm();
155 break;
156 case TargetOpcode::BUNDLE:
157 NumBytes = getInstBundleLength(MI);
158 break;
159 }
160
161 return NumBytes;
162}
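// Worked example (illustrative): an ordinary ADDXri takes the default case
// above and returns its fixed 4-byte size from the .td description, a
// STACKMAP carrying a 16-byte shadow reports 16, and PATCHABLE_EVENT_CALL
// always reports 24 (six 4-byte instructions).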
163
164unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
165 unsigned Size = 0;
166 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
167 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
168 while (++I != E && I->isInsideBundle()) {
169 assert(!I->isBundle() && "No nested bundle!");
170 Size += getInstSizeInBytes(*I);
171 }
172 return Size;
173}
174
175static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
176 SmallVectorImpl<MachineOperand> &Cond) {
177 // Block ends with fall-through condbranch.
178 switch (LastInst->getOpcode()) {
179 default:
180 llvm_unreachable("Unknown branch instruction?");
181 case AArch64::Bcc:
182 Target = LastInst->getOperand(1).getMBB();
183 Cond.push_back(LastInst->getOperand(0));
184 break;
185 case AArch64::CBZW:
186 case AArch64::CBZX:
187 case AArch64::CBNZW:
188 case AArch64::CBNZX:
189 Target = LastInst->getOperand(1).getMBB();
190 Cond.push_back(MachineOperand::CreateImm(-1));
191 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
192 Cond.push_back(LastInst->getOperand(0));
193 break;
194 case AArch64::TBZW:
195 case AArch64::TBZX:
196 case AArch64::TBNZW:
197 case AArch64::TBNZX:
198 Target = LastInst->getOperand(2).getMBB();
199 Cond.push_back(MachineOperand::CreateImm(-1));
200 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
201 Cond.push_back(LastInst->getOperand(0));
202 Cond.push_back(LastInst->getOperand(1));
203 }
204}
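// Worked example (illustrative) of the Cond encoding produced above:
//   b.eq %bb          -> Target = %bb, Cond = { EQ }
//   cbnz x2, %bb      -> Target = %bb, Cond = { -1, CBNZX, x2 }
//   tbnz w3, #5, %bb  -> Target = %bb, Cond = { -1, TBNZW, w3, 5 }
// The leading -1 marks a folded compare-and-branch rather than a plain Bcc.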
205
206static unsigned getBranchDisplacementBits(unsigned Opc) {
207 switch (Opc) {
208 default:
209 llvm_unreachable("unexpected opcode!");
210 case AArch64::B:
211 return BDisplacementBits;
212 case AArch64::TBNZW:
213 case AArch64::TBZW:
214 case AArch64::TBNZX:
215 case AArch64::TBZX:
216 return TBZDisplacementBits;
217 case AArch64::CBNZW:
218 case AArch64::CBZW:
219 case AArch64::CBNZX:
220 case AArch64::CBZX:
221 return CBZDisplacementBits;
222 case AArch64::Bcc:
223 return BCCDisplacementBits;
224 }
225}
226
227bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
228 int64_t BrOffset) const {
229 unsigned Bits = getBranchDisplacementBits(BranchOp);
230 assert(Bits >= 3 && "max branch displacement must be enough to jump"
231 "over conditional branch expansion");
232 return isIntN(Bits, BrOffset / 4);
233}
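// Worked example (illustrative): the offset is counted in 4-byte words, so
// with the defaults above a Bcc or CB[N]Z (19 bits) reaches roughly +/-1 MiB
// (2^18 * 4 bytes), a TB[N]Z (14 bits) about +/-32 KiB, and an unconditional
// B (26 bits) about +/-128 MiB.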
234
235MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
236 const MachineInstr &MI) const {
237 switch (MI.getOpcode()) {
238 default:
239 llvm_unreachable("unexpected opcode!");
240 case AArch64::B:
241 return MI.getOperand(0).getMBB();
242 case AArch64::TBZW:
243 case AArch64::TBNZW:
244 case AArch64::TBZX:
245 case AArch64::TBNZX:
246 return MI.getOperand(2).getMBB();
247 case AArch64::CBZW:
248 case AArch64::CBNZW:
249 case AArch64::CBZX:
250 case AArch64::CBNZX:
251 case AArch64::Bcc:
252 return MI.getOperand(1).getMBB();
253 }
254}
255
256void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
257 MachineBasicBlock &NewDestBB,
258 MachineBasicBlock &RestoreBB,
259 const DebugLoc &DL,
260 int64_t BrOffset,
261 RegScavenger *RS) const {
262 assert(RS && "RegScavenger required for long branching");
263 assert(MBB.empty() &&
264 "new block should be inserted for expanding unconditional branch");
265 assert(MBB.pred_size() == 1);
266 assert(RestoreBB.empty() &&
267 "restore block should be inserted for restoring clobbered registers");
268
269 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
270 // Offsets outside of the signed 33-bit range are not supported for ADRP +
271 // ADD.
272 if (!isInt<33>(BrOffset))
273 report_fatal_error(
274 "Branch offsets outside of the signed 33-bit range not supported");
275
276 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
277 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
279 .addReg(Reg)
280 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
281 .addImm(0);
282 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
283 };
284
285 RS->enterBasicBlockEnd(MBB);
286 // If X16 is unused, we can rely on the linker to insert a range extension
287 // thunk if NewDestBB is out of range of a single B instruction.
288 constexpr Register Reg = AArch64::X16;
289 if (!RS->isRegUsed(Reg)) {
290 insertUnconditionalBranch(MBB, &NewDestBB, DL);
291 RS->setRegUsed(Reg);
292 return;
293 }
294
295 // If there's a free register and it's worth inflating the code size,
296 // manually insert the indirect branch.
297 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
298 if (Scavenged != AArch64::NoRegister &&
299 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
300 buildIndirectBranch(Scavenged, NewDestBB);
301 RS->setRegUsed(Scavenged);
302 return;
303 }
304
305 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
306 // with red zones.
307 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
308 if (!AFI || AFI->hasRedZone().value_or(true))
309 report_fatal_error(
310 "Unable to insert indirect branch inside function that has red zone");
311
312 // Otherwise, spill X16 and defer range extension to the linker.
313 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
314 .addReg(AArch64::SP, RegState::Define)
315 .addReg(Reg)
316 .addReg(AArch64::SP)
317 .addImm(-16);
318
319 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
320
321 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
322 .addReg(AArch64::SP, RegState::Define)
323 .addReg(Reg, RegState::Define)
324 .addReg(AArch64::SP)
325 .addImm(16);
326}
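// Illustrative sketch of the two expansions above. With a scavenged scratch
// register (x9 here is an arbitrary choice):
//   adrp x9, DestBB              ; MO_PAGE
//   add  x9, x9, :lo12:DestBB    ; MO_PAGEOFF | MO_NC
//   br   x9
// With no free register, X16 is spilled and later restored in RestoreBB:
//   str  x16, [sp, #-16]!        ; STRXpre
//   b    RestoreBB
//   ...
//   ldr  x16, [sp], #16          ; LDRXpost emitted into RestoreBB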
327
328// Branch analysis.
329bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
330 MachineBasicBlock *&TBB,
331 MachineBasicBlock *&FBB,
332 SmallVectorImpl<MachineOperand> &Cond,
333 bool AllowModify) const {
334 // If the block has no terminators, it just falls into the block after it.
335 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
336 if (I == MBB.end())
337 return false;
338
339 // Skip over SpeculationBarrierEndBB terminators
340 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
341 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
342 --I;
343 }
344
345 if (!isUnpredicatedTerminator(*I))
346 return false;
347
348 // Get the last instruction in the block.
349 MachineInstr *LastInst = &*I;
350
351 // If there is only one terminator instruction, process it.
352 unsigned LastOpc = LastInst->getOpcode();
353 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
354 if (isUncondBranchOpcode(LastOpc)) {
355 TBB = LastInst->getOperand(0).getMBB();
356 return false;
357 }
358 if (isCondBranchOpcode(LastOpc)) {
359 // Block ends with fall-through condbranch.
360 parseCondBranch(LastInst, TBB, Cond);
361 return false;
362 }
363 return true; // Can't handle indirect branch.
364 }
365
366 // Get the instruction before it if it is a terminator.
367 MachineInstr *SecondLastInst = &*I;
368 unsigned SecondLastOpc = SecondLastInst->getOpcode();
369
370 // If AllowModify is true and the block ends with two or more unconditional
371 // branches, delete all but the first unconditional branch.
372 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
373 while (isUncondBranchOpcode(SecondLastOpc)) {
374 LastInst->eraseFromParent();
375 LastInst = SecondLastInst;
376 LastOpc = LastInst->getOpcode();
377 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
378 // Return now; the only terminator is an unconditional branch.
379 TBB = LastInst->getOperand(0).getMBB();
380 return false;
381 }
382 SecondLastInst = &*I;
383 SecondLastOpc = SecondLastInst->getOpcode();
384 }
385 }
386
387 // If we're allowed to modify and the block ends in an unconditional branch
388 // which could simply fallthrough, remove the branch. (Note: This case only
389 // matters when we can't understand the whole sequence, otherwise it's also
390 // handled by BranchFolding.cpp.)
391 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
392 MBB.isLayoutSuccessor(LastInst->getOperand(0).getMBB())) {
393 LastInst->eraseFromParent();
394 LastInst = SecondLastInst;
395 LastOpc = LastInst->getOpcode();
396 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
397 assert(!isUncondBranchOpcode(LastOpc) &&
398 "unreachable unconditional branches removed above");
399
400 if (isCondBranchOpcode(LastOpc)) {
401 // Block ends with fall-through condbranch.
402 parseCondBranch(LastInst, TBB, Cond);
403 return false;
404 }
405 return true; // Can't handle indirect branch.
406 }
407 SecondLastInst = &*I;
408 SecondLastOpc = SecondLastInst->getOpcode();
409 }
410
411 // If there are three terminators, we don't know what sort of block this is.
412 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
413 return true;
414
415 // If the block ends with a B and a Bcc, handle it.
416 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
417 parseCondBranch(SecondLastInst, TBB, Cond);
418 FBB = LastInst->getOperand(0).getMBB();
419 return false;
420 }
421
422 // If the block ends with two unconditional branches, handle it. The second
423 // one is not executed, so remove it.
424 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
425 TBB = SecondLastInst->getOperand(0).getMBB();
426 I = LastInst;
427 if (AllowModify)
428 I->eraseFromParent();
429 return false;
430 }
431
432 // ...likewise if it ends with an indirect branch followed by an unconditional
433 // branch.
434 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
435 I = LastInst;
436 if (AllowModify)
437 I->eraseFromParent();
438 return true;
439 }
440
441 // Otherwise, can't handle this.
442 return true;
443}
444
445bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
446 MachineBranchPredicate &MBP,
447 bool AllowModify) const {
448 // For the moment, handle only a block which ends with a cb(n)zx followed by
449 // a fallthrough. Why this? Because it is a common form.
450 // TODO: Should we handle b.cc?
451
452 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
453 if (I == MBB.end())
454 return true;
455
456 // Skip over SpeculationBarrierEndBB terminators
457 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
458 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
459 --I;
460 }
461
462 if (!isUnpredicatedTerminator(*I))
463 return true;
464
465 // Get the last instruction in the block.
466 MachineInstr *LastInst = &*I;
467 unsigned LastOpc = LastInst->getOpcode();
468 if (!isCondBranchOpcode(LastOpc))
469 return true;
470
471 switch (LastOpc) {
472 default:
473 return true;
474 case AArch64::CBZW:
475 case AArch64::CBZX:
476 case AArch64::CBNZW:
477 case AArch64::CBNZX:
478 break;
479 };
480
481 MBP.TrueDest = LastInst->getOperand(1).getMBB();
482 assert(MBP.TrueDest && "expected!");
483 MBP.FalseDest = MBB.getNextNode();
484
485 MBP.ConditionDef = nullptr;
486 MBP.SingleUseCondition = false;
487
488 MBP.LHS = LastInst->getOperand(0);
489 MBP.RHS = MachineOperand::CreateImm(0);
490 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
491 : MachineBranchPredicate::PRED_EQ;
492 return false;
493}
494
495bool AArch64InstrInfo::reverseBranchCondition(
496 SmallVectorImpl<MachineOperand> &Cond) const {
497 if (Cond[0].getImm() != -1) {
498 // Regular Bcc
499 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
500 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
501 } else {
502 // Folded compare-and-branch
503 switch (Cond[1].getImm()) {
504 default:
505 llvm_unreachable("Unknown conditional branch!");
506 case AArch64::CBZW:
507 Cond[1].setImm(AArch64::CBNZW);
508 break;
509 case AArch64::CBNZW:
510 Cond[1].setImm(AArch64::CBZW);
511 break;
512 case AArch64::CBZX:
513 Cond[1].setImm(AArch64::CBNZX);
514 break;
515 case AArch64::CBNZX:
516 Cond[1].setImm(AArch64::CBZX);
517 break;
518 case AArch64::TBZW:
519 Cond[1].setImm(AArch64::TBNZW);
520 break;
521 case AArch64::TBNZW:
522 Cond[1].setImm(AArch64::TBZW);
523 break;
524 case AArch64::TBZX:
525 Cond[1].setImm(AArch64::TBNZX);
526 break;
527 case AArch64::TBNZX:
528 Cond[1].setImm(AArch64::TBZX);
529 break;
530 }
531 }
532
533 return false;
534}
535
536unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
537 int *BytesRemoved) const {
538 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
539 if (I == MBB.end())
540 return 0;
541
542 if (!isUncondBranchOpcode(I->getOpcode()) &&
543 !isCondBranchOpcode(I->getOpcode()))
544 return 0;
545
546 // Remove the branch.
547 I->eraseFromParent();
548
549 I = MBB.end();
550
551 if (I == MBB.begin()) {
552 if (BytesRemoved)
553 *BytesRemoved = 4;
554 return 1;
555 }
556 --I;
557 if (!isCondBranchOpcode(I->getOpcode())) {
558 if (BytesRemoved)
559 *BytesRemoved = 4;
560 return 1;
561 }
562
563 // Remove the branch.
564 I->eraseFromParent();
565 if (BytesRemoved)
566 *BytesRemoved = 8;
567
568 return 2;
569}
570
571void AArch64InstrInfo::instantiateCondBranch(
572 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
573 ArrayRef<MachineOperand> Cond) const {
574 if (Cond[0].getImm() != -1) {
575 // Regular Bcc
576 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
577 } else {
578 // Folded compare-and-branch
579 // Note that we use addOperand instead of addReg to keep the flags.
580 const MachineInstrBuilder MIB =
581 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
582 if (Cond.size() > 3)
583 MIB.addImm(Cond[3].getImm());
584 MIB.addMBB(TBB);
585 }
586}
587
588unsigned AArch64InstrInfo::insertBranch(
589 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
590 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
591 // Shouldn't be a fall through.
592 assert(TBB && "insertBranch must not be told to insert a fallthrough");
593
594 if (!FBB) {
595 if (Cond.empty()) // Unconditional branch?
596 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
597 else
598 instantiateCondBranch(MBB, DL, TBB, Cond);
599
600 if (BytesAdded)
601 *BytesAdded = 4;
602
603 return 1;
604 }
605
606 // Two-way conditional branch.
607 instantiateCondBranch(MBB, DL, TBB, Cond);
608 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
609
610 if (BytesAdded)
611 *BytesAdded = 8;
612
613 return 2;
614}
615
616// Find the original register that VReg is copied from.
617static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
618 while (Register::isVirtualRegister(VReg)) {
619 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
620 if (!DefMI->isFullCopy())
621 return VReg;
622 VReg = DefMI->getOperand(1).getReg();
623 }
624 return VReg;
625}
626
627// Determine if VReg is defined by an instruction that can be folded into a
628// csel instruction. If so, return the folded opcode, and the replacement
629// register.
630static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
631 unsigned *NewVReg = nullptr) {
632 VReg = removeCopies(MRI, VReg);
633 if (!Register::isVirtualRegister(VReg))
634 return 0;
635
636 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
637 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
638 unsigned Opc = 0;
639 unsigned SrcOpNum = 0;
640 switch (DefMI->getOpcode()) {
641 case AArch64::ADDSXri:
642 case AArch64::ADDSWri:
643 // if NZCV is used, do not fold.
644 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
645 return 0;
646 // fall-through to ADDXri and ADDWri.
647 [[fallthrough]];
648 case AArch64::ADDXri:
649 case AArch64::ADDWri:
650 // add x, 1 -> csinc.
651 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
652 DefMI->getOperand(3).getImm() != 0)
653 return 0;
654 SrcOpNum = 1;
655 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
656 break;
657
658 case AArch64::ORNXrr:
659 case AArch64::ORNWrr: {
660 // not x -> csinv, represented as orn dst, xzr, src.
661 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
662 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
663 return 0;
664 SrcOpNum = 2;
665 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
666 break;
667 }
668
669 case AArch64::SUBSXrr:
670 case AArch64::SUBSWrr:
671 // if NZCV is used, do not fold.
672 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
673 return 0;
674 // fall-through to SUBXrr and SUBWrr.
675 [[fallthrough]];
676 case AArch64::SUBXrr:
677 case AArch64::SUBWrr: {
678 // neg x -> csneg, represented as sub dst, xzr, src.
679 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
680 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
681 return 0;
682 SrcOpNum = 2;
683 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
684 break;
685 }
686 default:
687 return 0;
688 }
689 assert(Opc && SrcOpNum && "Missing parameters");
690
691 if (NewVReg)
692 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
693 return Opc;
694}
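// Worked example (illustrative): "%t = ADDWri %a, 1, 0" feeding a select is
// matched by the ADDWri case above and yields CSINCWr with *NewVReg = %a,
// while "%t = ORNWrr wzr, %a" (i.e. not %a) yields CSINVWr; csinc/csinv/csneg
// apply the extra +1/NOT/NEG to their second (false) source operand.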
695
696bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
697 ArrayRef<MachineOperand> Cond,
698 Register DstReg, Register TrueReg,
699 Register FalseReg, int &CondCycles,
700 int &TrueCycles,
701 int &FalseCycles) const {
702 // Check register classes.
703 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
704 const TargetRegisterClass *RC =
705 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
706 if (!RC)
707 return false;
708
709 // Also need to check the dest regclass, in case we're trying to optimize
710 // something like:
711 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
712 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
713 return false;
714
715 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
716 unsigned ExtraCondLat = Cond.size() != 1;
717
718 // GPRs are handled by csel.
719 // FIXME: Fold in x+1, -x, and ~x when applicable.
720 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
721 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
722 // Single-cycle csel, csinc, csinv, and csneg.
723 CondCycles = 1 + ExtraCondLat;
724 TrueCycles = FalseCycles = 1;
725 if (canFoldIntoCSel(MRI, TrueReg))
726 TrueCycles = 0;
727 else if (canFoldIntoCSel(MRI, FalseReg))
728 FalseCycles = 0;
729 return true;
730 }
731
732 // Scalar floating point is handled by fcsel.
733 // FIXME: Form fabs, fmin, and fmax when applicable.
734 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
735 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
736 CondCycles = 5 + ExtraCondLat;
737 TrueCycles = FalseCycles = 2;
738 return true;
739 }
740
741 // Can't do vectors.
742 return false;
743}
744
745void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
746 MachineBasicBlock::iterator I,
747 const DebugLoc &DL, Register DstReg,
748 ArrayRef<MachineOperand> Cond,
749 Register TrueReg, Register FalseReg) const {
750 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
751
752 // Parse the condition code, see parseCondBranch() above.
753 AArch64CC::CondCode CC;
754 switch (Cond.size()) {
755 default:
756 llvm_unreachable("Unknown condition opcode in Cond");
757 case 1: // b.cc
758 CC = AArch64CC::CondCode(Cond[0].getImm());
759 break;
760 case 3: { // cbz/cbnz
761 // We must insert a compare against 0.
762 bool Is64Bit;
763 switch (Cond[1].getImm()) {
764 default:
765 llvm_unreachable("Unknown branch opcode in Cond");
766 case AArch64::CBZW:
767 Is64Bit = false;
768 CC = AArch64CC::EQ;
769 break;
770 case AArch64::CBZX:
771 Is64Bit = true;
772 CC = AArch64CC::EQ;
773 break;
774 case AArch64::CBNZW:
775 Is64Bit = false;
776 CC = AArch64CC::NE;
777 break;
778 case AArch64::CBNZX:
779 Is64Bit = true;
780 CC = AArch64CC::NE;
781 break;
782 }
783 Register SrcReg = Cond[2].getReg();
784 if (Is64Bit) {
785 // cmp reg, #0 is actually subs xzr, reg, #0.
786 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
787 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
788 .addReg(SrcReg)
789 .addImm(0)
790 .addImm(0);
791 } else {
792 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
793 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
794 .addReg(SrcReg)
795 .addImm(0)
796 .addImm(0);
797 }
798 break;
799 }
800 case 4: { // tbz/tbnz
801 // We must insert a tst instruction.
802 switch (Cond[1].getImm()) {
803 default:
804 llvm_unreachable("Unknown branch opcode in Cond");
805 case AArch64::TBZW:
806 case AArch64::TBZX:
807 CC = AArch64CC::EQ;
808 break;
809 case AArch64::TBNZW:
810 case AArch64::TBNZX:
811 CC = AArch64CC::NE;
812 break;
813 }
814 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
815 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
816 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
817 .addReg(Cond[2].getReg())
818 .addImm(
819 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
820 else
821 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
822 .addReg(Cond[2].getReg())
823 .addImm(
824 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
825 break;
826 }
827 }
828
829 unsigned Opc = 0;
830 const TargetRegisterClass *RC = nullptr;
831 bool TryFold = false;
832 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
833 RC = &AArch64::GPR64RegClass;
834 Opc = AArch64::CSELXr;
835 TryFold = true;
836 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
837 RC = &AArch64::GPR32RegClass;
838 Opc = AArch64::CSELWr;
839 TryFold = true;
840 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
841 RC = &AArch64::FPR64RegClass;
842 Opc = AArch64::FCSELDrrr;
843 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
844 RC = &AArch64::FPR32RegClass;
845 Opc = AArch64::FCSELSrrr;
846 }
847 assert(RC && "Unsupported regclass");
848
849 // Try folding simple instructions into the csel.
850 if (TryFold) {
851 unsigned NewVReg = 0;
852 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
853 if (FoldedOpc) {
854 // The folded opcodes csinc, csinv and csneg apply the operation to
855 // FalseReg, so we need to invert the condition.
856 CC = AArch64CC::getInvertedCondCode(CC);
857 TrueReg = FalseReg;
858 } else
859 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
860
861 // Fold the operation. Leave any dead instructions for DCE to clean up.
862 if (FoldedOpc) {
863 FalseReg = NewVReg;
864 Opc = FoldedOpc;
865 // This extends the live range of NewVReg.
866 MRI.clearKillFlags(NewVReg);
867 }
868 }
869
870 // Pull all virtual registers into the appropriate class.
871 MRI.constrainRegClass(TrueReg, RC);
872 MRI.constrainRegClass(FalseReg, RC);
873
874 // Insert the csel.
875 BuildMI(MBB, I, DL, get(Opc), DstReg)
876 .addReg(TrueReg)
877 .addReg(FalseReg)
878 .addImm(CC);
879}
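// Worked example (illustrative): for Cond = { -1, TBNZW, %w0, 3 } the code
// above emits "ands wzr, w0, #0x8" (encodeLogicalImmediate(1 << 3, 32)) and
// then a csel/fcsel (or a folded csinc/csinv/csneg) predicated on NE.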
880
881// Return true if Imm can be loaded into a register by a "cheap" sequence of
882// instructions. For now, "cheap" means at most two instructions.
883static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
884 if (BitSize == 32)
885 return true;
886
887 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
888 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
889 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
890 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
891
892 return Is.size() <= 2;
893}
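// Worked example (illustrative): 0x0000000012340000 needs a single
// "movz x0, #0x1234, lsl #16", so it is cheap, while a pattern such as
// 0x1234567890abcdef needs one movz plus three movk instructions and is not.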
894
895// FIXME: this implementation should be micro-architecture dependent, so a
896// micro-architecture target hook should be introduced here in future.
897bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
898 if (Subtarget.hasExynosCheapAsMoveHandling()) {
899 if (isExynosCheapAsMove(MI))
900 return true;
901 return MI.isAsCheapAsAMove();
902 }
903
904 switch (MI.getOpcode()) {
905 default:
906 return MI.isAsCheapAsAMove();
907
908 case AArch64::ADDWrs:
909 case AArch64::ADDXrs:
910 case AArch64::SUBWrs:
911 case AArch64::SUBXrs:
912 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
913
914 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
915 // ORRXri, it is as cheap as MOV.
916 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
917 case AArch64::MOVi32imm:
918 return isCheapImmediate(MI, 32);
919 case AArch64::MOVi64imm:
920 return isCheapImmediate(MI, 64);
921 }
922}
923
924bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
925 switch (MI.getOpcode()) {
926 default:
927 return false;
928
929 case AArch64::ADDWrs:
930 case AArch64::ADDXrs:
931 case AArch64::ADDSWrs:
932 case AArch64::ADDSXrs: {
933 unsigned Imm = MI.getOperand(3).getImm();
934 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
935 if (ShiftVal == 0)
936 return true;
937 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
938 }
939
940 case AArch64::ADDWrx:
941 case AArch64::ADDXrx:
942 case AArch64::ADDXrx64:
943 case AArch64::ADDSWrx:
944 case AArch64::ADDSXrx:
945 case AArch64::ADDSXrx64: {
946 unsigned Imm = MI.getOperand(3).getImm();
947 switch (AArch64_AM::getArithExtendType(Imm)) {
948 default:
949 return false;
950 case AArch64_AM::UXTB:
951 case AArch64_AM::UXTH:
952 case AArch64_AM::UXTW:
953 case AArch64_AM::UXTX:
954 return AArch64_AM::getArithShiftValue(Imm) <= 4;
955 }
956 }
957
958 case AArch64::SUBWrs:
959 case AArch64::SUBSWrs: {
960 unsigned Imm = MI.getOperand(3).getImm();
961 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
962 return ShiftVal == 0 ||
963 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
964 }
965
966 case AArch64::SUBXrs:
967 case AArch64::SUBSXrs: {
968 unsigned Imm = MI.getOperand(3).getImm();
969 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
970 return ShiftVal == 0 ||
971 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
972 }
973
974 case AArch64::SUBWrx:
975 case AArch64::SUBXrx:
976 case AArch64::SUBXrx64:
977 case AArch64::SUBSWrx:
978 case AArch64::SUBSXrx:
979 case AArch64::SUBSXrx64: {
980 unsigned Imm = MI.getOperand(3).getImm();
981 switch (AArch64_AM::getArithExtendType(Imm)) {
982 default:
983 return false;
984 case AArch64_AM::UXTB:
985 case AArch64_AM::UXTH:
986 case AArch64_AM::UXTW:
987 case AArch64_AM::UXTX:
988 return AArch64_AM::getArithShiftValue(Imm) == 0;
989 }
990 }
991
992 case AArch64::LDRBBroW:
993 case AArch64::LDRBBroX:
994 case AArch64::LDRBroW:
995 case AArch64::LDRBroX:
996 case AArch64::LDRDroW:
997 case AArch64::LDRDroX:
998 case AArch64::LDRHHroW:
999 case AArch64::LDRHHroX:
1000 case AArch64::LDRHroW:
1001 case AArch64::LDRHroX:
1002 case AArch64::LDRQroW:
1003 case AArch64::LDRQroX:
1004 case AArch64::LDRSBWroW:
1005 case AArch64::LDRSBWroX:
1006 case AArch64::LDRSBXroW:
1007 case AArch64::LDRSBXroX:
1008 case AArch64::LDRSHWroW:
1009 case AArch64::LDRSHWroX:
1010 case AArch64::LDRSHXroW:
1011 case AArch64::LDRSHXroX:
1012 case AArch64::LDRSWroW:
1013 case AArch64::LDRSWroX:
1014 case AArch64::LDRSroW:
1015 case AArch64::LDRSroX:
1016 case AArch64::LDRWroW:
1017 case AArch64::LDRWroX:
1018 case AArch64::LDRXroW:
1019 case AArch64::LDRXroX:
1020 case AArch64::PRFMroW:
1021 case AArch64::PRFMroX:
1022 case AArch64::STRBBroW:
1023 case AArch64::STRBBroX:
1024 case AArch64::STRBroW:
1025 case AArch64::STRBroX:
1026 case AArch64::STRDroW:
1027 case AArch64::STRDroX:
1028 case AArch64::STRHHroW:
1029 case AArch64::STRHHroX:
1030 case AArch64::STRHroW:
1031 case AArch64::STRHroX:
1032 case AArch64::STRQroW:
1033 case AArch64::STRQroX:
1034 case AArch64::STRSroW:
1035 case AArch64::STRSroX:
1036 case AArch64::STRWroW:
1037 case AArch64::STRWroX:
1038 case AArch64::STRXroW:
1039 case AArch64::STRXroX: {
1040 unsigned IsSigned = MI.getOperand(3).getImm();
1041 return !IsSigned;
1042 }
1043 }
1044}
1045
1046bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1047 unsigned Opc = MI.getOpcode();
1048 switch (Opc) {
1049 default:
1050 return false;
1051 case AArch64::SEH_StackAlloc:
1052 case AArch64::SEH_SaveFPLR:
1053 case AArch64::SEH_SaveFPLR_X:
1054 case AArch64::SEH_SaveReg:
1055 case AArch64::SEH_SaveReg_X:
1056 case AArch64::SEH_SaveRegP:
1057 case AArch64::SEH_SaveRegP_X:
1058 case AArch64::SEH_SaveFReg:
1059 case AArch64::SEH_SaveFReg_X:
1060 case AArch64::SEH_SaveFRegP:
1061 case AArch64::SEH_SaveFRegP_X:
1062 case AArch64::SEH_SetFP:
1063 case AArch64::SEH_AddFP:
1064 case AArch64::SEH_Nop:
1065 case AArch64::SEH_PrologEnd:
1066 case AArch64::SEH_EpilogStart:
1067 case AArch64::SEH_EpilogEnd:
1068 case AArch64::SEH_PACSignLR:
1069 return true;
1070 }
1071}
1072
1073bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1074 Register &SrcReg, Register &DstReg,
1075 unsigned &SubIdx) const {
1076 switch (MI.getOpcode()) {
1077 default:
1078 return false;
1079 case AArch64::SBFMXri: // aka sxtw
1080 case AArch64::UBFMXri: // aka uxtw
1081 // Check for the 32 -> 64 bit extension case, these instructions can do
1082 // much more.
1083 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1084 return false;
1085 // This is a signed or unsigned 32 -> 64 bit extension.
1086 SrcReg = MI.getOperand(1).getReg();
1087 DstReg = MI.getOperand(0).getReg();
1088 SubIdx = AArch64::sub_32;
1089 return true;
1090 }
1091}
1092
1093bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1094 const MachineInstr &MIa, const MachineInstr &MIb) const {
1095 const TargetRegisterInfo *TRI = &getRegisterInfo();
1096 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1097 int64_t OffsetA = 0, OffsetB = 0;
1098 unsigned WidthA = 0, WidthB = 0;
1099 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1100
1101 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1102 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1103
1104 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1105 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1106 return false;
1107
1108 // Retrieve the base, offset from the base and width. Width
1109 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1110 // the bases are identical, and the offset of a lower memory access +
1111 // the width doesn't overlap the offset of a higher memory access,
1112 // then the memory accesses are different.
1113 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1114 // are assumed to have the same scale (vscale).
1115 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1116 WidthA, TRI) &&
1117 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1118 WidthB, TRI)) {
1119 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1120 OffsetAIsScalable == OffsetBIsScalable) {
1121 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1122 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1123 int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1124 if (LowOffset + LowWidth <= HighOffset)
1125 return true;
1126 }
1127 }
1128 return false;
1129}
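// Worked example (illustrative): "str x1, [x0]" and "str x2, [x0, #8]" share
// the base x0 with offsets 0 and 8 and width 8, so LowOffset + LowWidth
// (0 + 8 <= 8) holds and the accesses are reported disjoint; with the second
// store at [x0, #4] the check fails and no disjointness is claimed.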
1130
1131bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1132 const MachineBasicBlock *MBB,
1133 const MachineFunction &MF) const {
1134 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1135 return true;
1136 switch (MI.getOpcode()) {
1137 case AArch64::HINT:
1138 // CSDB hints are scheduling barriers.
1139 if (MI.getOperand(0).getImm() == 0x14)
1140 return true;
1141 break;
1142 case AArch64::DSB:
1143 case AArch64::ISB:
1144 // DSB and ISB also are scheduling barriers.
1145 return true;
1146 case AArch64::MSRpstatesvcrImm1:
1147 // SMSTART and SMSTOP are also scheduling barriers.
1148 return true;
1149 default:;
1150 }
1151 if (isSEHInstruction(MI))
1152 return true;
1153 auto Next = std::next(MI.getIterator());
1154 return Next != MBB->end() && Next->isCFIInstruction();
1155}
1156
1157/// analyzeCompare - For a comparison instruction, return the source registers
1158/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1159/// Return true if the comparison instruction can be analyzed.
1160bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1161 Register &SrcReg2, int64_t &CmpMask,
1162 int64_t &CmpValue) const {
1163 // The first operand can be a frame index where we'd normally expect a
1164 // register.
1165 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1166 if (!MI.getOperand(1).isReg())
1167 return false;
1168
1169 switch (MI.getOpcode()) {
1170 default:
1171 break;
1172 case AArch64::PTEST_PP:
1173 case AArch64::PTEST_PP_ANY:
1174 SrcReg = MI.getOperand(0).getReg();
1175 SrcReg2 = MI.getOperand(1).getReg();
1176 // Not sure about the mask and value for now...
1177 CmpMask = ~0;
1178 CmpValue = 0;
1179 return true;
1180 case AArch64::SUBSWrr:
1181 case AArch64::SUBSWrs:
1182 case AArch64::SUBSWrx:
1183 case AArch64::SUBSXrr:
1184 case AArch64::SUBSXrs:
1185 case AArch64::SUBSXrx:
1186 case AArch64::ADDSWrr:
1187 case AArch64::ADDSWrs:
1188 case AArch64::ADDSWrx:
1189 case AArch64::ADDSXrr:
1190 case AArch64::ADDSXrs:
1191 case AArch64::ADDSXrx:
1192 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1193 SrcReg = MI.getOperand(1).getReg();
1194 SrcReg2 = MI.getOperand(2).getReg();
1195 CmpMask = ~0;
1196 CmpValue = 0;
1197 return true;
1198 case AArch64::SUBSWri:
1199 case AArch64::ADDSWri:
1200 case AArch64::SUBSXri:
1201 case AArch64::ADDSXri:
1202 SrcReg = MI.getOperand(1).getReg();
1203 SrcReg2 = 0;
1204 CmpMask = ~0;
1205 CmpValue = MI.getOperand(2).getImm();
1206 return true;
1207 case AArch64::ANDSWri:
1208 case AArch64::ANDSXri:
1209 // ANDS does not use the same encoding scheme as the other xxxS
1210 // instructions.
1211 SrcReg = MI.getOperand(1).getReg();
1212 SrcReg2 = 0;
1213 CmpMask = ~0;
1214 CmpValue = AArch64_AM::decodeLogicalImmediate(
1215 MI.getOperand(2).getImm(),
1216 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1217 return true;
1218 }
1219
1220 return false;
1221}
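// Worked example (illustrative): "$wzr = SUBSWri $w0, 42, 0" is reported as
// SrcReg = w0, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 42, while
// "$wzr = SUBSWrr $w0, $w1" yields SrcReg = w0, SrcReg2 = w1, CmpValue = 0.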
1222
1223static bool UpdateOperandRegClass(MachineInstr &Instr) {
1224 MachineBasicBlock *MBB = Instr.getParent();
1225 assert(MBB && "Can't get MachineBasicBlock here");
1226 MachineFunction *MF = MBB->getParent();
1227 assert(MF && "Can't get MachineFunction here");
1228 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1229 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1230 MachineRegisterInfo *MRI = &MF->getRegInfo();
1231
1232 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1233 ++OpIdx) {
1234 MachineOperand &MO = Instr.getOperand(OpIdx);
1235 const TargetRegisterClass *OpRegCstraints =
1236 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1237
1238 // If there's no constraint, there's nothing to do.
1239 if (!OpRegCstraints)
1240 continue;
1241 // If the operand is a frame index, there's nothing to do here.
1242 // A frame index operand will resolve correctly during PEI.
1243 if (MO.isFI())
1244 continue;
1245
1246 assert(MO.isReg() &&
1247 "Operand has register constraints without being a register!");
1248
1249 Register Reg = MO.getReg();
1250 if (Reg.isPhysical()) {
1251 if (!OpRegCstraints->contains(Reg))
1252 return false;
1253 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1254 !MRI->constrainRegClass(Reg, OpRegCstraints))
1255 return false;
1256 }
1257
1258 return true;
1259}
1260
1261/// Return the opcode that does not set flags when possible - otherwise
1262/// return the original opcode. The caller is responsible for doing the actual
1263/// substitution and legality checking.
1264unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1265 // Don't convert all compare instructions, because for some the zero register
1266 // encoding becomes the sp register.
1267 bool MIDefinesZeroReg = false;
1268 if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1269 MIDefinesZeroReg = true;
1270
1271 switch (MI.getOpcode()) {
1272 default:
1273 return MI.getOpcode();
1274 case AArch64::ADDSWrr:
1275 return AArch64::ADDWrr;
1276 case AArch64::ADDSWri:
1277 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1278 case AArch64::ADDSWrs:
1279 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1280 case AArch64::ADDSWrx:
1281 return AArch64::ADDWrx;
1282 case AArch64::ADDSXrr:
1283 return AArch64::ADDXrr;
1284 case AArch64::ADDSXri:
1285 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1286 case AArch64::ADDSXrs:
1287 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1288 case AArch64::ADDSXrx:
1289 return AArch64::ADDXrx;
1290 case AArch64::SUBSWrr:
1291 return AArch64::SUBWrr;
1292 case AArch64::SUBSWri:
1293 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1294 case AArch64::SUBSWrs:
1295 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1296 case AArch64::SUBSWrx:
1297 return AArch64::SUBWrx;
1298 case AArch64::SUBSXrr:
1299 return AArch64::SUBXrr;
1300 case AArch64::SUBSXri:
1301 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1302 case AArch64::SUBSXrs:
1303 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1304 case AArch64::SUBSXrx:
1305 return AArch64::SUBXrx;
1306 }
1307}
1308
1309enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1310
1311/// True when condition flags are accessed (either by writing or reading)
1312/// on the instruction trace starting at From and ending at To.
1313///
1314/// Note: If From and To are from different blocks it's assumed the flags are accessed
1315/// on the path.
1316static bool areCFlagsAccessedBetweenInstrs(
1317 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1318 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1319 // Early exit if To is at the beginning of the BB.
1320 if (To == To->getParent()->begin())
1321 return true;
1322
1323 // Check whether the instructions are in the same basic block
1324 // If not, assume the condition flags might get modified somewhere.
1325 if (To->getParent() != From->getParent())
1326 return true;
1327
1328 // From must be above To.
1329 assert(std::any_of(
1330 ++To.getReverse(), To->getParent()->rend(),
1331 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1332
1333 // We iterate backward starting at \p To until we hit \p From.
1334 for (const MachineInstr &Instr :
1335 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1336 if (((AccessToCheck & AK_Write) &&
1337 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1338 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1339 return true;
1340 }
1341 return false;
1342}
1343
1344/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1345/// operation which could set the flags in an identical manner
1346bool AArch64InstrInfo::optimizePTestInstr(
1347 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1348 const MachineRegisterInfo *MRI) const {
1349 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1350 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1351 auto NewOp = Pred->getOpcode();
1352 bool OpChanged = false;
1353
1354 unsigned MaskOpcode = Mask->getOpcode();
1355 unsigned PredOpcode = Pred->getOpcode();
1356 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1357 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1358
1359 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1360 getElementSizeForOpcode(MaskOpcode) ==
1361 getElementSizeForOpcode(PredOpcode) &&
1362 Mask->getOperand(1).getImm() == 31) {
1363 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1364 // redundant since WHILE performs an implicit PTEST with an all active
1365 // mask. Must be an all active predicate of matching element size.
1366
1367 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1368 // PTEST_LIKE instruction uses the same all active mask and the element
1369 // size matches. If the PTEST has a condition of any then it is always
1370 // redundant.
1371 if (PredIsPTestLike) {
1372 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1373 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1374 return false;
1375 }
1376
1377 // Fallthrough to simply remove the PTEST.
1378 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1379 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1380 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1381 // instruction that sets the flags as PTEST would. This is only valid when
1382 // the condition is any.
1383
1384 // Fallthrough to simply remove the PTEST.
1385 } else if (PredIsPTestLike) {
1386 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1387 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1388 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1389 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1390 // performed by the compare could consider fewer lanes for these element
1391 // sizes.
1392 //
1393 // For example, consider
1394 //
1395 // ptrue p0.b ; P0=1111-1111-1111-1111
1396 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1397 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1398 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1399 // ; ^ last active
1400 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1401 // ; ^ last active
1402 //
1403 // where the compare generates a canonical all active 32-bit predicate
1404 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1405 // active flag, whereas the PTEST instruction with the same mask doesn't.
1406 // For PTEST_ANY this doesn't apply as the flags in this case would be
1407 // identical regardless of element size.
1408 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1409 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1410 if ((Mask != PTestLikeMask) ||
1411 (PredElementSize != AArch64::ElementSizeB &&
1412 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1413 return false;
1414
1415 // Fallthrough to simply remove the PTEST.
1416 } else {
1417 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1418 // opcode so the PTEST becomes redundant.
1419 switch (PredOpcode) {
1420 case AArch64::AND_PPzPP:
1421 case AArch64::BIC_PPzPP:
1422 case AArch64::EOR_PPzPP:
1423 case AArch64::NAND_PPzPP:
1424 case AArch64::NOR_PPzPP:
1425 case AArch64::ORN_PPzPP:
1426 case AArch64::ORR_PPzPP:
1427 case AArch64::BRKA_PPzP:
1428 case AArch64::BRKPA_PPzPP:
1429 case AArch64::BRKB_PPzP:
1430 case AArch64::BRKPB_PPzPP:
1431 case AArch64::RDFFR_PPz: {
1432 // Check to see if our mask is the same. If not the resulting flag bits
1433 // may be different and we can't remove the ptest.
1434 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1435 if (Mask != PredMask)
1436 return false;
1437 break;
1438 }
1439 case AArch64::BRKN_PPzP: {
1440 // BRKN uses an all active implicit mask to set flags unlike the other
1441 // flag-setting instructions.
1442 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1443 if ((MaskOpcode != AArch64::PTRUE_B) ||
1444 (Mask->getOperand(1).getImm() != 31))
1445 return false;
1446 break;
1447 }
1448 case AArch64::PTRUE_B:
1449 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1450 break;
1451 default:
1452 // Bail out if we don't recognize the input
1453 return false;
1454 }
1455
1456 NewOp = convertToFlagSettingOpc(PredOpcode);
1457 OpChanged = true;
1458 }
1459
1461
1462 // If another instruction between Pred and PTest accesses flags, don't remove
1463 // the ptest or update the earlier instruction to modify them.
1464 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1465 return false;
1466
1467 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1468 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1469 // operand to be replaced with an equivalent instruction that also sets the
1470 // flags.
1471 Pred->setDesc(get(NewOp));
1472 PTest->eraseFromParent();
1473 if (OpChanged) {
1474 bool succeeded = UpdateOperandRegClass(*Pred);
1475 (void)succeeded;
1476 assert(succeeded && "Operands have incompatible register classes!");
1477 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1478 }
1479
1480 // Ensure that the flags def is live.
1481 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1482 unsigned i = 0, e = Pred->getNumOperands();
1483 for (; i != e; ++i) {
1484 MachineOperand &MO = Pred->getOperand(i);
1485 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1486 MO.setIsDead(false);
1487 break;
1488 }
1489 }
1490 }
1491 return true;
1492}
1493
1494/// Try to optimize a compare instruction. A compare instruction is an
1495/// instruction which produces AArch64::NZCV. It is a true compare
1496/// instruction when there are no uses of its destination register.
1498///
1499/// The following steps are tried in order:
1500/// 1. Convert CmpInstr into an unconditional version.
1501/// 2. Remove CmpInstr if above there is an instruction producing a needed
1502/// condition code or an instruction which can be converted into such an
1503/// instruction.
1504/// Only comparison with zero is supported.
1505bool AArch64InstrInfo::optimizeCompareInstr(
1506 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1507 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1508 assert(CmpInstr.getParent());
1509 assert(MRI);
1510
1511 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1512 int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1513 if (DeadNZCVIdx != -1) {
1514 if (CmpInstr.definesRegister(AArch64::WZR) ||
1515 CmpInstr.definesRegister(AArch64::XZR)) {
1516 CmpInstr.eraseFromParent();
1517 return true;
1518 }
1519 unsigned Opc = CmpInstr.getOpcode();
1520 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1521 if (NewOpc == Opc)
1522 return false;
1523 const MCInstrDesc &MCID = get(NewOpc);
1524 CmpInstr.setDesc(MCID);
1525 CmpInstr.removeOperand(DeadNZCVIdx);
1526 bool succeeded = UpdateOperandRegClass(CmpInstr);
1527 (void)succeeded;
1528 assert(succeeded && "Some operands reg class are incompatible!");
1529 return true;
1530 }
1531
1532 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1533 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1534 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1535
1536 if (SrcReg2 != 0)
1537 return false;
1538
1539 // CmpInstr is a Compare instruction if destination register is not used.
1540 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1541 return false;
1542
1543 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1544 return true;
1545 return (CmpValue == 0 || CmpValue == 1) &&
1546 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1547}
1548
1549/// Get the opcode of the S (flag-setting) version of Instr.
1550/// If Instr is already the S version, its opcode is returned.
1551/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1552/// version or we are not interested in it.
1553static unsigned sForm(MachineInstr &Instr) {
1554 switch (Instr.getOpcode()) {
1555 default:
1556 return AArch64::INSTRUCTION_LIST_END;
1557
1558 case AArch64::ADDSWrr:
1559 case AArch64::ADDSWri:
1560 case AArch64::ADDSXrr:
1561 case AArch64::ADDSXri:
1562 case AArch64::SUBSWrr:
1563 case AArch64::SUBSWri:
1564 case AArch64::SUBSXrr:
1565 case AArch64::SUBSXri:
1566 return Instr.getOpcode();
1567
1568 case AArch64::ADDWrr:
1569 return AArch64::ADDSWrr;
1570 case AArch64::ADDWri:
1571 return AArch64::ADDSWri;
1572 case AArch64::ADDXrr:
1573 return AArch64::ADDSXrr;
1574 case AArch64::ADDXri:
1575 return AArch64::ADDSXri;
1576 case AArch64::ADCWr:
1577 return AArch64::ADCSWr;
1578 case AArch64::ADCXr:
1579 return AArch64::ADCSXr;
1580 case AArch64::SUBWrr:
1581 return AArch64::SUBSWrr;
1582 case AArch64::SUBWri:
1583 return AArch64::SUBSWri;
1584 case AArch64::SUBXrr:
1585 return AArch64::SUBSXrr;
1586 case AArch64::SUBXri:
1587 return AArch64::SUBSXri;
1588 case AArch64::SBCWr:
1589 return AArch64::SBCSWr;
1590 case AArch64::SBCXr:
1591 return AArch64::SBCSXr;
1592 case AArch64::ANDWri:
1593 return AArch64::ANDSWri;
1594 case AArch64::ANDXri:
1595 return AArch64::ANDSXri;
1596 }
1597}
1598
1599/// Check if AArch64::NZCV should be alive in successors of MBB.
1600static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1601 for (auto *BB : MBB->successors())
1602 if (BB->isLiveIn(AArch64::NZCV))
1603 return true;
1604 return false;
1605}
1606
1607/// \returns The condition code operand index for \p Instr if it is a branch
1608/// or select and -1 otherwise.
1609static int
1610findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1611 switch (Instr.getOpcode()) {
1612 default:
1613 return -1;
1614
1615 case AArch64::Bcc: {
1616 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1617 assert(Idx >= 2);
1618 return Idx - 2;
1619 }
1620
1621 case AArch64::CSINVWr:
1622 case AArch64::CSINVXr:
1623 case AArch64::CSINCWr:
1624 case AArch64::CSINCXr:
1625 case AArch64::CSELWr:
1626 case AArch64::CSELXr:
1627 case AArch64::CSNEGWr:
1628 case AArch64::CSNEGXr:
1629 case AArch64::FCSELSrrr:
1630 case AArch64::FCSELDrrr: {
1631 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1632 assert(Idx >= 1);
1633 return Idx - 1;
1634 }
1635 }
1636}
1637
1638/// Find a condition code used by the instruction.
1639/// Returns AArch64CC::Invalid if either the instruction does not use condition
1640/// codes or we don't optimize CmpInstr in the presence of such instructions.
1641static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1642 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1643 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1644 Instr.getOperand(CCIdx).getImm())
1645 : AArch64CC::Invalid;
1646}
1647
1649static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1650 UsedNZCV UsedFlags;
1651 switch (CC) {
1652 default:
1653 break;
1654
1655 case AArch64CC::EQ: // Z set
1656 case AArch64CC::NE: // Z clear
1657 UsedFlags.Z = true;
1658 break;
1659
1660 case AArch64CC::HI: // Z clear and C set
1661 case AArch64CC::LS: // Z set or C clear
1662 UsedFlags.Z = true;
1663 [[fallthrough]];
1664 case AArch64CC::HS: // C set
1665 case AArch64CC::LO: // C clear
1666 UsedFlags.C = true;
1667 break;
1668
1669 case AArch64CC::MI: // N set
1670 case AArch64CC::PL: // N clear
1671 UsedFlags.N = true;
1672 break;
1673
1674 case AArch64CC::VS: // V set
1675 case AArch64CC::VC: // V clear
1676 UsedFlags.V = true;
1677 break;
1678
1679 case AArch64CC::GT: // Z clear, N and V the same
1680 case AArch64CC::LE: // Z set, N and V differ
1681 UsedFlags.Z = true;
1682 [[fallthrough]];
1683 case AArch64CC::GE: // N and V the same
1684 case AArch64CC::LT: // N and V differ
1685 UsedFlags.N = true;
1686 UsedFlags.V = true;
1687 break;
1688 }
1689 return UsedFlags;
1690}
1691
1692/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1693/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1694/// \returns std::nullopt otherwise.
1695///
1696/// Collect instructions using those flags in \p CCUseInstrs if provided.
1697std::optional<UsedNZCV>
1698llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1699 const TargetRegisterInfo &TRI,
1700 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1701 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1702 if (MI.getParent() != CmpParent)
1703 return std::nullopt;
1704
1705 if (areCFlagsAliveInSuccessors(CmpParent))
1706 return std::nullopt;
1707
1708 UsedNZCV NZCVUsedAfterCmp;
1709 for (MachineInstr &Instr : instructionsWithoutDebug(
1710 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1711 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1712 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1713 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1714 return std::nullopt;
1715 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1716 if (CCUseInstrs)
1717 CCUseInstrs->push_back(&Instr);
1718 }
1719 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1720 break;
1721 }
1722 return NZCVUsedAfterCmp;
1723}
1724
1725static bool isADDSRegImm(unsigned Opcode) {
1726 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1727}
1728
1729static bool isSUBSRegImm(unsigned Opcode) {
1730 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1731}
1732
1733/// Check if CmpInstr can be substituted by MI.
1734///
1735/// CmpInstr can be substituted:
1736/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1737/// - and, MI and CmpInstr are from the same MachineBB
1738/// - and, condition flags are not alive in successors of the CmpInstr parent
1739/// - and, if MI opcode is the S form there must be no defs of flags between
1740/// MI and CmpInstr
1741/// or if MI opcode is not the S form there must be neither defs of flags
1742/// nor uses of flags between MI and CmpInstr.
1743/// - and, if C/V flags are not used after CmpInstr
1744/// or if N flag is used but MI produces poison value if signed overflow
1745/// occurs.
1746static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1747 const TargetRegisterInfo &TRI) {
1748 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1749 // that may or may not set flags.
1750 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1751
1752 const unsigned CmpOpcode = CmpInstr.getOpcode();
1753 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1754 return false;
1755
1756 assert((CmpInstr.getOperand(2).isImm() &&
1757 CmpInstr.getOperand(2).getImm() == 0) &&
1758 "Caller guarantees that CmpInstr compares with constant 0");
1759
1760 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1761 if (!NZVCUsed || NZVCUsed->C)
1762 return false;
1763
1764 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1765 // '%vreg = add ...' or '%vreg = sub ...'.
1766 // Condition flag V is used to indicate signed overflow.
1767 // 1) MI and CmpInstr set N and V to the same value.
1768 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1769 // signed overflow occurs, so CmpInstr could still be simplified away.
1770 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1771 return false;
1772
1773 AccessKind AccessToCheck = AK_Write;
1774 if (sForm(MI) != MI.getOpcode())
1775 AccessToCheck = AK_All;
1776 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1777}
1778
1779/// Substitute an instruction comparing to zero with another instruction
1780/// which produces needed condition flags.
1781///
1782/// Return true on success.
1783bool AArch64InstrInfo::substituteCmpToZero(
1784 MachineInstr &CmpInstr, unsigned SrcReg,
1785 const MachineRegisterInfo &MRI) const {
1786 // Get the unique definition of SrcReg.
1787 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1788 if (!MI)
1789 return false;
1790
1791 const TargetRegisterInfo &TRI = getRegisterInfo();
1792
1793 unsigned NewOpc = sForm(*MI);
1794 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1795 return false;
1796
1797 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1798 return false;
1799
1800 // Update the instruction to set NZCV.
1801 MI->setDesc(get(NewOpc));
1802 CmpInstr.eraseFromParent();
1803 bool succeeded = UpdateOperandRegClass(*MI);
1804 (void)succeeded;
1805 assert(succeeded && "Some operands reg class are incompatible!");
1806 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1807 return true;
1808}
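// Worked example (illustrative):
//   %2 = SUBWrr %0, %1
//   $wzr = SUBSWri %2, 0, 0      ; compare %2 against zero
//   Bcc 0, %bb                   ; b.eq
// becomes "%2 = SUBSWrr %0, %1" followed directly by the Bcc, since the
// subtraction itself can set NZCV and only the Z flag is consumed here.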
1809
1810/// \returns True if \p CmpInstr can be removed.
1811///
1812/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1813/// codes used in \p CCUseInstrs must be inverted.
1814static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1815 int CmpValue, const TargetRegisterInfo &TRI,
1816 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1817 bool &IsInvertCC) {
1818 assert((CmpValue == 0 || CmpValue == 1) &&
1819 "Only comparisons to 0 or 1 considered for removal!");
1820
1821 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1822 unsigned MIOpc = MI.getOpcode();
1823 if (MIOpc == AArch64::CSINCWr) {
1824 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1825 MI.getOperand(2).getReg() != AArch64::WZR)
1826 return false;
1827 } else if (MIOpc == AArch64::CSINCXr) {
1828 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1829 MI.getOperand(2).getReg() != AArch64::XZR)
1830 return false;
1831 } else {
1832 return false;
1833 }
1834 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1835 if (MICC == AArch64CC::Invalid)
1836 return false;
1837
1838 // NZCV needs to be defined
1839 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
1840 return false;
1841
1842 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1843 const unsigned CmpOpcode = CmpInstr.getOpcode();
1844 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1845 if (CmpValue && !IsSubsRegImm)
1846 return false;
1847 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1848 return false;
1849
1850 // MI conditions allowed: eq, ne, mi, pl
1851 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1852 if (MIUsedNZCV.C || MIUsedNZCV.V)
1853 return false;
1854
1855 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1856 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1857 // Condition flags are not used in CmpInstr basic block successors, and only
1858 // the Z or N flag is allowed to be used after CmpInstr within its basic block.
1859 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1860 return false;
1861 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1862 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1863 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1864 return false;
1865 // If CmpInstr is a comparison to zero, MI conditions are limited to eq and ne.
1866 if (MIUsedNZCV.N && !CmpValue)
1867 return false;
1868
1869 // There must be no defs of flags between MI and CmpInstr
1870 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1871 return false;
1872
1873 // Condition code is inverted in the following cases:
1874 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1875 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1876 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1877 (!CmpValue && MICC == AArch64CC::NE);
1878 return true;
1879}
1880
1881/// Remove comparison in csinc-cmp sequence
1882///
1883/// Examples:
1884/// 1. \code
1885/// csinc w9, wzr, wzr, ne
1886/// cmp w9, #0
1887/// b.eq
1888/// \endcode
1889/// to
1890/// \code
1891/// csinc w9, wzr, wzr, ne
1892/// b.ne
1893/// \endcode
1894///
1895/// 2. \code
1896/// csinc x2, xzr, xzr, mi
1897/// cmp x2, #1
1898/// b.pl
1899/// \endcode
1900/// to
1901/// \code
1902/// csinc x2, xzr, xzr, mi
1903/// b.pl
1904/// \endcode
1905///
1906/// \param CmpInstr comparison instruction
1907/// \return True when comparison removed
1908bool AArch64InstrInfo::removeCmpToZeroOrOne(
1909 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1910 const MachineRegisterInfo &MRI) const {
1911 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1912 if (!MI)
1913 return false;
1914 const TargetRegisterInfo &TRI = getRegisterInfo();
1915 SmallVector<MachineInstr *, 4> CCUseInstrs;
1916 bool IsInvertCC = false;
1917 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1918 IsInvertCC))
1919 return false;
1920 // Make transformation
1921 CmpInstr.eraseFromParent();
1922 if (IsInvertCC) {
1923 // Invert condition codes in CmpInstr CC users
1924 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1925 int Idx = CCUseInstr->findRegisterUseOperandIdx(AArch64::NZCV);
1926 assert(Idx >= 0 && "Unexpected instruction using CC.");
1927 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1928 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1929 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1930 CCOperand.setImm(CCUse);
1931 }
1932 }
1933 return true;
1934}
1935
1936bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1937 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1938 MI.getOpcode() != AArch64::CATCHRET)
1939 return false;
1940
1941 MachineBasicBlock &MBB = *MI.getParent();
1942 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1943 auto TRI = Subtarget.getRegisterInfo();
1944 DebugLoc DL = MI.getDebugLoc();
1945
1946 if (MI.getOpcode() == AArch64::CATCHRET) {
1947 // Skip to the first instruction before the epilog.
1948 const TargetInstrInfo *TII =
1949 MBB.getParent()->getSubtarget().getInstrInfo();
1950 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1951 auto MBBI = MachineBasicBlock::iterator(MI);
1952 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1953 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1954 FirstEpilogSEH != MBB.begin())
1955 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1956 if (FirstEpilogSEH != MBB.begin())
1957 FirstEpilogSEH = std::next(FirstEpilogSEH);
1958 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1959 .addReg(AArch64::X0, RegState::Define)
1960 .addMBB(TargetMBB);
1961 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1962 .addReg(AArch64::X0, RegState::Define)
1963 .addReg(AArch64::X0)
1964 .addMBB(TargetMBB)
1965 .addImm(0);
1966 return true;
1967 }
1968
1969 Register Reg = MI.getOperand(0).getReg();
1971 if (M.getStackProtectorGuard() == "sysreg") {
1972 const AArch64SysReg::SysReg *SrcReg =
1973 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1974 if (!SrcReg)
1975 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1976
1977 // mrs xN, sysreg
1978 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1979 .addDef(Reg)
1980 .addImm(SrcReg->Encoding);
1981 int Offset = M.getStackProtectorGuardOffset();
1982 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1983 // ldr xN, [xN, #offset]
1984 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1985 .addDef(Reg)
1986 .addUse(Reg, RegState::Kill)
1987 .addImm(Offset / 8);
1988 } else if (Offset >= -256 && Offset <= 255) {
1989 // ldur xN, [xN, #offset]
1990 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
1991 .addDef(Reg)
1992 .addUse(Reg, RegState::Kill)
1993 .addImm(Offset);
1994 } else if (Offset >= -4095 && Offset <= 4095) {
1995 if (Offset > 0) {
1996 // add xN, xN, #offset
1997 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
1998 .addDef(Reg)
1999 .addUse(Reg, RegState::Kill)
2000 .addImm(Offset)
2001 .addImm(0);
2002 } else {
2003 // sub xN, xN, #offset
2004 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2005 .addDef(Reg)
2006 .addUse(Reg, RegState::Kill)
2007 .addImm(-Offset)
2008 .addImm(0);
2009 }
2010 // ldr xN, [xN]
2011 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2012 .addDef(Reg)
2013 .addUse(Reg, RegState::Kill)
2014 .addImm(0);
2015 } else {
2016 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2017 // than 32760 (the maximum offset encodable by the scaled LDRXui form above).
2018 // It might be nice to use AArch64::MOVi32imm here, which would get
2019 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2020 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2021 // AArch64FrameLowering might help us find such a scratch register
2022 // though. If we failed to find a scratch register, we could emit a
2023 // stream of add instructions to build up the immediate. Or, we could try
2024 // to insert an AArch64::MOVi32imm before register allocation so that we
2025 // didn't need to scavenge for a scratch register.
2026 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2027 }
2028 MBB.erase(MI);
2029 return true;
2030 }
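// For illustration (editor's sketch, not part of the upstream source): with a
// sysreg-based stack protector guard read from SP_EL0 at offset 40, the block
// above emits
//   mrs  x0, SP_EL0
//   ldr  x0, [x0, #40]
// Offsets that fit none of the ldr/ldur/add-sub forms are rejected with the
// report_fatal_error above.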
2031
2032 const GlobalValue *GV =
2033 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2034 const TargetMachine &TM = MBB.getParent()->getTarget();
2035 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2036 const unsigned char MO_NC = AArch64II::MO_NC;
2037
2038 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2039 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2040 .addGlobalAddress(GV, 0, OpFlags);
2041 if (Subtarget.isTargetILP32()) {
2042 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2043 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2044 .addDef(Reg32, RegState::Dead)
2045 .addUse(Reg, RegState::Kill)
2046 .addImm(0)
2047 .addMemOperand(*MI.memoperands_begin())
2048 .addDef(Reg, RegState::Implicit);
2049 } else {
2050 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2051 .addReg(Reg, RegState::Kill)
2052 .addImm(0)
2053 .addMemOperand(*MI.memoperands_begin());
2054 }
2055 } else if (TM.getCodeModel() == CodeModel::Large) {
2056 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2057 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2058 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2059 .addImm(0);
2060 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2061 .addReg(Reg, RegState::Kill)
2062 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2063 .addImm(16);
2064 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2065 .addReg(Reg, RegState::Kill)
2066 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2067 .addImm(32);
2068 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2069 .addReg(Reg, RegState::Kill)
2070 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2071 .addImm(48);
2072 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2073 .addReg(Reg, RegState::Kill)
2074 .addImm(0)
2075 .addMemOperand(*MI.memoperands_begin());
2076 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2077 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2078 .addGlobalAddress(GV, 0, OpFlags);
2079 } else {
2080 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2081 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2082 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2083 if (Subtarget.isTargetILP32()) {
2084 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2085 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2086 .addDef(Reg32, RegState::Dead)
2087 .addUse(Reg, RegState::Kill)
2088 .addGlobalAddress(GV, 0, LoFlags)
2089 .addMemOperand(*MI.memoperands_begin())
2090 .addDef(Reg, RegState::Implicit);
2091 } else {
2092 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2093 .addReg(Reg, RegState::Kill)
2094 .addGlobalAddress(GV, 0, LoFlags)
2095 .addMemOperand(*MI.memoperands_begin());
2096 }
2097 }
2098
2099 MBB.erase(MI);
2100
2101 return true;
2102}
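// For illustration (editor's sketch, not part of the upstream source): when
// the guard symbol is referenced directly (no GOT, small code model), the
// LOAD_STACK_GUARD pseudo above is replaced by
//   adrp x0, __stack_chk_guard
//   ldr  x0, [x0, :lo12:__stack_chk_guard]
// whereas a GOT-classified reference goes through LOADgot followed by a load
// of the guard value.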
2103
2104// Return true if this instruction simply sets its single destination register
2105// to zero. This is equivalent to a register rename of the zero-register.
2106bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2107 switch (MI.getOpcode()) {
2108 default:
2109 break;
2110 case AArch64::MOVZWi:
2111 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2112 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2113 assert(MI.getDesc().getNumOperands() == 3 &&
2114 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2115 return true;
2116 }
2117 break;
2118 case AArch64::ANDWri: // and Rd, Rzr, #imm
2119 return MI.getOperand(1).getReg() == AArch64::WZR;
2120 case AArch64::ANDXri:
2121 return MI.getOperand(1).getReg() == AArch64::XZR;
2122 case TargetOpcode::COPY:
2123 return MI.getOperand(1).getReg() == AArch64::WZR;
2124 }
2125 return false;
2126}
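// For example (editorial note): "mov w0, #0" (MOVZWi with a zero immediate and
// shift) and "and x1, xzr, #0xff" (ANDXri with an XZR source) both qualify,
// while "mov w0, #1" does not.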
2127
2128// Return true if this instruction simply renames a general register without
2129// modifying bits.
2130bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2131 switch (MI.getOpcode()) {
2132 default:
2133 break;
2134 case TargetOpcode::COPY: {
2135 // GPR32 copies will be lowered to ORRXrs
2136 Register DstReg = MI.getOperand(0).getReg();
2137 return (AArch64::GPR32RegClass.contains(DstReg) ||
2138 AArch64::GPR64RegClass.contains(DstReg));
2139 }
2140 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2141 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2142 assert(MI.getDesc().getNumOperands() == 4 &&
2143 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2144 return true;
2145 }
2146 break;
2147 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2148 if (MI.getOperand(2).getImm() == 0) {
2149 assert(MI.getDesc().getNumOperands() == 4 &&
2150 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2151 return true;
2152 }
2153 break;
2154 }
2155 return false;
2156}
2157
2158 // Return true if this instruction simply renames an FP/SIMD register without
2159 // modifying bits.
2160bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2161 switch (MI.getOpcode()) {
2162 default:
2163 break;
2164 case TargetOpcode::COPY: {
2165 Register DstReg = MI.getOperand(0).getReg();
2166 return AArch64::FPR128RegClass.contains(DstReg);
2167 }
2168 case AArch64::ORRv16i8:
2169 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2170 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2171 "invalid ORRv16i8 operands");
2172 return true;
2173 }
2174 break;
2175 }
2176 return false;
2177}
2178
2179unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2180 int &FrameIndex) const {
2181 switch (MI.getOpcode()) {
2182 default:
2183 break;
2184 case AArch64::LDRWui:
2185 case AArch64::LDRXui:
2186 case AArch64::LDRBui:
2187 case AArch64::LDRHui:
2188 case AArch64::LDRSui:
2189 case AArch64::LDRDui:
2190 case AArch64::LDRQui:
2191 case AArch64::LDR_PXI:
2192 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2193 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2194 FrameIndex = MI.getOperand(1).getIndex();
2195 return MI.getOperand(0).getReg();
2196 }
2197 break;
2198 }
2199
2200 return 0;
2201}
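// For example (editorial note): "%0:gpr64 = LDRXui %stack.2, 0" reports
// FrameIndex = 2 and returns %0, while a load with a non-zero immediate or a
// register base falls through and returns 0.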
2202
2203unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2204 int &FrameIndex) const {
2205 switch (MI.getOpcode()) {
2206 default:
2207 break;
2208 case AArch64::STRWui:
2209 case AArch64::STRXui:
2210 case AArch64::STRBui:
2211 case AArch64::STRHui:
2212 case AArch64::STRSui:
2213 case AArch64::STRDui:
2214 case AArch64::STRQui:
2215 case AArch64::STR_PXI:
2216 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2217 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2218 FrameIndex = MI.getOperand(1).getIndex();
2219 return MI.getOperand(0).getReg();
2220 }
2221 break;
2222 }
2223 return 0;
2224}
2225
2226/// Check all MachineMemOperands for a hint to suppress pairing.
2227bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2228 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2229 return MMO->getFlags() & MOSuppressPair;
2230 });
2231}
2232
2233/// Set a flag on the first MachineMemOperand to suppress pairing.
2234void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2235 if (MI.memoperands_empty())
2236 return;
2237 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2238}
2239
2240/// Check all MachineMemOperands for a hint that the load/store is strided.
2241bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2242 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2243 return MMO->getFlags() & MOStridedAccess;
2244 });
2245}
2246
2247bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2248 switch (Opc) {
2249 default:
2250 return false;
2251 case AArch64::STURSi:
2252 case AArch64::STRSpre:
2253 case AArch64::STURDi:
2254 case AArch64::STRDpre:
2255 case AArch64::STURQi:
2256 case AArch64::STRQpre:
2257 case AArch64::STURBBi:
2258 case AArch64::STURHHi:
2259 case AArch64::STURWi:
2260 case AArch64::STRWpre:
2261 case AArch64::STURXi:
2262 case AArch64::STRXpre:
2263 case AArch64::LDURSi:
2264 case AArch64::LDRSpre:
2265 case AArch64::LDURDi:
2266 case AArch64::LDRDpre:
2267 case AArch64::LDURQi:
2268 case AArch64::LDRQpre:
2269 case AArch64::LDURWi:
2270 case AArch64::LDRWpre:
2271 case AArch64::LDURXi:
2272 case AArch64::LDRXpre:
2273 case AArch64::LDRSWpre:
2274 case AArch64::LDURSWi:
2275 case AArch64::LDURHHi:
2276 case AArch64::LDURBBi:
2277 case AArch64::LDURSBWi:
2278 case AArch64::LDURSHWi:
2279 return true;
2280 }
2281}
2282
2283std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2284 switch (Opc) {
2285 default: return {};
2286 case AArch64::PRFMui: return AArch64::PRFUMi;
2287 case AArch64::LDRXui: return AArch64::LDURXi;
2288 case AArch64::LDRWui: return AArch64::LDURWi;
2289 case AArch64::LDRBui: return AArch64::LDURBi;
2290 case AArch64::LDRHui: return AArch64::LDURHi;
2291 case AArch64::LDRSui: return AArch64::LDURSi;
2292 case AArch64::LDRDui: return AArch64::LDURDi;
2293 case AArch64::LDRQui: return AArch64::LDURQi;
2294 case AArch64::LDRBBui: return AArch64::LDURBBi;
2295 case AArch64::LDRHHui: return AArch64::LDURHHi;
2296 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2297 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2298 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2299 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2300 case AArch64::LDRSWui: return AArch64::LDURSWi;
2301 case AArch64::STRXui: return AArch64::STURXi;
2302 case AArch64::STRWui: return AArch64::STURWi;
2303 case AArch64::STRBui: return AArch64::STURBi;
2304 case AArch64::STRHui: return AArch64::STURHi;
2305 case AArch64::STRSui: return AArch64::STURSi;
2306 case AArch64::STRDui: return AArch64::STURDi;
2307 case AArch64::STRQui: return AArch64::STURQi;
2308 case AArch64::STRBBui: return AArch64::STURBBi;
2309 case AArch64::STRHHui: return AArch64::STURHHi;
2310 }
2311}
2312
2313unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2314 switch (Opc) {
2315 default:
2316 return 2;
2317 case AArch64::LDPXi:
2318 case AArch64::LDPDi:
2319 case AArch64::STPXi:
2320 case AArch64::STPDi:
2321 case AArch64::LDNPXi:
2322 case AArch64::LDNPDi:
2323 case AArch64::STNPXi:
2324 case AArch64::STNPDi:
2325 case AArch64::LDPQi:
2326 case AArch64::STPQi:
2327 case AArch64::LDNPQi:
2328 case AArch64::STNPQi:
2329 case AArch64::LDPWi:
2330 case AArch64::LDPSi:
2331 case AArch64::STPWi:
2332 case AArch64::STPSi:
2333 case AArch64::LDNPWi:
2334 case AArch64::LDNPSi:
2335 case AArch64::STNPWi:
2336 case AArch64::STNPSi:
2337 case AArch64::LDG:
2338 case AArch64::STGPi:
2339
2340 case AArch64::LD1B_IMM:
2341 case AArch64::LD1B_H_IMM:
2342 case AArch64::LD1B_S_IMM:
2343 case AArch64::LD1B_D_IMM:
2344 case AArch64::LD1SB_H_IMM:
2345 case AArch64::LD1SB_S_IMM:
2346 case AArch64::LD1SB_D_IMM:
2347 case AArch64::LD1H_IMM:
2348 case AArch64::LD1H_S_IMM:
2349 case AArch64::LD1H_D_IMM:
2350 case AArch64::LD1SH_S_IMM:
2351 case AArch64::LD1SH_D_IMM:
2352 case AArch64::LD1W_IMM:
2353 case AArch64::LD1W_D_IMM:
2354 case AArch64::LD1SW_D_IMM:
2355 case AArch64::LD1D_IMM:
2356
2357 case AArch64::LD2B_IMM:
2358 case AArch64::LD2H_IMM:
2359 case AArch64::LD2W_IMM:
2360 case AArch64::LD2D_IMM:
2361 case AArch64::LD3B_IMM:
2362 case AArch64::LD3H_IMM:
2363 case AArch64::LD3W_IMM:
2364 case AArch64::LD3D_IMM:
2365 case AArch64::LD4B_IMM:
2366 case AArch64::LD4H_IMM:
2367 case AArch64::LD4W_IMM:
2368 case AArch64::LD4D_IMM:
2369
2370 case AArch64::ST1B_IMM:
2371 case AArch64::ST1B_H_IMM:
2372 case AArch64::ST1B_S_IMM:
2373 case AArch64::ST1B_D_IMM:
2374 case AArch64::ST1H_IMM:
2375 case AArch64::ST1H_S_IMM:
2376 case AArch64::ST1H_D_IMM:
2377 case AArch64::ST1W_IMM:
2378 case AArch64::ST1W_D_IMM:
2379 case AArch64::ST1D_IMM:
2380
2381 case AArch64::ST2B_IMM:
2382 case AArch64::ST2H_IMM:
2383 case AArch64::ST2W_IMM:
2384 case AArch64::ST2D_IMM:
2385 case AArch64::ST3B_IMM:
2386 case AArch64::ST3H_IMM:
2387 case AArch64::ST3W_IMM:
2388 case AArch64::ST3D_IMM:
2389 case AArch64::ST4B_IMM:
2390 case AArch64::ST4H_IMM:
2391 case AArch64::ST4W_IMM:
2392 case AArch64::ST4D_IMM:
2393
2394 case AArch64::LD1RB_IMM:
2395 case AArch64::LD1RB_H_IMM:
2396 case AArch64::LD1RB_S_IMM:
2397 case AArch64::LD1RB_D_IMM:
2398 case AArch64::LD1RSB_H_IMM:
2399 case AArch64::LD1RSB_S_IMM:
2400 case AArch64::LD1RSB_D_IMM:
2401 case AArch64::LD1RH_IMM:
2402 case AArch64::LD1RH_S_IMM:
2403 case AArch64::LD1RH_D_IMM:
2404 case AArch64::LD1RSH_S_IMM:
2405 case AArch64::LD1RSH_D_IMM:
2406 case AArch64::LD1RW_IMM:
2407 case AArch64::LD1RW_D_IMM:
2408 case AArch64::LD1RSW_IMM:
2409 case AArch64::LD1RD_IMM:
2410
2411 case AArch64::LDNT1B_ZRI:
2412 case AArch64::LDNT1H_ZRI:
2413 case AArch64::LDNT1W_ZRI:
2414 case AArch64::LDNT1D_ZRI:
2415 case AArch64::STNT1B_ZRI:
2416 case AArch64::STNT1H_ZRI:
2417 case AArch64::STNT1W_ZRI:
2418 case AArch64::STNT1D_ZRI:
2419
2420 case AArch64::LDNF1B_IMM:
2421 case AArch64::LDNF1B_H_IMM:
2422 case AArch64::LDNF1B_S_IMM:
2423 case AArch64::LDNF1B_D_IMM:
2424 case AArch64::LDNF1SB_H_IMM:
2425 case AArch64::LDNF1SB_S_IMM:
2426 case AArch64::LDNF1SB_D_IMM:
2427 case AArch64::LDNF1H_IMM:
2428 case AArch64::LDNF1H_S_IMM:
2429 case AArch64::LDNF1H_D_IMM:
2430 case AArch64::LDNF1SH_S_IMM:
2431 case AArch64::LDNF1SH_D_IMM:
2432 case AArch64::LDNF1W_IMM:
2433 case AArch64::LDNF1W_D_IMM:
2434 case AArch64::LDNF1SW_D_IMM:
2435 case AArch64::LDNF1D_IMM:
2436 return 3;
2437 case AArch64::ADDG:
2438 case AArch64::STGi:
2439 case AArch64::LDR_PXI:
2440 case AArch64::STR_PXI:
2441 return 2;
2442 }
2443}
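// For example (editorial note): for LDPXi ("ldp x0, x1, [x2, #16]") the offset
// immediate is operand 3, while for a plain LDRXui it is the default operand 2.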
2444
2445bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2446 switch (MI.getOpcode()) {
2447 default:
2448 return false;
2449 // Scaled instructions.
2450 case AArch64::STRSui:
2451 case AArch64::STRDui:
2452 case AArch64::STRQui:
2453 case AArch64::STRXui:
2454 case AArch64::STRWui:
2455 case AArch64::LDRSui:
2456 case AArch64::LDRDui:
2457 case AArch64::LDRQui:
2458 case AArch64::LDRXui:
2459 case AArch64::LDRWui:
2460 case AArch64::LDRSWui:
2461 // Unscaled instructions.
2462 case AArch64::STURSi:
2463 case AArch64::STRSpre:
2464 case AArch64::STURDi:
2465 case AArch64::STRDpre:
2466 case AArch64::STURQi:
2467 case AArch64::STRQpre:
2468 case AArch64::STURWi:
2469 case AArch64::STRWpre:
2470 case AArch64::STURXi:
2471 case AArch64::STRXpre:
2472 case AArch64::LDURSi:
2473 case AArch64::LDRSpre:
2474 case AArch64::LDURDi:
2475 case AArch64::LDRDpre:
2476 case AArch64::LDURQi:
2477 case AArch64::LDRQpre:
2478 case AArch64::LDURWi:
2479 case AArch64::LDRWpre:
2480 case AArch64::LDURXi:
2481 case AArch64::LDRXpre:
2482 case AArch64::LDURSWi:
2483 case AArch64::LDRSWpre:
2484 return true;
2485 }
2486}
2487
2488unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2489 switch (Opc) {
2490 default:
2491 llvm_unreachable("Opcode has no flag setting equivalent!");
2492 // 32-bit cases:
2493 case AArch64::ADDWri:
2494 return AArch64::ADDSWri;
2495 case AArch64::ADDWrr:
2496 return AArch64::ADDSWrr;
2497 case AArch64::ADDWrs:
2498 return AArch64::ADDSWrs;
2499 case AArch64::ADDWrx:
2500 return AArch64::ADDSWrx;
2501 case AArch64::ANDWri:
2502 return AArch64::ANDSWri;
2503 case AArch64::ANDWrr:
2504 return AArch64::ANDSWrr;
2505 case AArch64::ANDWrs:
2506 return AArch64::ANDSWrs;
2507 case AArch64::BICWrr:
2508 return AArch64::BICSWrr;
2509 case AArch64::BICWrs:
2510 return AArch64::BICSWrs;
2511 case AArch64::SUBWri:
2512 return AArch64::SUBSWri;
2513 case AArch64::SUBWrr:
2514 return AArch64::SUBSWrr;
2515 case AArch64::SUBWrs:
2516 return AArch64::SUBSWrs;
2517 case AArch64::SUBWrx:
2518 return AArch64::SUBSWrx;
2519 // 64-bit cases:
2520 case AArch64::ADDXri:
2521 return AArch64::ADDSXri;
2522 case AArch64::ADDXrr:
2523 return AArch64::ADDSXrr;
2524 case AArch64::ADDXrs:
2525 return AArch64::ADDSXrs;
2526 case AArch64::ADDXrx:
2527 return AArch64::ADDSXrx;
2528 case AArch64::ANDXri:
2529 return AArch64::ANDSXri;
2530 case AArch64::ANDXrr:
2531 return AArch64::ANDSXrr;
2532 case AArch64::ANDXrs:
2533 return AArch64::ANDSXrs;
2534 case AArch64::BICXrr:
2535 return AArch64::BICSXrr;
2536 case AArch64::BICXrs:
2537 return AArch64::BICSXrs;
2538 case AArch64::SUBXri:
2539 return AArch64::SUBSXri;
2540 case AArch64::SUBXrr:
2541 return AArch64::SUBSXrr;
2542 case AArch64::SUBXrs:
2543 return AArch64::SUBSXrs;
2544 case AArch64::SUBXrx:
2545 return AArch64::SUBSXrx;
2546 // SVE instructions:
2547 case AArch64::AND_PPzPP:
2548 return AArch64::ANDS_PPzPP;
2549 case AArch64::BIC_PPzPP:
2550 return AArch64::BICS_PPzPP;
2551 case AArch64::EOR_PPzPP:
2552 return AArch64::EORS_PPzPP;
2553 case AArch64::NAND_PPzPP:
2554 return AArch64::NANDS_PPzPP;
2555 case AArch64::NOR_PPzPP:
2556 return AArch64::NORS_PPzPP;
2557 case AArch64::ORN_PPzPP:
2558 return AArch64::ORNS_PPzPP;
2559 case AArch64::ORR_PPzPP:
2560 return AArch64::ORRS_PPzPP;
2561 case AArch64::BRKA_PPzP:
2562 return AArch64::BRKAS_PPzP;
2563 case AArch64::BRKPA_PPzPP:
2564 return AArch64::BRKPAS_PPzPP;
2565 case AArch64::BRKB_PPzP:
2566 return AArch64::BRKBS_PPzP;
2567 case AArch64::BRKPB_PPzPP:
2568 return AArch64::BRKPBS_PPzPP;
2569 case AArch64::BRKN_PPzP:
2570 return AArch64::BRKNS_PPzP;
2571 case AArch64::RDFFR_PPz:
2572 return AArch64::RDFFRS_PPz;
2573 case AArch64::PTRUE_B:
2574 return AArch64::PTRUES_B;
2575 }
2576}
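// For example (editorial note): ADDWri ("add w0, w1, #4") maps to ADDSWri
// ("adds w0, w1, #4"), which has the same operands but additionally defines
// NZCV.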
2577
2578// Is this a candidate for ld/st merging or pairing? For example, we don't
2579// touch volatiles or load/stores that have a hint to avoid pair formation.
2580bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2581
2582 bool IsPreLdSt = isPreLdSt(MI);
2583
2584 // If this is a volatile load/store, don't mess with it.
2585 if (MI.hasOrderedMemoryRef())
2586 return false;
2587
2588 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2589 // For Pre-inc LD/ST, the operand is shifted by one.
2590 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2591 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2592 "Expected a reg or frame index operand.");
2593
2594 // For Pre-indexed addressing quadword instructions, the third operand is the
2595 // immediate value.
2596 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2597
2598 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2599 return false;
2600
2601 // Can't merge/pair if the instruction modifies the base register.
2602 // e.g., ldr x0, [x0]
2603 // This case will never occur with an FI base.
2604 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2605 // STR<S,D,Q,W,X>pre, it can be merged.
2606 // For example:
2607 // ldr q0, [x11, #32]!
2608 // ldr q1, [x11, #16]
2609 // to
2610 // ldp q0, q1, [x11, #32]!
2611 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2612 Register BaseReg = MI.getOperand(1).getReg();
2613 const TargetRegisterInfo *TRI = &getRegisterInfo();
2614 if (MI.modifiesRegister(BaseReg, TRI))
2615 return false;
2616 }
2617
2618 // Check if this load/store has a hint to avoid pair formation.
2619 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2620 if (isLdStPairSuppressed(MI))
2621 return false;
2622
2623 // Do not pair any callee-save store/reload instructions in the
2624 // prologue/epilogue if the CFI information encoded the operations as separate
2625 // instructions, as that will cause the size of the actual prologue to mismatch
2626 // with the prologue size recorded in the Windows CFI.
2627 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2628 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2629 MI.getMF()->getFunction().needsUnwindTableEntry();
2630 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2631 MI.getFlag(MachineInstr::FrameDestroy)))
2632 return false;
2633
2634 // On some CPUs quad load/store pairs are slower than two single load/stores.
2635 if (Subtarget.isPaired128Slow()) {
2636 switch (MI.getOpcode()) {
2637 default:
2638 break;
2639 case AArch64::LDURQi:
2640 case AArch64::STURQi:
2641 case AArch64::LDRQui:
2642 case AArch64::STRQui:
2643 return false;
2644 }
2645 }
2646
2647 return true;
2648}
2649
2650bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2651 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2652 int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
2653 const TargetRegisterInfo *TRI) const {
2654 if (!LdSt.mayLoadOrStore())
2655 return false;
2656
2657 const MachineOperand *BaseOp;
2658 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2659 Width, TRI))
2660 return false;
2661 BaseOps.push_back(BaseOp);
2662 return true;
2663}
2664
2665std::optional<ExtAddrMode>
2666AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2667 const TargetRegisterInfo *TRI) const {
2668 const MachineOperand *Base; // Filled with the base operand of MI.
2669 int64_t Offset; // Filled with the offset of MI.
2670 bool OffsetIsScalable;
2671 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2672 return std::nullopt;
2673
2674 if (!Base->isReg())
2675 return std::nullopt;
2676 ExtAddrMode AM;
2677 AM.BaseReg = Base->getReg();
2678 AM.Displacement = Offset;
2679 AM.ScaledReg = 0;
2680 AM.Scale = 0;
2681 return AM;
2682}
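// For example (editorial note): for "ldr x1, [x2, #16]" this returns
// BaseReg = x2, Displacement = 16 (the stored immediate 2 scaled by 8),
// ScaledReg = 0 and Scale = 0.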
2683
2684bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2685 Register Reg,
2686 const MachineInstr &AddrI,
2687 ExtAddrMode &AM) const {
2688 // Filter out instructions into which we cannot fold.
2689 unsigned NumBytes;
2690 int64_t OffsetScale = 1;
2691 switch (MemI.getOpcode()) {
2692 default:
2693 return false;
2694
2695 case AArch64::LDURQi:
2696 case AArch64::STURQi:
2697 NumBytes = 16;
2698 break;
2699
2700 case AArch64::LDURDi:
2701 case AArch64::STURDi:
2702 case AArch64::LDURXi:
2703 case AArch64::STURXi:
2704 NumBytes = 8;
2705 break;
2706
2707 case AArch64::LDURWi:
2708 case AArch64::LDURSWi:
2709 case AArch64::STURWi:
2710 NumBytes = 4;
2711 break;
2712
2713 case AArch64::LDURHi:
2714 case AArch64::STURHi:
2715 case AArch64::LDURHHi:
2716 case AArch64::STURHHi:
2717 case AArch64::LDURSHXi:
2718 case AArch64::LDURSHWi:
2719 NumBytes = 2;
2720 break;
2721
2722 case AArch64::LDRBroX:
2723 case AArch64::LDRBBroX:
2724 case AArch64::LDRSBXroX:
2725 case AArch64::LDRSBWroX:
2726 case AArch64::STRBroX:
2727 case AArch64::STRBBroX:
2728 case AArch64::LDURBi:
2729 case AArch64::LDURBBi:
2730 case AArch64::LDURSBXi:
2731 case AArch64::LDURSBWi:
2732 case AArch64::STURBi:
2733 case AArch64::STURBBi:
2734 case AArch64::LDRBui:
2735 case AArch64::LDRBBui:
2736 case AArch64::LDRSBXui:
2737 case AArch64::LDRSBWui:
2738 case AArch64::STRBui:
2739 case AArch64::STRBBui:
2740 NumBytes = 1;
2741 break;
2742
2743 case AArch64::LDRQroX:
2744 case AArch64::STRQroX:
2745 case AArch64::LDRQui:
2746 case AArch64::STRQui:
2747 NumBytes = 16;
2748 OffsetScale = 16;
2749 break;
2750
2751 case AArch64::LDRDroX:
2752 case AArch64::STRDroX:
2753 case AArch64::LDRXroX:
2754 case AArch64::STRXroX:
2755 case AArch64::LDRDui:
2756 case AArch64::STRDui:
2757 case AArch64::LDRXui:
2758 case AArch64::STRXui:
2759 NumBytes = 8;
2760 OffsetScale = 8;
2761 break;
2762
2763 case AArch64::LDRWroX:
2764 case AArch64::LDRSWroX:
2765 case AArch64::STRWroX:
2766 case AArch64::LDRWui:
2767 case AArch64::LDRSWui:
2768 case AArch64::STRWui:
2769 NumBytes = 4;
2770 OffsetScale = 4;
2771 break;
2772
2773 case AArch64::LDRHroX:
2774 case AArch64::STRHroX:
2775 case AArch64::LDRHHroX:
2776 case AArch64::STRHHroX:
2777 case AArch64::LDRSHXroX:
2778 case AArch64::LDRSHWroX:
2779 case AArch64::LDRHui:
2780 case AArch64::STRHui:
2781 case AArch64::LDRHHui:
2782 case AArch64::STRHHui:
2783 case AArch64::LDRSHXui:
2784 case AArch64::LDRSHWui:
2785 NumBytes = 2;
2786 OffsetScale = 2;
2787 break;
2788 }
2789
2790 // Check the fold operand is not the loaded/stored value.
2791 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2792 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2793 return false;
2794
2795 // Handle memory instructions with a [Reg, Reg] addressing mode.
2796 if (MemI.getOperand(2).isReg()) {
2797 // Bail if the addressing mode already includes extension of the offset
2798 // register.
2799 if (MemI.getOperand(3).getImm())
2800 return false;
2801
2802 // Check if we actually have a scaled offset.
2803 if (MemI.getOperand(4).getImm() == 0)
2804 OffsetScale = 1;
2805
2806 // If the address instruction is folded into the base register, then the
2807 // addressing mode must not have a scale. Then we can swap the base and the
2808 // scaled registers.
2809 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2810 return false;
2811
2812 switch (AddrI.getOpcode()) {
2813 default:
2814 return false;
2815
2816 case AArch64::SBFMXri:
2817 // sxtw Xa, Wm
2818 // ldr Xd, [Xn, Xa, lsl #N]
2819 // ->
2820 // ldr Xd, [Xn, Wm, sxtw #N]
2821 if (AddrI.getOperand(2).getImm() != 0 ||
2822 AddrI.getOperand(3).getImm() != 31)
2823 return false;
2824
2825 AM.BaseReg = MemI.getOperand(1).getReg();
2826 if (AM.BaseReg == Reg)
2827 AM.BaseReg = MemI.getOperand(2).getReg();
2828 AM.ScaledReg = AddrI.getOperand(1).getReg();
2829 AM.Scale = OffsetScale;
2830 AM.Displacement = 0;
2831 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2832 return true;
2833
2834 case TargetOpcode::SUBREG_TO_REG: {
2835 // mov Wa, Wm
2836 // ldr Xd, [Xn, Xa, lsl #N]
2837 // ->
2838 // ldr Xd, [Xn, Wm, uxtw #N]
2839
2840 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2841 if (AddrI.getOperand(1).getImm() != 0 ||
2842 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2843 return false;
2844
2845 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2846 Register OffsetReg = AddrI.getOperand(2).getReg();
2847 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2848 return false;
2849
2850 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2851 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2852 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2853 DefMI.getOperand(3).getImm() != 0)
2854 return false;
2855
2856 AM.BaseReg = MemI.getOperand(1).getReg();
2857 if (AM.BaseReg == Reg)
2858 AM.BaseReg = MemI.getOperand(2).getReg();
2859 AM.ScaledReg = DefMI.getOperand(2).getReg();
2860 AM.Scale = OffsetScale;
2861 AM.Displacement = 0;
2862 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2863 return true;
2864 }
2865 }
2866 }
2867
2868 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2869
2870 // Check we are not breaking a potential conversion to an LDP.
2871 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2872 int64_t NewOffset) -> bool {
2873 int64_t MinOffset, MaxOffset;
2874 switch (NumBytes) {
2875 default:
2876 return true;
2877 case 4:
2878 MinOffset = -256;
2879 MaxOffset = 252;
2880 break;
2881 case 8:
2882 MinOffset = -512;
2883 MaxOffset = 504;
2884 break;
2885 case 16:
2886 MinOffset = -1024;
2887 MaxOffset = 1008;
2888 break;
2889 }
2890 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2891 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2892 };
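// For example (editorial note): for an 8-byte access an old offset of 1024 is
// already outside the LDP-encodable range [-512, 504], so any new offset is
// accepted; an old offset of 48 only allows new offsets that remain inside
// that range.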
2893 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2894 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2895 int64_t NewOffset = OldOffset + Disp;
2896 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2897 return false;
2898 // If the old offset would fit into an LDP, but the new offset wouldn't,
2899 // bail out.
2900 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2901 return false;
2902 AM.BaseReg = AddrI.getOperand(1).getReg();
2903 AM.ScaledReg = 0;
2904 AM.Scale = 0;
2905 AM.Displacement = NewOffset;
2906 AM.Form = ExtAddrMode::Formula::Basic;
2907 return true;
2908 };
2909
2910 auto canFoldAddRegIntoAddrMode =
2911 [&](int64_t Scale,
2912 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2913 if (MemI.getOperand(2).getImm() != 0)
2914 return false;
2915 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2916 return false;
2917 AM.BaseReg = AddrI.getOperand(1).getReg();
2918 AM.ScaledReg = AddrI.getOperand(2).getReg();
2919 AM.Scale = Scale;
2920 AM.Displacement = 0;
2921 AM.Form = Form;
2922 return true;
2923 };
2924
2925 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2926 unsigned Opcode = MemI.getOpcode();
2927 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2928 Subtarget.isSTRQroSlow();
2929 };
2930
2931 int64_t Disp = 0;
2932 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2933 switch (AddrI.getOpcode()) {
2934 default:
2935 return false;
2936
2937 case AArch64::ADDXri:
2938 // add Xa, Xn, #N
2939 // ldr Xd, [Xa, #M]
2940 // ->
2941 // ldr Xd, [Xn, #N'+M]
2942 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2943 return canFoldAddSubImmIntoAddrMode(Disp);
2944
2945 case AArch64::SUBXri:
2946 // sub Xa, Xn, #N
2947 // ldr Xd, [Xa, #M]
2948 // ->
2949 // ldr Xd, [Xn, #N'+M]
2950 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2951 return canFoldAddSubImmIntoAddrMode(-Disp);
2952
2953 case AArch64::ADDXrs: {
2954 // add Xa, Xn, Xm, lsl #N
2955 // ldr Xd, [Xa]
2956 // ->
2957 // ldr Xd, [Xn, Xm, lsl #N]
2958
2959 // Don't fold the add if the result would be slower, unless optimising for
2960 // size.
2961 int64_t Shift = AddrI.getOperand(3).getImm();
2962 if (!OptSize) {
2963 if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast())
2964 return false;
2965 if (avoidSlowSTRQ(MemI))
2966 return false;
2967 }
2968 return canFoldAddRegIntoAddrMode(1ULL << Shift);
2969 }
2970
2971 case AArch64::ADDXrr:
2972 // add Xa, Xn, Xm
2973 // ldr Xd, [Xa]
2974 // ->
2975 // ldr Xd, [Xn, Xm, lsl #0]
2976
2977 // Don't fold the add if the result would be slower, unless optimising for
2978 // size.
2979 if (!OptSize && avoidSlowSTRQ(MemI))
2980 return false;
2981 return canFoldAddRegIntoAddrMode(1);
2982
2983 case AArch64::ADDXrx:
2984 // add Xa, Xn, Wm, {s,u}xtw #N
2985 // ldr Xd, [Xa]
2986 // ->
2987 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
2988
2989 // Don't fold the add if the result would be slower, unless optimising for
2990 // size.
2991 if (!OptSize && avoidSlowSTRQ(MemI))
2992 return false;
2993
2994 // Can fold only sign-/zero-extend of a word.
2995 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2996 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
2997 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
2998 return false;
2999
3000 return canFoldAddRegIntoAddrMode(
3001 1ULL << AArch64_AM::getArithShiftValue(Imm),
3002 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3003 : ExtAddrMode::Formula::ZExtScaledReg);
3004 }
3005}
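// For illustration (editor's sketch, not part of the upstream source):
// assuming the subtarget reports fast scaled addressing (or we optimise for
// size), the pair
//   add x8, x0, x1, lsl #3
//   ldr x2, [x8]
// is reported as foldable with AM = { BaseReg = x0, ScaledReg = x1, Scale = 8,
// Displacement = 0 }.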
3006
3007// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3008// return the opcode of an instruction performing the same operation, but using
3009// the [Reg, Reg] addressing mode.
3010static unsigned regOffsetOpcode(unsigned Opcode) {
3011 switch (Opcode) {
3012 default:
3013 llvm_unreachable("Address folding not implemented for instruction");
3014
3015 case AArch64::LDURQi:
3016 case AArch64::LDRQui:
3017 return AArch64::LDRQroX;
3018 case AArch64::STURQi:
3019 case AArch64::STRQui:
3020 return AArch64::STRQroX;
3021 case AArch64::LDURDi:
3022 case AArch64::LDRDui:
3023 return AArch64::LDRDroX;
3024 case AArch64::STURDi:
3025 case AArch64::STRDui:
3026 return AArch64::STRDroX;
3027 case AArch64::LDURXi:
3028 case AArch64::LDRXui:
3029 return AArch64::LDRXroX;
3030 case AArch64::STURXi:
3031 case AArch64::STRXui:
3032 return AArch64::STRXroX;
3033 case AArch64::LDURWi:
3034 case AArch64::LDRWui:
3035 return AArch64::LDRWroX;
3036 case AArch64::LDURSWi:
3037 case AArch64::LDRSWui:
3038 return AArch64::LDRSWroX;
3039 case AArch64::STURWi:
3040 case AArch64::STRWui:
3041 return AArch64::STRWroX;
3042 case AArch64::LDURHi:
3043 case AArch64::LDRHui:
3044 return AArch64::LDRHroX;
3045 case AArch64::STURHi:
3046 case AArch64::STRHui:
3047 return AArch64::STRHroX;
3048 case AArch64::LDURHHi:
3049 case AArch64::LDRHHui:
3050 return AArch64::LDRHHroX;
3051 case AArch64::STURHHi:
3052 case AArch64::STRHHui:
3053 return AArch64::STRHHroX;
3054 case AArch64::LDURSHXi:
3055 case AArch64::LDRSHXui:
3056 return AArch64::LDRSHXroX;
3057 case AArch64::LDURSHWi:
3058 case AArch64::LDRSHWui:
3059 return AArch64::LDRSHWroX;
3060 case AArch64::LDURBi:
3061 case AArch64::LDRBui:
3062 return AArch64::LDRBroX;
3063 case AArch64::LDURBBi:
3064 case AArch64::LDRBBui:
3065 return AArch64::LDRBBroX;
3066 case AArch64::LDURSBXi:
3067 case AArch64::LDRSBXui:
3068 return AArch64::LDRSBXroX;
3069 case AArch64::LDURSBWi:
3070 case AArch64::LDRSBWui:
3071 return AArch64::LDRSBWroX;
3072 case AArch64::STURBi:
3073 case AArch64::STRBui:
3074 return AArch64::STRBroX;
3075 case AArch64::STURBBi:
3076 case AArch64::STRBBui:
3077 return AArch64::STRBBroX;
3078 }
3079}
3080
3081// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3082// the opcode of an instruction performing the same operation, but using the
3083// [Reg, #Imm] addressing mode with scaled offset.
3084unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3085 switch (Opcode) {
3086 default:
3087 llvm_unreachable("Address folding not implemented for instruction");
3088
3089 case AArch64::LDURQi:
3090 Scale = 16;
3091 return AArch64::LDRQui;
3092 case AArch64::STURQi:
3093 Scale = 16;
3094 return AArch64::STRQui;
3095 case AArch64::LDURDi:
3096 Scale = 8;
3097 return AArch64::LDRDui;
3098 case AArch64::STURDi:
3099 Scale = 8;
3100 return AArch64::STRDui;
3101 case AArch64::LDURXi:
3102 Scale = 8;
3103 return AArch64::LDRXui;
3104 case AArch64::STURXi:
3105 Scale = 8;
3106 return AArch64::STRXui;
3107 case AArch64::LDURWi:
3108 Scale = 4;
3109 return AArch64::LDRWui;
3110 case AArch64::LDURSWi:
3111 Scale = 4;
3112 return AArch64::LDRSWui;
3113 case AArch64::STURWi:
3114 Scale = 4;
3115 return AArch64::STRWui;
3116 case AArch64::LDURHi:
3117 Scale = 2;
3118 return AArch64::LDRHui;
3119 case AArch64::STURHi:
3120 Scale = 2;
3121 return AArch64::STRHui;
3122 case AArch64::LDURHHi:
3123 Scale = 2;
3124 return AArch64::LDRHHui;
3125 case AArch64::STURHHi:
3126 Scale = 2;
3127 return AArch64::STRHHui;
3128 case AArch64::LDURSHXi:
3129 Scale = 2;
3130 return AArch64::LDRSHXui;
3131 case AArch64::LDURSHWi:
3132 Scale = 2;
3133 return AArch64::LDRSHWui;
3134 case AArch64::LDURBi:
3135 Scale = 1;
3136 return AArch64::LDRBui;
3137 case AArch64::LDURBBi:
3138 Scale = 1;
3139 return AArch64::LDRBBui;
3140 case AArch64::LDURSBXi:
3141 Scale = 1;
3142 return AArch64::LDRSBXui;
3143 case AArch64::LDURSBWi:
3144 Scale = 1;
3145 return AArch64::LDRSBWui;
3146 case AArch64::STURBi:
3147 Scale = 1;
3148 return AArch64::STRBui;
3149 case AArch64::STURBBi:
3150 Scale = 1;
3151 return AArch64::STRBBui;
3152 case AArch64::LDRQui:
3153 case AArch64::STRQui:
3154 Scale = 16;
3155 return Opcode;
3156 case AArch64::LDRDui:
3157 case AArch64::STRDui:
3158 case AArch64::LDRXui:
3159 case AArch64::STRXui:
3160 Scale = 8;
3161 return Opcode;
3162 case AArch64::LDRWui:
3163 case AArch64::LDRSWui:
3164 case AArch64::STRWui:
3165 Scale = 4;
3166 return Opcode;
3167 case AArch64::LDRHui:
3168 case AArch64::STRHui:
3169 case AArch64::LDRHHui:
3170 case AArch64::STRHHui:
3171 case AArch64::LDRSHXui:
3172 case AArch64::LDRSHWui:
3173 Scale = 2;
3174 return Opcode;
3175 case AArch64::LDRBui:
3176 case AArch64::LDRBBui:
3177 case AArch64::LDRSBXui:
3178 case AArch64::LDRSBWui:
3179 case AArch64::STRBui:
3180 case AArch64::STRBBui:
3181 Scale = 1;
3182 return Opcode;
3183 }
3184}
3185
3186// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3187// the opcode of an instruction performing the same operation, but using the
3188// [Reg, #Imm] addressing mode with unscaled offset.
3189unsigned unscaledOffsetOpcode(unsigned Opcode) {
3190 switch (Opcode) {
3191 default:
3192 llvm_unreachable("Address folding not implemented for instruction");
3193
3194 case AArch64::LDURQi:
3195 case AArch64::STURQi:
3196 case AArch64::LDURDi:
3197 case AArch64::STURDi:
3198 case AArch64::LDURXi:
3199 case AArch64::STURXi:
3200 case AArch64::LDURWi:
3201 case AArch64::LDURSWi:
3202 case AArch64::STURWi:
3203 case AArch64::LDURHi:
3204 case AArch64::STURHi:
3205 case AArch64::LDURHHi:
3206 case AArch64::STURHHi:
3207 case AArch64::LDURSHXi:
3208 case AArch64::LDURSHWi:
3209 case AArch64::LDURBi:
3210 case AArch64::STURBi:
3211 case AArch64::LDURBBi:
3212 case AArch64::STURBBi:
3213 case AArch64::LDURSBWi:
3214 case AArch64::LDURSBXi:
3215 return Opcode;
3216 case AArch64::LDRQui:
3217 return AArch64::LDURQi;
3218 case AArch64::STRQui:
3219 return AArch64::STURQi;
3220 case AArch64::LDRDui:
3221 return AArch64::LDURDi;
3222 case AArch64::STRDui:
3223 return AArch64::STURDi;
3224 case AArch64::LDRXui:
3225 return AArch64::LDURXi;
3226 case AArch64::STRXui:
3227 return AArch64::STURXi;
3228 case AArch64::LDRWui:
3229 return AArch64::LDURWi;
3230 case AArch64::LDRSWui:
3231 return AArch64::LDURSWi;
3232 case AArch64::STRWui:
3233 return AArch64::STURWi;
3234 case AArch64::LDRHui:
3235 return AArch64::LDURHi;
3236 case AArch64::STRHui:
3237 return AArch64::STURHi;
3238 case AArch64::LDRHHui:
3239 return AArch64::LDURHHi;
3240 case AArch64::STRHHui:
3241 return AArch64::STURHHi;
3242 case AArch64::LDRSHXui:
3243 return AArch64::LDURSHXi;
3244 case AArch64::LDRSHWui:
3245 return AArch64::LDURSHWi;
3246 case AArch64::LDRBBui:
3247 return AArch64::LDURBBi;
3248 case AArch64::LDRBui:
3249 return AArch64::LDURBi;
3250 case AArch64::STRBBui:
3251 return AArch64::STURBBi;
3252 case AArch64::STRBui:
3253 return AArch64::STURBi;
3254 case AArch64::LDRSBWui:
3255 return AArch64::LDURSBWi;
3256 case AArch64::LDRSBXui:
3257 return AArch64::LDURSBXi;
3258 }
3259}
3260
3261// Given the opcode of a memory load/store instruction, return the opcode of an
3262// instruction performing the same operation, but using
3263// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3264// offset register.
3265static unsigned offsetExtendOpcode(unsigned Opcode) {
3266 switch (Opcode) {
3267 default:
3268 llvm_unreachable("Address folding not implemented for instruction");
3269
3270 case AArch64::LDRQroX:
3271 case AArch64::LDURQi:
3272 case AArch64::LDRQui:
3273 return AArch64::LDRQroW;
3274 case AArch64::STRQroX:
3275 case AArch64::STURQi:
3276 case AArch64::STRQui:
3277 return AArch64::STRQroW;
3278 case AArch64::LDRDroX:
3279 case AArch64::LDURDi:
3280 case AArch64::LDRDui:
3281 return AArch64::LDRDroW;
3282 case AArch64::STRDroX:
3283 case AArch64::STURDi:
3284 case AArch64::STRDui:
3285 return AArch64::STRDroW;
3286 case AArch64::LDRXroX:
3287 case AArch64::LDURXi:
3288 case AArch64::LDRXui:
3289 return AArch64::LDRXroW;
3290 case AArch64::STRXroX:
3291 case AArch64::STURXi:
3292 case AArch64::STRXui:
3293 return AArch64::STRXroW;
3294 case AArch64::LDRWroX:
3295 case AArch64::LDURWi:
3296 case AArch64::LDRWui:
3297 return AArch64::LDRWroW;
3298 case AArch64::LDRSWroX:
3299 case AArch64::LDURSWi:
3300 case AArch64::LDRSWui:
3301 return AArch64::LDRSWroW;
3302 case AArch64::STRWroX:
3303 case AArch64::STURWi:
3304 case AArch64::STRWui:
3305 return AArch64::STRWroW;
3306 case AArch64::LDRHroX:
3307 case AArch64::LDURHi:
3308 case AArch64::LDRHui:
3309 return AArch64::LDRHroW;
3310 case AArch64::STRHroX:
3311 case AArch64::STURHi:
3312 case AArch64::STRHui:
3313 return AArch64::STRHroW;
3314 case AArch64::LDRHHroX:
3315 case AArch64::LDURHHi:
3316 case AArch64::LDRHHui:
3317 return AArch64::LDRHHroW;
3318 case AArch64::STRHHroX:
3319 case AArch64::STURHHi:
3320 case AArch64::STRHHui:
3321 return AArch64::STRHHroW;
3322 case AArch64::LDRSHXroX:
3323 case AArch64::LDURSHXi:
3324 case AArch64::LDRSHXui:
3325 return AArch64::LDRSHXroW;
3326 case AArch64::LDRSHWroX:
3327 case AArch64::LDURSHWi:
3328 case AArch64::LDRSHWui:
3329 return AArch64::LDRSHWroW;
3330 case AArch64::LDRBroX:
3331 case AArch64::LDURBi:
3332 case AArch64::LDRBui:
3333 return AArch64::LDRBroW;
3334 case AArch64::LDRBBroX:
3335 case AArch64::LDURBBi:
3336 case AArch64::LDRBBui:
3337 return AArch64::LDRBBroW;
3338 case AArch64::LDRSBXroX:
3339 case AArch64::LDURSBXi:
3340 case AArch64::LDRSBXui:
3341 return AArch64::LDRSBXroW;
3342 case AArch64::LDRSBWroX:
3343 case AArch64::LDURSBWi:
3344 case AArch64::LDRSBWui:
3345 return AArch64::LDRSBWroW;
3346 case AArch64::STRBroX:
3347 case AArch64::STURBi:
3348 case AArch64::STRBui:
3349 return AArch64::STRBroW;
3350 case AArch64::STRBBroX:
3351 case AArch64::STURBBi:
3352 case AArch64::STRBBui:
3353 return AArch64::STRBBroW;
3354 }
3355}
3356
3357MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3358 const ExtAddrMode &AM) const {
3359
3360 const DebugLoc &DL = MemI.getDebugLoc();
3361 MachineBasicBlock &MBB = *MemI.getParent();
3362 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3363
3364 if (AM.Form == ExtAddrMode::Formula::Basic) {
3365 if (AM.ScaledReg) {
3366 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3367 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3368 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3369 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3370 .addReg(MemI.getOperand(0).getReg(),
3371 MemI.mayLoad() ? RegState::Define : 0)
3372 .addReg(AM.BaseReg)
3373 .addReg(AM.ScaledReg)
3374 .addImm(0)
3375 .addImm(AM.Scale > 1)
3376 .setMemRefs(MemI.memoperands())
3377 .setMIFlags(MemI.getFlags());
3378 return B.getInstr();
3379 }
3380
3381 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3382 "Addressing mode not supported for folding");
3383
3384 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3385 unsigned Scale = 1;
3386 unsigned Opcode = MemI.getOpcode();
3387 if (isInt<9>(AM.Displacement))
3388 Opcode = unscaledOffsetOpcode(Opcode);
3389 else
3390 Opcode = scaledOffsetOpcode(Opcode, Scale);
3391
3392 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3393 .addReg(MemI.getOperand(0).getReg(),
3394 MemI.mayLoad() ? RegState::Define : 0)
3395 .addReg(AM.BaseReg)
3396 .addImm(AM.Displacement / Scale)
3397 .setMemRefs(MemI.memoperands())
3398 .setMIFlags(MemI.getFlags());
3399 return B.getInstr();
3400 }
3401
3402 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3403 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3404 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3405 assert(AM.ScaledReg && !AM.Displacement &&
3406 "Address offset can be a register or an immediate, but not both");
3407 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3408 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3409 // Make sure the offset register is in the correct register class.
3410 Register OffsetReg = AM.ScaledReg;
3411 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3412 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3413 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3414 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3415 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3416 }
3417 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3418 .addReg(MemI.getOperand(0).getReg(),
3419 MemI.mayLoad() ? RegState::Define : 0)
3420 .addReg(AM.BaseReg)
3421 .addReg(OffsetReg)
3422 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3423 .addImm(AM.Scale != 1)
3424 .setMemRefs(MemI.memoperands())
3425 .setMIFlags(MemI.getFlags());
3426
3427 return B.getInstr();
3428 }
3429
3431 "Function must not be called with an addressing mode it can't handle");
3432}
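// For illustration (editor's sketch, not part of the upstream source):
// rewriting "ldr x2, [x8]" with AM = { BaseReg = x0, ScaledReg = x1, Scale = 8,
// Displacement = 0 } produces the register-offset form
//   ldr x2, [x0, x1, lsl #3]
// (LDRXroX), matching the fold recognised by canFoldIntoAddrMode above.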
3433
3434bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3435 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3436 bool &OffsetIsScalable, unsigned &Width,
3437 const TargetRegisterInfo *TRI) const {
3438 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3439 // Handle only loads/stores with base register followed by immediate offset.
3440 if (LdSt.getNumExplicitOperands() == 3) {
3441 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3442 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3443 !LdSt.getOperand(2).isImm())
3444 return false;
3445 } else if (LdSt.getNumExplicitOperands() == 4) {
3446 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3447 if (!LdSt.getOperand(1).isReg() ||
3448 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3449 !LdSt.getOperand(3).isImm())
3450 return false;
3451 } else
3452 return false;
3453
3454 // Get the scaling factor for the instruction and set the width for the
3455 // instruction.
3456 TypeSize Scale(0U, false);
3457 int64_t Dummy1, Dummy2;
3458
3459 // If this returns false, then it's an instruction we don't want to handle.
3460 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3461 return false;
3462
3463 // Compute the offset. Offset is calculated as the immediate operand
3464 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3465 // set to 1.
3466 if (LdSt.getNumExplicitOperands() == 3) {
3467 BaseOp = &LdSt.getOperand(1);
3468 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3469 } else {
3470 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3471 BaseOp = &LdSt.getOperand(2);
3472 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3473 }
3474 OffsetIsScalable = Scale.isScalable();
3475
3476 if (!BaseOp->isReg() && !BaseOp->isFI())
3477 return false;
3478
3479 return true;
3480}
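// For example (editorial note): for STRQui "str q0, [x0, #48]" this sets
// BaseOp = x0, Offset = 48 (stored immediate 3 scaled by 16), Width = 16 and
// OffsetIsScalable = false.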
3481
3482MachineOperand &
3483AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3484 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3485 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3486 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3487 return OfsOp;
3488}
3489
3490bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3491 unsigned &Width, int64_t &MinOffset,
3492 int64_t &MaxOffset) {
3493 const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
3494 switch (Opcode) {
3495 // Not a memory operation or something we want to handle.
3496 default:
3497 Scale = TypeSize::Fixed(0);
3498 Width = 0;
3499 MinOffset = MaxOffset = 0;
3500 return false;
3501 case AArch64::STRWpost:
3502 case AArch64::LDRWpost:
3503 Width = 32;
3504 Scale = TypeSize::Fixed(4);
3505 MinOffset = -256;
3506 MaxOffset = 255;
3507 break;
3508 case AArch64::LDURQi:
3509 case AArch64::STURQi:
3510 Width = 16;
3511 Scale = TypeSize::Fixed(1);
3512 MinOffset = -256;
3513 MaxOffset = 255;
3514 break;
3515 case AArch64::PRFUMi:
3516 case AArch64::LDURXi:
3517 case AArch64::LDURDi:
3518 case AArch64::STURXi:
3519 case AArch64::STURDi:
3520 Width = 8;
3521 Scale = TypeSize::Fixed(1);
3522 MinOffset = -256;
3523 MaxOffset = 255;
3524 break;
3525 case AArch64::LDURWi:
3526 case AArch64::LDURSi:
3527 case AArch64::LDURSWi:
3528 case AArch64::STURWi:
3529 case AArch64::STURSi:
3530 Width = 4;
3531 Scale = TypeSize::Fixed(1);
3532 MinOffset = -256;
3533 MaxOffset = 255;
3534 break;
3535 case AArch64::LDURHi:
3536 case AArch64::LDURHHi:
3537 case AArch64::LDURSHXi:
3538 case AArch64::LDURSHWi:
3539 case AArch64::STURHi:
3540 case AArch64::STURHHi:
3541 Width = 2;
3542 Scale = TypeSize::Fixed(1);
3543 MinOffset = -256;
3544 MaxOffset = 255;
3545 break;
3546 case AArch64::LDURBi:
3547 case AArch64::LDURBBi:
3548 case AArch64::LDURSBXi:
3549 case AArch64::LDURSBWi:
3550 case AArch64::STURBi:
3551 case AArch64::STURBBi:
3552 Width = 1;
3553 Scale = TypeSize::Fixed(1);
3554 MinOffset = -256;
3555 MaxOffset = 255;
3556 break;
3557 case AArch64::LDPQi:
3558 case AArch64::LDNPQi:
3559 case AArch64::STPQi:
3560 case AArch64::STNPQi:
3561 Scale = TypeSize::Fixed(16);
3562 Width = 32;
3563 MinOffset = -64;
3564 MaxOffset = 63;
3565 break;
3566 case AArch64::LDRQui:
3567 case AArch64::STRQui:
3568 Scale = TypeSize::Fixed(16);
3569 Width = 16;
3570 MinOffset = 0;
3571 MaxOffset = 4095;
3572 break;
3573 case AArch64::LDPXi:
3574 case AArch64::LDPDi:
3575 case AArch64::LDNPXi:
3576 case AArch64::LDNPDi:
3577 case AArch64::STPXi:
3578 case AArch64::STPDi:
3579 case AArch64::STNPXi:
3580 case AArch64::STNPDi:
3581 Scale = TypeSize::Fixed(8);
3582 Width = 16;
3583 MinOffset = -64;
3584 MaxOffset = 63;
3585 break;
3586 case AArch64::PRFMui:
3587 case AArch64::LDRXui:
3588 case AArch64::LDRDui:
3589 case AArch64::STRXui:
3590 case AArch64::STRDui:
3591 Scale = TypeSize::Fixed(8);
3592 Width = 8;
3593 MinOffset = 0;
3594 MaxOffset = 4095;
3595 break;
3596 case AArch64::StoreSwiftAsyncContext:
3597 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3598 Scale = TypeSize::Fixed(1);
3599 Width = 8;
3600 MinOffset = 0;
3601 MaxOffset = 4095;
3602 break;
3603 case AArch64::LDPWi:
3604 case AArch64::LDPSi:
3605 case AArch64::LDNPWi:
3606 case AArch64::LDNPSi:
3607 case AArch64::STPWi:
3608 case AArch64::STPSi:
3609 case AArch64::STNPWi:
3610 case AArch64::STNPSi:
3611 Scale = TypeSize::Fixed(4);
3612 Width = 8;
3613 MinOffset = -64;
3614 MaxOffset = 63;
3615 break;
3616 case AArch64::LDRWui:
3617 case AArch64::LDRSui:
3618 case AArch64::LDRSWui:
3619 case AArch64::STRWui:
3620 case AArch64::STRSui:
3621 Scale = TypeSize::Fixed(4);
3622 Width = 4;
3623 MinOffset = 0;
3624 MaxOffset = 4095;
3625 break;
3626 case AArch64::LDRHui:
3627 case AArch64::LDRHHui:
3628 case AArch64::LDRSHWui:
3629 case AArch64::LDRSHXui:
3630 case AArch64::STRHui:
3631 case AArch64::STRHHui:
3632 Scale = TypeSize::Fixed(2);
3633 Width = 2;
3634 MinOffset = 0;
3635 MaxOffset = 4095;
3636 break;
3637 case AArch64::LDRBui:
3638 case AArch64::LDRBBui:
3639 case AArch64::LDRSBWui:
3640 case AArch64::LDRSBXui:
3641 case AArch64::STRBui:
3642 case AArch64::STRBBui:
3643 Scale = TypeSize::Fixed(1);
3644 Width = 1;
3645 MinOffset = 0;
3646 MaxOffset = 4095;
3647 break;
3648 case AArch64::STPXpre:
3649 case AArch64::LDPXpost:
3650 case AArch64::STPDpre:
3651 case AArch64::LDPDpost:
3652 Scale = TypeSize::Fixed(8);
3653 Width = 8;
3654 MinOffset = -512;
3655 MaxOffset = 504;
3656 break;
3657 case AArch64::STPQpre:
3658 case AArch64::LDPQpost:
3659 Scale = TypeSize::Fixed(16);
3660 Width = 16;
3661 MinOffset = -1024;
3662 MaxOffset = 1008;
3663 break;
3664 case AArch64::STRXpre:
3665 case AArch64::STRDpre:
3666 case AArch64::LDRXpost:
3667 case AArch64::LDRDpost:
3668 Scale = TypeSize::Fixed(1);
3669 Width = 8;
3670 MinOffset = -256;
3671 MaxOffset = 255;
3672 break;
3673 case AArch64::STRQpre:
3674 case AArch64::LDRQpost:
3675 Scale = TypeSize::Fixed(1);
3676 Width = 16;
3677 MinOffset = -256;
3678 MaxOffset = 255;
3679 break;
3680 case AArch64::ADDG:
3681 Scale = TypeSize::Fixed(16);
3682 Width = 0;
3683 MinOffset = 0;
3684 MaxOffset = 63;
3685 break;
3686 case AArch64::TAGPstack:
3687 Scale = TypeSize::Fixed(16);
3688 Width = 0;
3689 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3690 // of 63 (not 64!).
3691 MinOffset = -63;
3692 MaxOffset = 63;
3693 break;
3694 case AArch64::LDG:
3695 case AArch64::STGi:
3696 case AArch64::STZGi:
3697 Scale = TypeSize::Fixed(16);
3698 Width = 16;
3699 MinOffset = -256;
3700 MaxOffset = 255;
3701 break;
3702 case AArch64::STR_ZZZZXI:
3703 case AArch64::LDR_ZZZZXI:
3704 Scale = TypeSize::Scalable(16);
3705 Width = SVEMaxBytesPerVector * 4;
3706 MinOffset = -256;
3707 MaxOffset = 252;
3708 break;
3709 case AArch64::STR_ZZZXI:
3710 case AArch64::LDR_ZZZXI:
3711 Scale = TypeSize::Scalable(16);
3712 Width = SVEMaxBytesPerVector * 3;
3713 MinOffset = -256;
3714 MaxOffset = 253;
3715 break;
3716 case AArch64::STR_ZZXI:
3717 case AArch64::LDR_ZZXI:
3718 Scale = TypeSize::Scalable(16);
3719 Width = SVEMaxBytesPerVector * 2;
3720 MinOffset = -256;
3721 MaxOffset = 254;
3722 break;
3723 case AArch64::LDR_PXI:
3724 case AArch64::STR_PXI:
3725 Scale = TypeSize::Scalable(2);
3726 Width = SVEMaxBytesPerVector / 8;
3727 MinOffset = -256;
3728 MaxOffset = 255;
3729 break;
3730 case AArch64::LDR_ZXI:
3731 case AArch64::STR_ZXI:
3732 Scale = TypeSize::Scalable(16);
3733 Width = SVEMaxBytesPerVector;
3734 MinOffset = -256;
3735 MaxOffset = 255;
3736 break;
3737 case AArch64::LD1B_IMM:
3738 case AArch64::LD1H_IMM:
3739 case AArch64::LD1W_IMM:
3740 case AArch64::LD1D_IMM:
3741 case AArch64::LDNT1B_ZRI:
3742 case AArch64::LDNT1H_ZRI:
3743 case AArch64::LDNT1W_ZRI:
3744 case AArch64::LDNT1D_ZRI:
3745 case AArch64::ST1B_IMM:
3746 case AArch64::ST1H_IMM:
3747 case AArch64::ST1W_IMM:
3748 case AArch64::ST1D_IMM:
3749 case AArch64::STNT1B_ZRI:
3750 case AArch64::STNT1H_ZRI:
3751 case AArch64::STNT1W_ZRI:
3752 case AArch64::STNT1D_ZRI:
3753 case AArch64::LDNF1B_IMM:
3754 case AArch64::LDNF1H_IMM:
3755 case AArch64::LDNF1W_IMM:
3756 case AArch64::LDNF1D_IMM:
3757 // A full vector's worth of data
3758 // Width = mbytes * elements
3759 Scale = TypeSize::Scalable(16);
3760 Width = SVEMaxBytesPerVector;
3761 MinOffset = -8;
3762 MaxOffset = 7;
3763 break;
3764 case AArch64::LD2B_IMM:
3765 case AArch64::LD2H_IMM:
3766 case AArch64::LD2W_IMM:
3767 case AArch64::LD2D_IMM:
3768 case AArch64::ST2B_IMM:
3769 case AArch64::ST2H_IMM:
3770 case AArch64::ST2W_IMM:
3771 case AArch64::ST2D_IMM:
3772 Scale = TypeSize::Scalable(32);
3773 Width = SVEMaxBytesPerVector * 2;
3774 MinOffset = -8;
3775 MaxOffset = 7;
3776 break;
3777 case AArch64::LD3B_IMM:
3778 case AArch64::LD3H_IMM:
3779 case AArch64::LD3W_IMM:
3780 case AArch64::LD3D_IMM:
3781 case AArch64::ST3B_IMM:
3782 case AArch64::ST3H_IMM:
3783 case AArch64::ST3W_IMM:
3784 case AArch64::ST3D_IMM:
3785 Scale = TypeSize::Scalable(48);
3786 Width = SVEMaxBytesPerVector * 3;
3787 MinOffset = -8;
3788 MaxOffset = 7;
3789 break;
3790 case AArch64::LD4B_IMM:
3791 case AArch64::LD4H_IMM:
3792 case AArch64::LD4W_IMM:
3793 case AArch64::LD4D_IMM:
3794 case AArch64::ST4B_IMM:
3795 case AArch64::ST4H_IMM:
3796 case AArch64::ST4W_IMM:
3797 case AArch64::ST4D_IMM:
3798 Scale = TypeSize::Scalable(64);
3799 Width = SVEMaxBytesPerVector * 4;
3800 MinOffset = -8;
3801 MaxOffset = 7;
3802 break;
3803 case AArch64::LD1B_H_IMM:
3804 case AArch64::LD1SB_H_IMM:
3805 case AArch64::LD1H_S_IMM:
3806 case AArch64::LD1SH_S_IMM:
3807 case AArch64::LD1W_D_IMM:
3808 case AArch64::LD1SW_D_IMM:
3809 case AArch64::ST1B_H_IMM:
3810 case AArch64::ST1H_S_IMM:
3811 case AArch64::ST1W_D_IMM:
3812 case AArch64::LDNF1B_H_IMM:
3813 case AArch64::LDNF1SB_H_IMM:
3814 case AArch64::LDNF1H_S_IMM:
3815 case AArch64::LDNF1SH_S_IMM:
3816 case AArch64::LDNF1W_D_IMM:
3817 case AArch64::LDNF1SW_D_IMM:
3818 // A half vector's worth of data
3819 // Width = mbytes * elements
3820 Scale = TypeSize::Scalable(8);
3821 Width = SVEMaxBytesPerVector / 2;
3822 MinOffset = -8;
3823 MaxOffset = 7;
3824 break;
3825 case AArch64::LD1B_S_IMM:
3826 case AArch64::LD1SB_S_IMM:
3827 case AArch64::LD1H_D_IMM:
3828 case AArch64::LD1SH_D_IMM:
3829 case AArch64::ST1B_S_IMM:
3830 case AArch64::ST1H_D_IMM:
3831 case AArch64::LDNF1B_S_IMM:
3832 case AArch64::LDNF1SB_S_IMM:
3833 case AArch64::LDNF1H_D_IMM:
3834 case AArch64::LDNF1SH_D_IMM:
3835 // A quarter vector's worth of data
3836 // Width = mbytes * elements
3837 Scale = TypeSize::Scalable(4);
3838 Width = SVEMaxBytesPerVector / 4;
3839 MinOffset = -8;
3840 MaxOffset = 7;
3841 break;
3842 case AArch64::LD1B_D_IMM:
3843 case AArch64::LD1SB_D_IMM:
3844 case AArch64::ST1B_D_IMM:
3845 case AArch64::LDNF1B_D_IMM:
3846 case AArch64::LDNF1SB_D_IMM:
3847 // An eighth of a vector's worth of data
3848 // Width = mbytes * elements
3849 Scale = TypeSize::Scalable(2);
3850 Width = SVEMaxBytesPerVector / 8;
3851 MinOffset = -8;
3852 MaxOffset = 7;
3853 break;
3854 case AArch64::ST2Gi:
3855 case AArch64::STZ2Gi:
3856 Scale = TypeSize::Fixed(16);
3857 Width = 32;
3858 MinOffset = -256;
3859 MaxOffset = 255;
3860 break;
3861 case AArch64::STGPi:
3862 Scale = TypeSize::Fixed(16);
3863 Width = 16;
3864 MinOffset = -64;
3865 MaxOffset = 63;
3866 break;
3867 case AArch64::LD1RB_IMM:
3868 case AArch64::LD1RB_H_IMM:
3869 case AArch64::LD1RB_S_IMM:
3870 case AArch64::LD1RB_D_IMM:
3871 case AArch64::LD1RSB_H_IMM:
3872 case AArch64::LD1RSB_S_IMM:
3873 case AArch64::LD1RSB_D_IMM:
3874 Scale = TypeSize::Fixed(1);
3875 Width = 1;
3876 MinOffset = 0;
3877 MaxOffset = 63;
3878 break;
3879 case AArch64::LD1RH_IMM:
3880 case AArch64::LD1RH_S_IMM:
3881 case AArch64::LD1RH_D_IMM:
3882 case AArch64::LD1RSH_S_IMM:
3883 case AArch64::LD1RSH_D_IMM:
3884 Scale = TypeSize::Fixed(2);
3885 Width = 2;
3886 MinOffset = 0;
3887 MaxOffset = 63;
3888 break;
3889 case AArch64::LD1RW_IMM:
3890 case AArch64::LD1RW_D_IMM:
3891 case AArch64::LD1RSW_IMM:
3892 Scale = TypeSize::Fixed(4);
3893 Width = 4;
3894 MinOffset = 0;
3895 MaxOffset = 63;
3896 break;
3897 case AArch64::LD1RD_IMM:
3898 Scale = TypeSize::Fixed(8);
3899 Width = 8;
3900 MinOffset = 0;
3901 MaxOffset = 63;
3902 break;
3903 }
3904
3905 return true;
3906}
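// Illustrative note (not from the original source): per the table above,
// LDRHHui has Scale = 2 and an immediate range of 0..4095, so it can address
// byte offsets 0, 2, ..., 8190 from its base register, while the
// pre/post-indexed forms above use small signed, unscaled byte offsets.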
3907
3908// Scaling factor for unscaled load or store.
3909int AArch64InstrInfo::getMemScale(unsigned Opc) {
3910 switch (Opc) {
3911 default:
3912 llvm_unreachable("Opcode has unknown scale!");
3913 case AArch64::LDRBBui:
3914 case AArch64::LDURBBi:
3915 case AArch64::LDRSBWui:
3916 case AArch64::LDURSBWi:
3917 case AArch64::STRBBui:
3918 case AArch64::STURBBi:
3919 return 1;
3920 case AArch64::LDRHHui:
3921 case AArch64::LDURHHi:
3922 case AArch64::LDRSHWui:
3923 case AArch64::LDURSHWi:
3924 case AArch64::STRHHui:
3925 case AArch64::STURHHi:
3926 return 2;
3927 case AArch64::LDRSui:
3928 case AArch64::LDURSi:
3929 case AArch64::LDRSpre:
3930 case AArch64::LDRSWui:
3931 case AArch64::LDURSWi:
3932 case AArch64::LDRSWpre:
3933 case AArch64::LDRWpre:
3934 case AArch64::LDRWui:
3935 case AArch64::LDURWi:
3936 case AArch64::STRSui:
3937 case AArch64::STURSi:
3938 case AArch64::STRSpre:
3939 case AArch64::STRWui:
3940 case AArch64::STURWi:
3941 case AArch64::STRWpre:
3942 case AArch64::LDPSi:
3943 case AArch64::LDPSWi:
3944 case AArch64::LDPWi:
3945 case AArch64::STPSi:
3946 case AArch64::STPWi:
3947 return 4;
3948 case AArch64::LDRDui:
3949 case AArch64::LDURDi:
3950 case AArch64::LDRDpre:
3951 case AArch64::LDRXui:
3952 case AArch64::LDURXi:
3953 case AArch64::LDRXpre:
3954 case AArch64::STRDui:
3955 case AArch64::STURDi:
3956 case AArch64::STRDpre:
3957 case AArch64::STRXui:
3958 case AArch64::STURXi:
3959 case AArch64::STRXpre:
3960 case AArch64::LDPDi:
3961 case AArch64::LDPXi:
3962 case AArch64::STPDi:
3963 case AArch64::STPXi:
3964 return 8;
3965 case AArch64::LDRQui:
3966 case AArch64::LDURQi:
3967 case AArch64::STRQui:
3968 case AArch64::STURQi:
3969 case AArch64::STRQpre:
3970 case AArch64::LDPQi:
3971 case AArch64::LDRQpre:
3972 case AArch64::STPQi:
3973 case AArch64::STGi:
3974 case AArch64::STZGi:
3975 case AArch64::ST2Gi:
3976 case AArch64::STZ2Gi:
3977 case AArch64::STGPi:
3978 return 16;
3979 }
3980}
3981
3982bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
3983 switch (MI.getOpcode()) {
3984 default:
3985 return false;
3986 case AArch64::LDRWpre:
3987 case AArch64::LDRXpre:
3988 case AArch64::LDRSWpre:
3989 case AArch64::LDRSpre:
3990 case AArch64::LDRDpre:
3991 case AArch64::LDRQpre:
3992 return true;
3993 }
3994}
3995
3996bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
3997 switch (MI.getOpcode()) {
3998 default:
3999 return false;
4000 case AArch64::STRWpre:
4001 case AArch64::STRXpre:
4002 case AArch64::STRSpre:
4003 case AArch64::STRDpre:
4004 case AArch64::STRQpre:
4005 return true;
4006 }
4007}
4008
4009bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4010 return isPreLd(MI) || isPreSt(MI);
4011}
4012
4013bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4014 switch (MI.getOpcode()) {
4015 default:
4016 return false;
4017 case AArch64::LDPSi:
4018 case AArch64::LDPSWi:
4019 case AArch64::LDPDi:
4020 case AArch64::LDPQi:
4021 case AArch64::LDPWi:
4022 case AArch64::LDPXi:
4023 case AArch64::STPSi:
4024 case AArch64::STPDi:
4025 case AArch64::STPQi:
4026 case AArch64::STPWi:
4027 case AArch64::STPXi:
4028 case AArch64::STGPi:
4029 return true;
4030 }
4031}
4032
4033const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4034 unsigned Idx =
4035 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4036 : 1;
4037 return MI.getOperand(Idx);
4038}
4039
4040const MachineOperand &
4041AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4042 unsigned Idx =
4043 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4044 : 2;
4045 return MI.getOperand(Idx);
4046}
4047
4048static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4049 Register Reg) {
4050 if (MI.getParent() == nullptr)
4051 return nullptr;
4052 const MachineFunction *MF = MI.getParent()->getParent();
4053 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4054}
4055
4056bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4057 auto IsHFPR = [&](const MachineOperand &Op) {
4058 if (!Op.isReg())
4059 return false;
4060 auto Reg = Op.getReg();
4061 if (Reg.isPhysical())
4062 return AArch64::FPR16RegClass.contains(Reg);
4063 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4064 return TRC == &AArch64::FPR16RegClass ||
4065 TRC == &AArch64::FPR16_loRegClass;
4066 };
4067 return llvm::any_of(MI.operands(), IsHFPR);
4068}
4069
4070bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4071 auto IsQFPR = [&](const MachineOperand &Op) {
4072 if (!Op.isReg())
4073 return false;
4074 auto Reg = Op.getReg();
4075 if (Reg.isPhysical())
4076 return AArch64::FPR128RegClass.contains(Reg);
4077 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4078 return TRC == &AArch64::FPR128RegClass ||
4079 TRC == &AArch64::FPR128_loRegClass;
4080 };
4081 return llvm::any_of(MI.operands(), IsQFPR);
4082}
4083
4084bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4085 auto IsFPR = [&](const MachineOperand &Op) {
4086 if (!Op.isReg())
4087 return false;
4088 auto Reg = Op.getReg();
4089 if (Reg.isPhysical())
4090 return AArch64::FPR128RegClass.contains(Reg) ||
4091 AArch64::FPR64RegClass.contains(Reg) ||
4092 AArch64::FPR32RegClass.contains(Reg) ||
4093 AArch64::FPR16RegClass.contains(Reg) ||
4094 AArch64::FPR8RegClass.contains(Reg);
4095
4096 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4097 return TRC == &AArch64::FPR128RegClass ||
4098 TRC == &AArch64::FPR128_loRegClass ||
4099 TRC == &AArch64::FPR64RegClass ||
4100 TRC == &AArch64::FPR64_loRegClass ||
4101 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4102 TRC == &AArch64::FPR8RegClass;
4103 };
4104 return llvm::any_of(MI.operands(), IsFPR);
4105}
4106
4107// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4108// scaled.
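// For example, for AArch64::LDURXi (memory scale 8) a byte offset of 16
// becomes an element offset of 2, while a byte offset of 12 is rejected
// because it is not a multiple of the scale.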
4109static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4110 int Scale = AArch64InstrInfo::getMemScale(Opc);
4111
4112 // If the byte-offset isn't a multiple of the stride, we can't scale this
4113 // offset.
4114 if (Offset % Scale != 0)
4115 return false;
4116
4117 // Convert the byte-offset used by unscaled into an "element" offset used
4118 // by the scaled pair load/store instructions.
4119 Offset /= Scale;
4120 return true;
4121}
4122
4123static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4124 if (FirstOpc == SecondOpc)
4125 return true;
4126 // We can also pair sign-ext and zero-ext instructions.
4127 switch (FirstOpc) {
4128 default:
4129 return false;
4130 case AArch64::LDRWui:
4131 case AArch64::LDURWi:
4132 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4133 case AArch64::LDRSWui:
4134 case AArch64::LDURSWi:
4135 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4136 }
4137 // These instructions can't be paired based on their opcodes.
4138 return false;
4139}
4140
4141static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4142 int64_t Offset1, unsigned Opcode1, int FI2,
4143 int64_t Offset2, unsigned Opcode2) {
4144 // Accesses through fixed stack object frame indices may access a different
4145 // fixed stack slot. Check that the object offsets + offsets match.
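  // For example, two 8-byte accesses to fixed objects at offsets -16 and -8
  // (each with an in-instruction offset of 0) scale to object offsets -2 and
  // -1, which are adjacent, so they may be clustered.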
4146 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4147 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4148 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4149 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4150 // Convert to scaled object offsets.
4151 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4152 if (ObjectOffset1 % Scale1 != 0)
4153 return false;
4154 ObjectOffset1 /= Scale1;
4155 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4156 if (ObjectOffset2 % Scale2 != 0)
4157 return false;
4158 ObjectOffset2 /= Scale2;
4159 ObjectOffset1 += Offset1;
4160 ObjectOffset2 += Offset2;
4161 return ObjectOffset1 + 1 == ObjectOffset2;
4162 }
4163
4164 return FI1 == FI2;
4165}
4166
4167/// Detect opportunities for ldp/stp formation.
4168///
4169/// Only called for LdSt for which getMemOperandWithOffset returns true.
4170bool AArch64InstrInfo::shouldClusterMemOps(
4171 ArrayRef<const MachineOperand *> BaseOps1,
4172 ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
4173 unsigned NumBytes) const {
4174 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4175 const MachineOperand &BaseOp1 = *BaseOps1.front();
4176 const MachineOperand &BaseOp2 = *BaseOps2.front();
4177 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4178 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4179 if (BaseOp1.getType() != BaseOp2.getType())
4180 return false;
4181
4182 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4183 "Only base registers and frame indices are supported.");
4184
4185 // Check for both base regs and base FI.
4186 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4187 return false;
4188
4189 // Only cluster up to a single pair.
4190 if (NumLoads > 2)
4191 return false;
4192
4193 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4194 return false;
4195
4196 // Can we pair these instructions based on their opcodes?
4197 unsigned FirstOpc = FirstLdSt.getOpcode();
4198 unsigned SecondOpc = SecondLdSt.getOpcode();
4199 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4200 return false;
4201
4202 // Can't merge volatiles or load/stores that have a hint to avoid pair
4203 // formation, for example.
4204 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4205 !isCandidateToMergeOrPair(SecondLdSt))
4206 return false;
4207
4208 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4209 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4210 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4211 return false;
4212
4213 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4214 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4215 return false;
4216
4217 // Pairwise instructions have a 7-bit signed offset field.
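  // (i.e. scaled element offsets in the range [-64, 63], as checked below.)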
4218 if (Offset1 > 63 || Offset1 < -64)
4219 return false;
4220
4221 // The caller should already have ordered First/SecondLdSt by offset.
4222 // Note: this need not hold when the bases are different frame indices.
4223 if (BaseOp1.isFI()) {
4224 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4225 "Caller should have ordered offsets.");
4226
4227 const MachineFrameInfo &MFI =
4228 FirstLdSt.getParent()->getParent()->getFrameInfo();
4229 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4230 BaseOp2.getIndex(), Offset2, SecondOpc);
4231 }
4232
4233 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4234
4235 return Offset1 + 1 == Offset2;
4236}
4237
4238static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4239 unsigned Reg, unsigned SubIdx,
4240 unsigned State,
4241 const TargetRegisterInfo *TRI) {
4242 if (!SubIdx)
4243 return MIB.addReg(Reg, State);
4244
4245 if (Register::isPhysicalRegister(Reg))
4246 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4247 return MIB.addReg(Reg, State, SubIdx);
4248}
4249
4250static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4251 unsigned NumRegs) {
4252 // We really want the positive remainder mod 32 here, which happens to be
4253 // easily obtainable with a mask.
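// For example, copying a 3-register tuple whose source starts at encoding 1
// into one starting at encoding 2 gives (2 - 1) & 0x1f == 1 < 3, so
// copyPhysRegTuple below must emit the sub-register copies in reverse order.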
4254 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4255}
4256
4257void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4258 MachineBasicBlock::iterator I,
4259 const DebugLoc &DL, MCRegister DestReg,
4260 MCRegister SrcReg, bool KillSrc,
4261 unsigned Opcode,
4262 ArrayRef<unsigned> Indices) const {
4263 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4264 const TargetRegisterInfo *TRI = &getRegisterInfo();
4265 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4266 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4267 unsigned NumRegs = Indices.size();
4268
4269 int SubReg = 0, End = NumRegs, Incr = 1;
4270 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4271 SubReg = NumRegs - 1;
4272 End = -1;
4273 Incr = -1;
4274 }
4275
4276 for (; SubReg != End; SubReg += Incr) {
4277 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4278 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4279 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4280 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4281 }
4282}
4283
4284void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4285 MachineBasicBlock::iterator I,
4286 DebugLoc DL, unsigned DestReg,
4287 unsigned SrcReg, bool KillSrc,
4288 unsigned Opcode, unsigned ZeroReg,
4289 llvm::ArrayRef<unsigned> Indices) const {
4290 const TargetRegisterInfo *TRI = &getRegisterInfo();
4291 unsigned NumRegs = Indices.size();
4292
4293#ifndef NDEBUG
4294 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4295 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4296 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4297 "GPR reg sequences should not be able to overlap");
4298#endif
4299
4300 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4301 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4302 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4303 MIB.addReg(ZeroReg);
4304 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4305 MIB.addImm(0);
4306 }
4307}
4308
4309void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4310 MachineBasicBlock::iterator I,
4311 const DebugLoc &DL, MCRegister DestReg,
4312 MCRegister SrcReg, bool KillSrc) const {
4313 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4314 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4315 const TargetRegisterInfo *TRI = &getRegisterInfo();
4316
4317 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4318 // If either operand is WSP, expand to ADD #0.
4319 if (Subtarget.hasZeroCycleRegMove()) {
4320 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4321 MCRegister DestRegX = TRI->getMatchingSuperReg(
4322 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4323 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4324 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4325 // This instruction is reading and writing X registers. This may upset
4326 // the register scavenger and machine verifier, so we need to indicate
4327 // that we are reading an undefined value from SrcRegX, but a proper
4328 // value from SrcReg.
4329 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4330 .addReg(SrcRegX, RegState::Undef)
4331 .addImm(0)
4332 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
4333 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4334 } else {
4335 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4336 .addReg(SrcReg, getKillRegState(KillSrc))
4337 .addImm(0)
4338 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4339 }
4340 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4341 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4342 .addImm(0)
4343 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4344 } else {
4345 if (Subtarget.hasZeroCycleRegMove()) {
4346 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4347 MCRegister DestRegX = TRI->getMatchingSuperReg(
4348 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4349 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4350 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4351 // This instruction is reading and writing X registers. This may upset
4352 // the register scavenger and machine verifier, so we need to indicate
4353 // that we are reading an undefined value from SrcRegX, but a proper
4354 // value from SrcReg.
4355 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4356 .addReg(AArch64::XZR)
4357 .addReg(SrcRegX, RegState::Undef)
4358 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4359 } else {
4360 // Otherwise, expand to ORR WZR.
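        // (MOV Wd, Wm is an alias of ORR Wd, WZR, Wm.)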
4361 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4362 .addReg(AArch64::WZR)
4363 .addReg(SrcReg, getKillRegState(KillSrc));
4364 }
4365 }
4366 return;
4367 }
4368
4369 // Copy a Predicate register by ORRing with itself.
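  // MOV Pd.B, Pn.B is an alias of ORR Pd.B, Pn/Z, Pn.B, Pn.B, which is what
  // is built here with all three source operands set to SrcReg.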
4370 if (AArch64::PPRRegClass.contains(DestReg) &&
4371 AArch64::PPRRegClass.contains(SrcReg)) {
4372 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4373 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4374 .addReg(SrcReg) // Pg
4375 .addReg(SrcReg)
4376 .addReg(SrcReg, getKillRegState(KillSrc));
4377 return;
4378 }
4379
4380 if (AArch64::PNRRegClass.contains(DestReg) &&
4381 AArch64::PPRRegClass.contains(SrcReg)) {
4382 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4383 "Unexpected predicate-as-counter register.");
4384 // Copy from pX to pnX is a no-op
4385 if ((DestReg.id() - AArch64::PN0) == (SrcReg.id() - AArch64::P0))
4386 return;
4387 MCRegister PPRDestReg = (DestReg - AArch64::PN0) + AArch64::P0;
4388 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4389 .addReg(SrcReg)
4390 .addReg(SrcReg)
4391 .addReg(SrcReg, getKillRegState(KillSrc))
4392 .addDef(DestReg, RegState::Implicit);
4393 return;
4394 }
4395
4396 if (AArch64::PPRRegClass.contains(DestReg) &&
4397 AArch64::PNRRegClass.contains(SrcReg)) {
4398 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4399 "Unexpected predicate-as-counter register.");
4400 // Copy from pnX to pX is a no-op
4401 if ((DestReg.id() - AArch64::P0) == (SrcReg.id() - AArch64::PN0))
4402 return;
4403 MCRegister PNRDestReg = (DestReg - AArch64::P0) + AArch64::PN0;
4404 MCRegister PPRSrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
4405 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4406 .addReg(PPRSrcReg)
4407 .addReg(PPRSrcReg)
4408 .addReg(PPRSrcReg, getKillRegState(KillSrc))
4409 .addDef(PNRDestReg, RegState::Implicit);
4410 return;
4411 }
4412
4413 // Copy a Z register by ORRing with itself.
4414 if (AArch64::ZPRRegClass.contains(DestReg) &&
4415 AArch64::ZPRRegClass.contains(SrcReg)) {
4416 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4417 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4418 .addReg(SrcReg)
4419 .addReg(SrcReg, getKillRegState(KillSrc));
4420 return;
4421 }
4422
4423 // Copy a Z register pair by copying the individual sub-registers.
4424 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4425 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4426 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4427 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4428 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4429 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4430 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4431 Indices);
4432 return;
4433 }
4434
4435 // Copy a Z register triple by copying the individual sub-registers.
4436 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4437 AArch64::ZPR3RegClass.contains(SrcReg)) {
4438 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4439 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4440 AArch64::zsub2};
4441 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4442 Indices);
4443 return;
4444 }
4445
4446 // Copy a Z register quad by copying the individual sub-registers.
4447 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4448 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4449 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4450 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4451 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4452 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4453 AArch64::zsub2, AArch64::zsub3};
4454 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4455 Indices);
4456 return;
4457 }
4458
4459 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4460 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4461 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4462 // If either operand is SP, expand to ADD #0.
4463 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4464 .addReg(SrcReg, getKillRegState(KillSrc))
4465 .addImm(0)
4466 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4467 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4468 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4469 .addImm(0)
4470 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
4471 } else {
4472 // Otherwise, expand to ORR XZR.
4473 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4474 .addReg(AArch64::XZR)
4475 .addReg(SrcReg, getKillRegState(KillSrc));
4476 }
4477 return;
4478 }
4479
4480 // Copy a DDDD register quad by copying the individual sub-registers.
4481 if (AArch64::DDDDRegClass.contains(DestReg) &&
4482 AArch64::DDDDRegClass.contains(SrcReg)) {
4483 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4484 AArch64::dsub2, AArch64::dsub3};
4485 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4486 Indices);
4487 return;
4488 }
4489
4490 // Copy a DDD register triple by copying the individual sub-registers.
4491 if (AArch64::DDDRegClass.contains(DestReg) &&
4492 AArch64::DDDRegClass.contains(SrcReg)) {
4493 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4494 AArch64::dsub2};
4495 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4496 Indices);
4497 return;
4498 }
4499
4500 // Copy a DD register pair by copying the individual sub-registers.
4501 if (AArch64::DDRegClass.contains(DestReg) &&
4502 AArch64::DDRegClass.contains(SrcReg)) {
4503 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4504 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4505 Indices);
4506 return;
4507 }
4508
4509 // Copy a QQQQ register quad by copying the individual sub-registers.
4510 if (AArch64::QQQQRegClass.contains(DestReg) &&
4511 AArch64::QQQQRegClass.contains(SrcReg)) {
4512 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4513 AArch64::qsub2, AArch64::qsub3};
4514 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4515 Indices);
4516 return;
4517 }
4518
4519 // Copy a QQQ register triple by copying the individual sub-registers.
4520 if (AArch64::QQQRegClass.contains(DestReg) &&
4521 AArch64::QQQRegClass.contains(SrcReg)) {
4522 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4523 AArch64::qsub2};
4524 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4525 Indices);
4526 return;
4527 }
4528
4529 // Copy a QQ register pair by copying the individual sub-registers.
4530 if (AArch64::QQRegClass.contains(DestReg) &&
4531 AArch64::QQRegClass.contains(SrcReg)) {
4532 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4533 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4534 Indices);
4535 return;
4536 }
4537
4538 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4539 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4540 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4541 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4542 AArch64::XZR, Indices);
4543 return;
4544 }
4545
4546 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4547 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4548 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4549 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4550 AArch64::WZR, Indices);
4551 return;
4552 }
4553
4554 if (AArch64::FPR128RegClass.contains(DestReg) &&
4555 AArch64::FPR128RegClass.contains(SrcReg)) {
4556 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4557 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4558 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4559 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4560 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4561 else if (Subtarget.hasNEON())
4562 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4563 .addReg(SrcReg)
4564 .addReg(SrcReg, getKillRegState(KillSrc));
4565 else {
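      // Neither NEON nor SVE vector moves are available, so fall back to
      // copying the Q register through a 16-byte stack slot.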
4566 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4567 .addReg(AArch64::SP, RegState::Define)
4568 .addReg(SrcReg, getKillRegState(KillSrc))
4569 .addReg(AArch64::SP)
4570 .addImm(-16);
4571 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4572 .addReg(AArch64::SP, RegState::Define)
4573 .addReg(DestReg, RegState::Define)
4574 .addReg(AArch64::SP)
4575 .addImm(16);
4576 }
4577 return;
4578 }
4579
4580 if (AArch64::FPR64RegClass.contains(DestReg) &&
4581 AArch64::FPR64RegClass.contains(SrcReg)) {
4582 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4583 .addReg(SrcReg, getKillRegState(KillSrc));
4584 return;
4585 }
4586
4587 if (AArch64::FPR32RegClass.contains(DestReg) &&
4588 AArch64::FPR32RegClass.contains(SrcReg)) {
4589 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4590 .addReg(SrcReg, getKillRegState(KillSrc));
4591 return;
4592 }
4593
4594 if (AArch64::FPR16RegClass.contains(DestReg) &&
4595 AArch64::FPR16RegClass.contains(SrcReg)) {
4596 DestReg =
4597 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4598 SrcReg =
4599 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4600 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4601 .addReg(SrcReg, getKillRegState(KillSrc));
4602 return;
4603 }
4604
4605 if (AArch64::FPR8RegClass.contains(DestReg) &&
4606 AArch64::FPR8RegClass.contains(SrcReg)) {
4607 DestReg =
4608 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4609 SrcReg =
4610 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4611 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4612 .addReg(SrcReg, getKillRegState(KillSrc));
4613 return;
4614 }
4615
4616 // Copies between GPR64 and FPR64.
4617 if (AArch64::FPR64RegClass.contains(DestReg) &&
4618 AArch64::GPR64RegClass.contains(SrcReg)) {
4619 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4620 .addReg(SrcReg, getKillRegState(KillSrc));
4621 return;
4622 }
4623 if (AArch64::GPR64RegClass.contains(DestReg) &&
4624 AArch64::FPR64RegClass.contains(SrcReg)) {
4625 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4626 .addReg(SrcReg, getKillRegState(KillSrc));
4627 return;
4628 }
4629 // Copies between GPR32 and FPR32.
4630 if (AArch64::FPR32RegClass.contains(DestReg) &&
4631 AArch64::GPR32RegClass.contains(SrcReg)) {
4632 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4633 .addReg(SrcReg, getKillRegState(KillSrc));
4634 return;
4635 }
4636 if (AArch64::GPR32RegClass.contains(DestReg) &&
4637 AArch64::FPR32RegClass.contains(SrcReg)) {
4638 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4639 .addReg(SrcReg, getKillRegState(KillSrc));
4640 return;
4641 }
4642
4643 if (DestReg == AArch64::NZCV) {
4644 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4645 BuildMI(MBB, I, DL, get(AArch64::MSR))
4646 .addImm(AArch64SysReg::NZCV)
4647 .addReg(SrcReg, getKillRegState(KillSrc))
4648 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4649 return;
4650 }
4651
4652 if (SrcReg == AArch64::NZCV) {
4653 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4654 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4655 .addImm(AArch64SysReg::NZCV)
4656 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4657 return;
4658 }
4659
4660#ifndef NDEBUG
4661 const TargetRegisterInfo &TRI = getRegisterInfo();
4662 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4663 << TRI.getRegAsmName(SrcReg) << "\n";
4664#endif
4665 llvm_unreachable("unimplemented reg-to-reg copy");
4666}
4667
4668static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
4669 MachineBasicBlock &MBB,
4670 MachineBasicBlock::iterator InsertBefore,
4671 const MCInstrDesc &MCID,
4672 Register SrcReg, bool IsKill,
4673 unsigned SubIdx0, unsigned SubIdx1, int FI,
4674 MachineMemOperand *MMO) {
4675 Register SrcReg0 = SrcReg;
4676 Register SrcReg1 = SrcReg;
4677 if (SrcReg.isPhysical()) {
4678 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4679 SubIdx0 = 0;
4680 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4681 SubIdx1 = 0;
4682 }
4683 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4684 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4685 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4686 .addFrameIndex(FI)
4687 .addImm(0)
4688 .addMemOperand(MMO);
4689}
4690
4691void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4692 MachineBasicBlock::iterator MBBI,
4693 Register SrcReg, bool isKill, int FI,
4694 const TargetRegisterClass *RC,
4695 const TargetRegisterInfo *TRI,
4696 Register VReg) const {
4697 MachineFunction &MF = *MBB.getParent();
4698 MachineFrameInfo &MFI = MF.getFrameInfo();
4699
4700 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4701 MachineMemOperand *MMO =
4702 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
4703 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4704 unsigned Opc = 0;
4705 bool Offset = true;
4706 Register PNRReg = MCRegister::NoRegister;
4707 unsigned StackID = TargetStackID::Default;
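  // The SVE cases below also switch the slot to TargetStackID::ScalableVector,
  // since their spill size scales with the runtime vector length.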
4708 switch (TRI->getSpillSize(*RC)) {
4709 case 1:
4710 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4711 Opc = AArch64::STRBui;
4712 break;
4713 case 2:
4714 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4715 Opc = AArch64::STRHui;
4716 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
4717 assert(Subtarget.hasSVEorSME() &&
4718 "Unexpected register store without SVE store instructions");
4719 Opc = AArch64::STR_PXI;
4720 StackID = TargetStackID::ScalableVector;
4721 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
4722 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4723 "Unexpected register store without SVE2p1 or SME2");
4724 SrcReg = (SrcReg - AArch64::PN0) + AArch64::P0;
4725 Opc = AArch64::STR_PXI;
4726 StackID = TargetStackID::ScalableVector;
4727 }
4728 break;
4729 case 4:
4730 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4731 Opc = AArch64::STRWui;
4732 if (SrcReg.isVirtual())
4733 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4734 else
4735 assert(SrcReg != AArch64::WSP);
4736 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4737 Opc = AArch64::STRSui;
4738 break;
4739 case 8:
4740 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4741 Opc = AArch64::STRXui;
4742 if (SrcReg.isVirtual())
4743 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4744 else
4745 assert(SrcReg != AArch64::SP);
4746 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4747 Opc = AArch64::STRDui;
4748 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4749 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4750 get(AArch64::STPWi), SrcReg, isKill,
4751 AArch64::sube32, AArch64::subo32, FI, MMO);
4752 return;
4753 }
4754 break;
4755 case 16:
4756 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4757 Opc = AArch64::STRQui;
4758 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4759 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4760 Opc = AArch64::ST1Twov1d;
4761 Offset = false;
4762 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4763 storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
4764 get(AArch64::STPXi), SrcReg, isKill,
4765 AArch64::sube64, AArch64::subo64, FI, MMO);
4766 return;
4767 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4768 assert(Subtarget.hasSVEorSME() &&
4769 "Unexpected register store without SVE store instructions");
4770 Opc = AArch64::STR_ZXI;
4771 StackID = TargetStackID::ScalableVector;
4772 }
4773 break;
4774 case 24:
4775 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4776 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4777 Opc = AArch64::ST1Threev1d;
4778 Offset = false;
4779 }
4780 break;
4781 case 32:
4782 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4783 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4784 Opc = AArch64::ST1Fourv1d;
4785 Offset = false;
4786 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4787 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4788 Opc = AArch64::ST1Twov2d;
4789 Offset = false;
4790 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4791 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4792 assert(Subtarget.hasSVEorSME() &&
4793 "Unexpected register store without SVE store instructions");
4794 Opc = AArch64::STR_ZZXI;
4795 StackID = TargetStackID::ScalableVector;
4796 }
4797 break;
4798 case 48:
4799 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4800 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4801 Opc = AArch64::ST1Threev2d;
4802 Offset = false;
4803 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4804 assert(Subtarget.hasSVEorSME() &&
4805 "Unexpected register store without SVE store instructions");
4806 Opc = AArch64::STR_ZZZXI;
4807 StackID = TargetStackID::ScalableVector;
4808 }
4809 break;
4810 case 64:
4811 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4812 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4813 Opc = AArch64::ST1Fourv2d;
4814 Offset = false;
4815 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4816 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4817 assert(Subtarget.hasSVEorSME() &&
4818 "Unexpected register store without SVE store instructions");
4819 Opc = AArch64::STR_ZZZZXI;
4820 StackID = TargetStackID::ScalableVector;
4821 }
4822 break;
4823 }
4824 assert(Opc && "Unknown register class");
4825 MFI.setStackID(FI, StackID);
4826
4827 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4828 .addReg(SrcReg, getKillRegState(isKill))
4829 .addFrameIndex(FI);
4830
4831 if (Offset)
4832 MI.addImm(0);
4833 if (PNRReg.isValid())
4834 MI.addDef(PNRReg, RegState::Implicit);
4835 MI.addMemOperand(MMO);
4836}
4837
4838static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
4839 MachineBasicBlock &MBB,
4840 MachineBasicBlock::iterator InsertBefore,
4841 const MCInstrDesc &MCID,
4842 Register DestReg, unsigned SubIdx0,
4843 unsigned SubIdx1, int FI,
4844 MachineMemOperand *MMO) {
4845 Register DestReg0 = DestReg;
4846 Register DestReg1 = DestReg;
4847 bool IsUndef = true;
4848 if (DestReg.isPhysical()) {
4849 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4850 SubIdx0 = 0;
4851 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4852 SubIdx1 = 0;
4853 IsUndef = false;
4854 }
4855 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4856 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4857 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4858 .addFrameIndex(FI)
4859 .addImm(0)
4860 .addMemOperand(MMO);
4861}
4862
4863void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4864 MachineBasicBlock::iterator MBBI,
4865 Register DestReg, int FI,
4866 const TargetRegisterClass *RC,
4867 const TargetRegisterInfo *TRI,
4868 Register VReg) const {
4869 MachineFunction &MF = *MBB.getParent();
4870 MachineFrameInfo &MFI = MF.getFrameInfo();
4871 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
4872 MachineMemOperand *MMO =
4873 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
4874 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4875
4876 unsigned Opc = 0;
4877 bool Offset = true;
4878 unsigned StackID = TargetStackID::Default;
4879 Register PNRReg = MCRegister::NoRegister;
4880 switch (TRI->getSpillSize(*RC)) {
4881 case 1:
4882 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4883 Opc = AArch64::LDRBui;
4884 break;
4885 case 2:
4886 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4887 Opc = AArch64::LDRHui;
4888 else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
4889 assert(Subtarget.hasSVEorSME() &&
4890 "Unexpected register load without SVE load instructions");
4891 Opc = AArch64::LDR_PXI;
4892 StackID = TargetStackID::ScalableVector;
4893 } else if (AArch64::PNRRegClass.hasSubClassEq(RC)) {
4894 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4895 "Unexpected register load without SVE2p1 or SME2");
4896 PNRReg = DestReg;
4897 DestReg = (DestReg - AArch64::PN0) + AArch64::P0;
4898 Opc = AArch64::LDR_PXI;
4899 StackID = TargetStackID::ScalableVector;
4900 }
4901 break;
4902 case 4:
4903 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4904 Opc = AArch64::LDRWui;
4905 if (DestReg.isVirtual())
4906 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
4907 else
4908 assert(DestReg != AArch64::WSP);
4909 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4910 Opc = AArch64::LDRSui;
4911 break;
4912 case 8:
4913 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4914 Opc = AArch64::LDRXui;
4915 if (DestReg.isVirtual())
4916 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
4917 else
4918 assert(DestReg != AArch64::SP);
4919 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4920 Opc = AArch64::LDRDui;
4921 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4922 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
4923 get(AArch64::LDPWi), DestReg, AArch64::sube32,
4924 AArch64::subo32, FI, MMO);
4925 return;
4926 }
4927 break;
4928 case 16:
4929 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4930 Opc = AArch64::LDRQui;
4931 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4932 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4933 Opc = AArch64::LD1Twov1d;
4934 Offset = false;
4935 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4936 loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
4937 get(AArch64::LDPXi), DestReg, AArch64::sube64,
4938 AArch64::subo64, FI, MMO);
4939 return;
4940 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4941 assert(Subtarget.hasSVEorSME() &&
4942 "Unexpected register load without SVE load instructions");
4943 Opc = AArch64::LDR_ZXI;
4944 StackID = TargetStackID::ScalableVector;
4945 }
4946 break;
4947 case 24:
4948 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4949 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4950 Opc = AArch64::LD1Threev1d;
4951 Offset = false;
4952 }
4953 break;
4954 case 32:
4955 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4956 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4957 Opc = AArch64::LD1Fourv1d;
4958 Offset = false;
4959 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4960 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4961 Opc = AArch64::LD1Twov2d;
4962 Offset = false;
4963 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4964 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4965 assert(Subtarget.hasSVEorSME() &&
4966 "Unexpected register load without SVE load instructions");
4967 Opc = AArch64::LDR_ZZXI;
4968 StackID = TargetStackID::ScalableVector;
4969 }
4970 break;
4971 case 48:
4972 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4973 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4974 Opc = AArch64::LD1Threev2d;
4975 Offset = false;
4976 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4977 assert(Subtarget.hasSVEorSME() &&
4978 "Unexpected register load without SVE load instructions");
4979 Opc = AArch64::LDR_ZZZXI;
4980 StackID = TargetStackID::ScalableVector;
4981 }
4982 break;
4983 case 64:
4984 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4985 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
4986 Opc = AArch64::LD1Fourv2d;
4987 Offset = false;
4988 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4989 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4990 assert(Subtarget.hasSVEorSME() &&
4991 "Unexpected register load without SVE load instructions");
4992 Opc = AArch64::LDR_ZZZZXI;
4993 StackID = TargetStackID::ScalableVector;
4994 }
4995 break;
4996 }
4997
4998 assert(Opc && "Unknown register class");
4999 MFI.setStackID(FI, StackID);
5000
5001 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5002 .addReg(DestReg, getDefRegState(true))
5003 .addFrameIndex(FI);
5004 if (Offset)
5005 MI.addImm(0);
5006 if (PNRReg.isValid())
5007 MI.addDef(PNRReg, RegState::Implicit);
5008 MI.addMemOperand(MMO);
5009}
5010
5011bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5012 const MachineInstr &UseMI,
5013 const TargetRegisterInfo *TRI) {
5014 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5015 UseMI.getIterator()),
5016 [TRI](const MachineInstr &I) {
5017 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5018 I.readsRegister(AArch64::NZCV, TRI);
5019 });
5020}
5021
5022void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5023 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5024 // The smallest scalable elements supported by scaled SVE addressing
5025 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5026 // byte offset must always be a multiple of 2.
5027 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5028
5029 // VGSized offsets are divided by '2', because the VG register is the
5030 // number of 64-bit granules as opposed to 128-bit vector chunks,
5031 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5032 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5033 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
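  // For example, an offset with a fixed part of 16 and a scalable part of 32
  // decomposes into ByteSized = 16 and VGSized = 16, i.e. 16 + 16 * VG bytes.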
5034 ByteSized = Offset.getFixed();
5035 VGSized = Offset.getScalable() / 2;
5036}
5037
5038/// Returns the offset in parts to which this frame offset can be
5039/// decomposed for the purpose of describing a frame offset.
5040/// For non-scalable offsets this is simply its byte size.
5041void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5042 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5043 int64_t &NumDataVectors) {
5044 // The smallest scalable elements supported by scaled SVE addressing
5045 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5046 // byte offset must always be a multiple of 2.
5047 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5048
5049 NumBytes = Offset.getFixed();
5050 NumDataVectors = 0;
5051 NumPredicateVectors = Offset.getScalable() / 2;
5052 // This method is used to get the offsets to adjust the frame offset.
5053 // If the function requires ADDPL to be used and needs more than two ADDPL
5054 // instructions, part of the offset is folded into NumDataVectors so that it
5055 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
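  // For example, a predicate-vector count of 24 folds to NumDataVectors = 3
  // and NumPredicateVectors = 0, so a single ADDVL can be used instead.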
5056 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5057 NumPredicateVectors > 62) {
5058 NumDataVectors = NumPredicateVectors / 8;
5059 NumPredicateVectors -= NumDataVectors * 8;
5060 }
5061}
5062
5063// Convenience function to create a DWARF expression for
5064// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
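// The scalable term is encoded as DW_OP_consts <bytes>; DW_OP_bregx <VG>, 0;
// DW_OP_mul; DW_OP_plus, matching the pushes in the body below.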
5065static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5066 int NumVGScaledBytes, unsigned VG,
5067 llvm::raw_string_ostream &Comment) {
5068 uint8_t buffer[16];
5069
5070 if (NumBytes) {
5071 Expr.push_back(dwarf::DW_OP_consts);
5072 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5073 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5074 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5075 }
5076
5077 if (NumVGScaledBytes) {
5078 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5079 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5080
5081 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5082 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5083 Expr.push_back(0);
5084
5085 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5086 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5087
5088 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5089 << std::abs(NumVGScaledBytes) << " * VG";
5090 }
5091}
5092
5093// Creates an MCCFIInstruction:
5094// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5095static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5096 unsigned Reg,
5097 const StackOffset &Offset) {
5098 int64_t NumBytes, NumVGScaledBytes;
5099 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
5100 NumVGScaledBytes);
5101 std::string CommentBuffer;
5102 llvm::raw_string_ostream Comment(CommentBuffer);
5103
5104 if (Reg == AArch64::SP)
5105 Comment << "sp";
5106 else if (Reg == AArch64::FP)
5107 Comment << "fp";