1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/IR/Module.h"
43#include "llvm/MC/MCAsmInfo.h"
44#include "llvm/MC/MCInst.h"
46#include "llvm/MC/MCInstrDesc.h"
51#include "llvm/Support/LEB128.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58#include <utility>
59
60using namespace llvm;
61
62#define GET_INSTRINFO_CTOR_DTOR
63#include "AArch64GenInstrInfo.inc"
64
66 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
67 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
68
70 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
71 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
72
73static cl::opt<unsigned>
74 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
75 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
76
77static cl::opt<unsigned>
78 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
79 cl::desc("Restrict range of B instructions (DEBUG)"));
80
81AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
82 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
83 AArch64::CATCHRET),
84 RI(STI.getTargetTriple()), Subtarget(STI) {}
85
86/// GetInstSize - Return the number of bytes of code the specified
87/// instruction may occupy. This returns the maximum number of bytes.
88unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
89 const MachineBasicBlock &MBB = *MI.getParent();
90 const MachineFunction *MF = MBB.getParent();
91 const Function &F = MF->getFunction();
92 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
93
94 {
95 auto Op = MI.getOpcode();
96 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
97 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
98 }
99
100 // Meta-instructions emit no code.
101 if (MI.isMetaInstruction())
102 return 0;
103
104 // FIXME: We currently only handle pseudoinstructions that don't get expanded
105 // before the assembly printer.
106 unsigned NumBytes = 0;
107 const MCInstrDesc &Desc = MI.getDesc();
108
109 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
110 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
111
112 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
113 if (!MFI->shouldSignReturnAddress(MF))
114 return NumBytes;
115
116 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
117 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
118 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
119 return NumBytes;
120 }
121
122 // The size should preferably be set in
123 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
124 // The cases below handle instructions of variable size.
125 switch (Desc.getOpcode()) {
126 default:
127 if (Desc.getSize())
128 return Desc.getSize();
129
130 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
131 // with fixed constant size but not specified in .td file) is a normal
132 // 4-byte insn.
133 NumBytes = 4;
134 break;
135 case TargetOpcode::STACKMAP:
136 // The upper bound for a stackmap intrinsic is the full length of its shadow
137 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
138 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
139 break;
140 case TargetOpcode::PATCHPOINT:
141 // The size of the patchpoint intrinsic is the number of bytes requested
142 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
143 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
144 break;
145 case TargetOpcode::STATEPOINT:
146 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
147 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
148 // No patch bytes means a normal call inst is emitted
149 if (NumBytes == 0)
150 NumBytes = 4;
151 break;
152 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
153 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
154 // instructions are expanded to the specified number of NOPs. Otherwise,
155 // they are expanded to 36-byte XRay sleds.
156 NumBytes =
157 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
158 break;
159 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
160 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
161 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
162 NumBytes = 36;
163 break;
164 case TargetOpcode::PATCHABLE_EVENT_CALL:
165 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
166 NumBytes = 24;
167 break;
168
169 case AArch64::SPACE:
170 NumBytes = MI.getOperand(1).getImm();
171 break;
172 case TargetOpcode::BUNDLE:
173 NumBytes = getInstBundleLength(MI);
174 break;
175 }
176
177 return NumBytes;
178}
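// Illustrative summary of getInstSizeInBytes results (a sketch, not
// exhaustive):
//   most real instructions (B, ADDXri, ...) -> 4, taken from the .td Size
//   STACKMAP / PATCHPOINT / STATEPOINT      -> the requested patch-byte count
//                                              (STATEPOINT falls back to 4)
//   SPACE                                   -> the immediate in operand 1
//   BUNDLE                                  -> sum of the bundled inst sizes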
179
180unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
181 unsigned Size = 0;
182 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
183 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
184 while (++I != E && I->isInsideBundle()) {
185 assert(!I->isBundle() && "No nested bundle!");
186 Size += getInstSizeInBytes(*I);
187 }
188 return Size;
189}
190
191static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
192 SmallVectorImpl<MachineOperand> &Cond) {
193 // Block ends with fall-through condbranch.
194 switch (LastInst->getOpcode()) {
195 default:
196 llvm_unreachable("Unknown branch instruction?");
197 case AArch64::Bcc:
198 Target = LastInst->getOperand(1).getMBB();
199 Cond.push_back(LastInst->getOperand(0));
200 break;
201 case AArch64::CBZW:
202 case AArch64::CBZX:
203 case AArch64::CBNZW:
204 case AArch64::CBNZX:
205 Target = LastInst->getOperand(1).getMBB();
206 Cond.push_back(MachineOperand::CreateImm(-1));
207 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
208 Cond.push_back(LastInst->getOperand(0));
209 break;
210 case AArch64::TBZW:
211 case AArch64::TBZX:
212 case AArch64::TBNZW:
213 case AArch64::TBNZX:
214 Target = LastInst->getOperand(2).getMBB();
215 Cond.push_back(MachineOperand::CreateImm(-1));
216 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
217 Cond.push_back(LastInst->getOperand(0));
218 Cond.push_back(LastInst->getOperand(1));
219 }
220}
221
222static unsigned getBranchDisplacementBits(unsigned Opc) {
223 switch (Opc) {
224 default:
225 llvm_unreachable("unexpected opcode!");
226 case AArch64::B:
227 return BDisplacementBits;
228 case AArch64::TBNZW:
229 case AArch64::TBZW:
230 case AArch64::TBNZX:
231 case AArch64::TBZX:
232 return TBZDisplacementBits;
233 case AArch64::CBNZW:
234 case AArch64::CBZW:
235 case AArch64::CBNZX:
236 case AArch64::CBZX:
237 return CBZDisplacementBits;
238 case AArch64::Bcc:
239 return BCCDisplacementBits;
240 }
241}
242
243bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
244 int64_t BrOffset) const {
245 unsigned Bits = getBranchDisplacementBits(BranchOp);
246 assert(Bits >= 3 && "max branch displacement must be enough to jump "
247 "over conditional branch expansion");
248 return isIntN(Bits, BrOffset / 4);
249}
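// Illustrative note (not in the upstream source): with the default widths
// above, the accepted byte offsets are roughly +/-32KiB for TB[N]Z (14-bit
// word displacement), +/-1MiB for CB[N]Z and B.cc (19 bits), and +/-128MiB
// for B (26 bits), since BrOffset is divided by the 4-byte instruction size.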
250
251MachineBasicBlock *
252AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
253 switch (MI.getOpcode()) {
254 default:
255 llvm_unreachable("unexpected opcode!");
256 case AArch64::B:
257 return MI.getOperand(0).getMBB();
258 case AArch64::TBZW:
259 case AArch64::TBNZW:
260 case AArch64::TBZX:
261 case AArch64::TBNZX:
262 return MI.getOperand(2).getMBB();
263 case AArch64::CBZW:
264 case AArch64::CBNZW:
265 case AArch64::CBZX:
266 case AArch64::CBNZX:
267 case AArch64::Bcc:
268 return MI.getOperand(1).getMBB();
269 }
270}
271
272void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
273 MachineBasicBlock &NewDestBB,
274 MachineBasicBlock &RestoreBB,
275 const DebugLoc &DL,
276 int64_t BrOffset,
277 RegScavenger *RS) const {
278 assert(RS && "RegScavenger required for long branching");
279 assert(MBB.empty() &&
280 "new block should be inserted for expanding unconditional branch");
281 assert(MBB.pred_size() == 1);
282 assert(RestoreBB.empty() &&
283 "restore block should be inserted for restoring clobbered registers");
284
285 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
286 // Offsets outside of the signed 33-bit range are not supported for ADRP +
287 // ADD.
288 if (!isInt<33>(BrOffset))
290 "Branch offsets outside of the signed 33-bit range not supported");
291
292 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
293 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
294 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
295 .addReg(Reg)
296 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
297 .addImm(0);
298 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
299 };
300
301 RS->enterBasicBlockEnd(MBB);
302 // If X16 is unused, we can rely on the linker to insert a range extension
303 // thunk if NewDestBB is out of range of a single B instruction.
304 constexpr Register Reg = AArch64::X16;
305 if (!RS->isRegUsed(Reg)) {
306 insertUnconditionalBranch(MBB, &NewDestBB, DL);
307 RS->setRegUsed(Reg);
308 return;
309 }
310
311 // If there's a free register and it's worth inflating the code size,
312 // manually insert the indirect branch.
313 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
314 if (Scavenged != AArch64::NoRegister &&
316 buildIndirectBranch(Scavenged, NewDestBB);
317 RS->setRegUsed(Scavenged);
318 return;
319 }
320
321 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
322 // with red zones.
323 auto *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
324 if (!AFI || AFI->hasRedZone().value_or(true))
325 report_fatal_error(
326 "Unable to insert indirect branch inside function that has red zone");
327
328 // Otherwise, spill X16 and defer range extension to the linker.
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
330 .addReg(AArch64::SP, RegState::Define)
331 .addReg(Reg)
332 .addReg(AArch64::SP)
333 .addImm(-16);
334
335 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
336
337 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
338 .addReg(AArch64::SP, RegState::Define)
339 .addReg(Reg, RegState::Define)
340 .addReg(AArch64::SP)
341 .addImm(16);
342}
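// Illustrative expansions (a sketch, not from the upstream source), assuming
// x16 is the register used for the target address:
//   adrp x16, NewDestBB            ; direct form, when a spare reg is found
//   add  x16, x16, :lo12:NewDestBB
//   br   x16
// and for the spill path:
//   str  x16, [sp, #-16]!          ; free x16 for a possible linker thunk
//   b    RestoreBB
// RestoreBB:
//   ldr  x16, [sp], #16            ; reload x16, then continue to NewDestBB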
343
344// Branch analysis.
345bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
346 MachineBasicBlock *&TBB,
347 MachineBasicBlock *&FBB,
348 SmallVectorImpl<MachineOperand> &Cond,
349 bool AllowModify) const {
350 // If the block has no terminators, it just falls into the block after it.
351 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
352 if (I == MBB.end())
353 return false;
354
355 // Skip over SpeculationBarrierEndBB terminators
356 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
357 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
358 --I;
359 }
360
361 if (!isUnpredicatedTerminator(*I))
362 return false;
363
364 // Get the last instruction in the block.
365 MachineInstr *LastInst = &*I;
366
367 // If there is only one terminator instruction, process it.
368 unsigned LastOpc = LastInst->getOpcode();
369 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
370 if (isUncondBranchOpcode(LastOpc)) {
371 TBB = LastInst->getOperand(0).getMBB();
372 return false;
373 }
374 if (isCondBranchOpcode(LastOpc)) {
375 // Block ends with fall-through condbranch.
376 parseCondBranch(LastInst, TBB, Cond);
377 return false;
378 }
379 return true; // Can't handle indirect branch.
380 }
381
382 // Get the instruction before it if it is a terminator.
383 MachineInstr *SecondLastInst = &*I;
384 unsigned SecondLastOpc = SecondLastInst->getOpcode();
385
386 // If AllowModify is true and the block ends with two or more unconditional
387 // branches, delete all but the first unconditional branch.
388 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
389 while (isUncondBranchOpcode(SecondLastOpc)) {
390 LastInst->eraseFromParent();
391 LastInst = SecondLastInst;
392 LastOpc = LastInst->getOpcode();
393 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
394 // Return now; the only terminator is an unconditional branch.
395 TBB = LastInst->getOperand(0).getMBB();
396 return false;
397 }
398 SecondLastInst = &*I;
399 SecondLastOpc = SecondLastInst->getOpcode();
400 }
401 }
402
403 // If we're allowed to modify and the block ends in an unconditional branch
404 // which could simply fall through, remove the branch. (Note: This case only
405 // matters when we can't understand the whole sequence, otherwise it's also
406 // handled by BranchFolding.cpp.)
407 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
408 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
409 LastInst->eraseFromParent();
410 LastInst = SecondLastInst;
411 LastOpc = LastInst->getOpcode();
412 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
413 assert(!isUncondBranchOpcode(LastOpc) &&
414 "unreachable unconditional branches removed above");
415
416 if (isCondBranchOpcode(LastOpc)) {
417 // Block ends with fall-through condbranch.
418 parseCondBranch(LastInst, TBB, Cond);
419 return false;
420 }
421 return true; // Can't handle indirect branch.
422 }
423 SecondLastInst = &*I;
424 SecondLastOpc = SecondLastInst->getOpcode();
425 }
426
427 // If there are three terminators, we don't know what sort of block this is.
428 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
429 return true;
430
431 // If the block ends with a B and a Bcc, handle it.
432 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
433 parseCondBranch(SecondLastInst, TBB, Cond);
434 FBB = LastInst->getOperand(0).getMBB();
435 return false;
436 }
437
438 // If the block ends with two unconditional branches, handle it. The second
439 // one is not executed, so remove it.
440 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
441 TBB = SecondLastInst->getOperand(0).getMBB();
442 I = LastInst;
443 if (AllowModify)
444 I->eraseFromParent();
445 return false;
446 }
447
448 // ...likewise if it ends with an indirect branch followed by an unconditional
449 // branch.
450 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
451 I = LastInst;
452 if (AllowModify)
453 I->eraseFromParent();
454 return true;
455 }
456
457 // Otherwise, can't handle this.
458 return true;
459}
460
461bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
462 MachineBranchPredicate &MBP,
463 bool AllowModify) const {
464 // For the moment, handle only a block which ends with a cb(n)zx followed by
465 // a fallthrough. Why this? Because it is a common form.
466 // TODO: Should we handle b.cc?
467
468 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
469 if (I == MBB.end())
470 return true;
471
472 // Skip over SpeculationBarrierEndBB terminators
473 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
474 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
475 --I;
476 }
477
478 if (!isUnpredicatedTerminator(*I))
479 return true;
480
481 // Get the last instruction in the block.
482 MachineInstr *LastInst = &*I;
483 unsigned LastOpc = LastInst->getOpcode();
484 if (!isCondBranchOpcode(LastOpc))
485 return true;
486
487 switch (LastOpc) {
488 default:
489 return true;
490 case AArch64::CBZW:
491 case AArch64::CBZX:
492 case AArch64::CBNZW:
493 case AArch64::CBNZX:
494 break;
495 };
496
497 MBP.TrueDest = LastInst->getOperand(1).getMBB();
498 assert(MBP.TrueDest && "expected!");
499 MBP.FalseDest = MBB.getNextNode();
500
501 MBP.ConditionDef = nullptr;
502 MBP.SingleUseCondition = false;
503
504 MBP.LHS = LastInst->getOperand(0);
505 MBP.RHS = MachineOperand::CreateImm(0);
506 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
507 : MachineBranchPredicate::PRED_EQ;
508 return false;
509}
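// Illustrative pattern recognized by analyzeBranchPredicate (a sketch):
//   cbnz x0, %bb.true      ; LHS = x0, RHS = #0, Predicate = PRED_NE
//   ; fall through to the next block, which becomes FalseDest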
510
511bool AArch64InstrInfo::reverseBranchCondition(
512 SmallVectorImpl<MachineOperand> &Cond) const {
513 if (Cond[0].getImm() != -1) {
514 // Regular Bcc
515 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
516 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
517 } else {
518 // Folded compare-and-branch
519 switch (Cond[1].getImm()) {
520 default:
521 llvm_unreachable("Unknown conditional branch!");
522 case AArch64::CBZW:
523 Cond[1].setImm(AArch64::CBNZW);
524 break;
525 case AArch64::CBNZW:
526 Cond[1].setImm(AArch64::CBZW);
527 break;
528 case AArch64::CBZX:
529 Cond[1].setImm(AArch64::CBNZX);
530 break;
531 case AArch64::CBNZX:
532 Cond[1].setImm(AArch64::CBZX);
533 break;
534 case AArch64::TBZW:
535 Cond[1].setImm(AArch64::TBNZW);
536 break;
537 case AArch64::TBNZW:
538 Cond[1].setImm(AArch64::TBZW);
539 break;
540 case AArch64::TBZX:
541 Cond[1].setImm(AArch64::TBNZX);
542 break;
543 case AArch64::TBNZX:
544 Cond[1].setImm(AArch64::TBZX);
545 break;
546 }
547 }
548
549 return false;
550}
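// Illustrative effect of reverseBranchCondition (a sketch): the Bcc form has
// its condition-code immediate inverted (b.eq <-> b.ne, b.lt <-> b.ge, ...),
// while the folded forms swap opcodes (cbz <-> cbnz, tbz <-> tbnz).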
551
552unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
553 int *BytesRemoved) const {
554 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
555 if (I == MBB.end())
556 return 0;
557
558 if (!isUncondBranchOpcode(I->getOpcode()) &&
559 !isCondBranchOpcode(I->getOpcode()))
560 return 0;
561
562 // Remove the branch.
563 I->eraseFromParent();
564
565 I = MBB.end();
566
567 if (I == MBB.begin()) {
568 if (BytesRemoved)
569 *BytesRemoved = 4;
570 return 1;
571 }
572 --I;
573 if (!isCondBranchOpcode(I->getOpcode())) {
574 if (BytesRemoved)
575 *BytesRemoved = 4;
576 return 1;
577 }
578
579 // Remove the branch.
580 I->eraseFromParent();
581 if (BytesRemoved)
582 *BytesRemoved = 8;
583
584 return 2;
585}
586
587void AArch64InstrInfo::instantiateCondBranch(
588 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
589 ArrayRef<MachineOperand> Cond) const {
590 if (Cond[0].getImm() != -1) {
591 // Regular Bcc
592 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
593 } else {
594 // Folded compare-and-branch
595 // Note that we use addOperand instead of addReg to keep the flags.
596 const MachineInstrBuilder MIB =
597 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
598 if (Cond.size() > 3)
599 MIB.addImm(Cond[3].getImm());
600 MIB.addMBB(TBB);
601 }
602}
603
604unsigned AArch64InstrInfo::insertBranch(
605 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
606 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
607 // Shouldn't be a fall through.
608 assert(TBB && "insertBranch must not be told to insert a fallthrough");
609
610 if (!FBB) {
611 if (Cond.empty()) // Unconditional branch?
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
613 else
614 instantiateCondBranch(MBB, DL, TBB, Cond);
615
616 if (BytesAdded)
617 *BytesAdded = 4;
618
619 return 1;
620 }
621
622 // Two-way conditional branch.
623 instantiateCondBranch(MBB, DL, TBB, Cond);
624 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
625
626 if (BytesAdded)
627 *BytesAdded = 8;
628
629 return 2;
630}
631
632// Find the original register that VReg is copied from.
633static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
634 while (Register::isVirtualRegister(VReg)) {
635 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
636 if (!DefMI->isFullCopy())
637 return VReg;
638 VReg = DefMI->getOperand(1).getReg();
639 }
640 return VReg;
641}
642
643// Determine if VReg is defined by an instruction that can be folded into a
644// csel instruction. If so, return the folded opcode, and the replacement
645// register.
646static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
647 unsigned *NewVReg = nullptr) {
648 VReg = removeCopies(MRI, VReg);
649 if (!Register::isVirtualRegister(VReg))
650 return 0;
651
652 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
653 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
654 unsigned Opc = 0;
655 unsigned SrcOpNum = 0;
656 switch (DefMI->getOpcode()) {
657 case AArch64::ADDSXri:
658 case AArch64::ADDSWri:
659 // if NZCV is used, do not fold.
660 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
661 true) == -1)
662 return 0;
663 // fall-through to ADDXri and ADDWri.
664 [[fallthrough]];
665 case AArch64::ADDXri:
666 case AArch64::ADDWri:
667 // add x, 1 -> csinc.
668 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
669 DefMI->getOperand(3).getImm() != 0)
670 return 0;
671 SrcOpNum = 1;
672 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
673 break;
674
675 case AArch64::ORNXrr:
676 case AArch64::ORNWrr: {
677 // not x -> csinv, represented as orn dst, xzr, src.
678 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
679 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
680 return 0;
681 SrcOpNum = 2;
682 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
683 break;
684 }
685
686 case AArch64::SUBSXrr:
687 case AArch64::SUBSWrr:
688 // if NZCV is used, do not fold.
689 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
690 true) == -1)
691 return 0;
692 // fall-through to SUBXrr and SUBWrr.
693 [[fallthrough]];
694 case AArch64::SUBXrr:
695 case AArch64::SUBWrr: {
696 // neg x -> csneg, represented as sub dst, xzr, src.
697 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
698 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
699 return 0;
700 SrcOpNum = 2;
701 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
702 break;
703 }
704 default:
705 return 0;
706 }
707 assert(Opc && SrcOpNum && "Missing parameters");
708
709 if (NewVReg)
710 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
711 return Opc;
712}
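// Illustrative folds recognized above (a sketch, written as assembly):
//   add w1, w2, #1    +  csel w0, w1, w3, cc  ->  csinc w0, w3, w2, !cc
//   orn w1, wzr, w2   +  csel w0, w1, w3, cc  ->  csinv w0, w3, w2, !cc
//   sub w1, wzr, w2   +  csel w0, w1, w3, cc  ->  csneg w0, w3, w2, !cc
// The condition inversion and operand swap are performed by insertSelect.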
713
714bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
715 ArrayRef<MachineOperand> Cond,
716 Register DstReg, Register TrueReg,
717 Register FalseReg, int &CondCycles,
718 int &TrueCycles,
719 int &FalseCycles) const {
720 // Check register classes.
721 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
722 const TargetRegisterClass *RC =
723 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
724 if (!RC)
725 return false;
726
727 // Also need to check the dest regclass, in case we're trying to optimize
728 // something like:
729 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
730 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
731 return false;
732
733 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
734 unsigned ExtraCondLat = Cond.size() != 1;
735
736 // GPRs are handled by csel.
737 // FIXME: Fold in x+1, -x, and ~x when applicable.
738 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
739 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
740 // Single-cycle csel, csinc, csinv, and csneg.
741 CondCycles = 1 + ExtraCondLat;
742 TrueCycles = FalseCycles = 1;
743 if (canFoldIntoCSel(MRI, TrueReg))
744 TrueCycles = 0;
745 else if (canFoldIntoCSel(MRI, FalseReg))
746 FalseCycles = 0;
747 return true;
748 }
749
750 // Scalar floating point is handled by fcsel.
751 // FIXME: Form fabs, fmin, and fmax when applicable.
752 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
753 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
754 CondCycles = 5 + ExtraCondLat;
755 TrueCycles = FalseCycles = 2;
756 return true;
757 }
758
759 // Can't do vectors.
760 return false;
761}
762
763void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
764 MachineBasicBlock::iterator I,
765 const DebugLoc &DL, Register DstReg,
766 ArrayRef<MachineOperand> Cond,
767 Register TrueReg, Register FalseReg) const {
768 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
769
770 // Parse the condition code, see parseCondBranch() above.
771 AArch64CC::CondCode CC;
772 switch (Cond.size()) {
773 default:
774 llvm_unreachable("Unknown condition opcode in Cond");
775 case 1: // b.cc
776 CC = AArch64CC::CondCode(Cond[0].getImm());
777 break;
778 case 3: { // cbz/cbnz
779 // We must insert a compare against 0.
780 bool Is64Bit;
781 switch (Cond[1].getImm()) {
782 default:
783 llvm_unreachable("Unknown branch opcode in Cond");
784 case AArch64::CBZW:
785 Is64Bit = false;
786 CC = AArch64CC::EQ;
787 break;
788 case AArch64::CBZX:
789 Is64Bit = true;
790 CC = AArch64CC::EQ;
791 break;
792 case AArch64::CBNZW:
793 Is64Bit = false;
794 CC = AArch64CC::NE;
795 break;
796 case AArch64::CBNZX:
797 Is64Bit = true;
798 CC = AArch64CC::NE;
799 break;
800 }
801 Register SrcReg = Cond[2].getReg();
802 if (Is64Bit) {
803 // cmp reg, #0 is actually subs xzr, reg, #0.
804 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
805 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
806 .addReg(SrcReg)
807 .addImm(0)
808 .addImm(0);
809 } else {
810 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
811 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
812 .addReg(SrcReg)
813 .addImm(0)
814 .addImm(0);
815 }
816 break;
817 }
818 case 4: { // tbz/tbnz
819 // We must insert a tst instruction.
820 switch (Cond[1].getImm()) {
821 default:
822 llvm_unreachable("Unknown branch opcode in Cond");
823 case AArch64::TBZW:
824 case AArch64::TBZX:
825 CC = AArch64CC::EQ;
826 break;
827 case AArch64::TBNZW:
828 case AArch64::TBNZX:
829 CC = AArch64CC::NE;
830 break;
831 }
832 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
833 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
834 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
835 .addReg(Cond[2].getReg())
836 .addImm(
837 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
838 else
839 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
840 .addReg(Cond[2].getReg())
841 .addImm(
842 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
843 break;
844 }
845 }
846
847 unsigned Opc = 0;
848 const TargetRegisterClass *RC = nullptr;
849 bool TryFold = false;
850 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
851 RC = &AArch64::GPR64RegClass;
852 Opc = AArch64::CSELXr;
853 TryFold = true;
854 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
855 RC = &AArch64::GPR32RegClass;
856 Opc = AArch64::CSELWr;
857 TryFold = true;
858 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
859 RC = &AArch64::FPR64RegClass;
860 Opc = AArch64::FCSELDrrr;
861 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
862 RC = &AArch64::FPR32RegClass;
863 Opc = AArch64::FCSELSrrr;
864 }
865 assert(RC && "Unsupported regclass");
866
867 // Try folding simple instructions into the csel.
868 if (TryFold) {
869 unsigned NewVReg = 0;
870 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
871 if (FoldedOpc) {
872 // The folded opcodes csinc, csinv and csneg apply the operation to
873 // FalseReg, so we need to invert the condition.
874 CC = AArch64CC::getInvertedCondCode(CC);
875 TrueReg = FalseReg;
876 } else
877 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
878
879 // Fold the operation. Leave any dead instructions for DCE to clean up.
880 if (FoldedOpc) {
881 FalseReg = NewVReg;
882 Opc = FoldedOpc;
883 // This extends the live range of NewVReg.
884 MRI.clearKillFlags(NewVReg);
885 }
886 }
887
888 // Pull all virtual registers into the appropriate class.
889 MRI.constrainRegClass(TrueReg, RC);
890 MRI.constrainRegClass(FalseReg, RC);
891
892 // Insert the csel.
893 BuildMI(MBB, I, DL, get(Opc), DstReg)
894 .addReg(TrueReg)
895 .addReg(FalseReg)
896 .addImm(CC);
897}
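// Illustrative lowering performed by insertSelect (a sketch): for a select
// controlled by "cbz w1" with GPR32 operands, roughly
//   subs wzr, w1, #0                ; materialize NZCV for the cbz condition
//   csel w0, wTrue, wFalse, eq
// and for "tbz w1, #3":
//   ands wzr, w1, #0x8
//   csel w0, wTrue, wFalse, eq
// (register names here are placeholders, not taken from the source).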
898
899// Return true if Imm can be loaded into a register by a "cheap" sequence of
900// instructions. For now, "cheap" means at most two instructions.
901static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
902 if (BitSize == 32)
903 return true;
904
905 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
906 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
907 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
908 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
909
910 return Is.size() <= 2;
911}
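// Illustrative classification (a sketch): 0x0000000000ff0000 expands to a
// single MOVZ-with-shift and is therefore "cheap", while a constant such as
// 0x123456789abcdef0 needs a MOVZ plus three MOVKs and is not.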
912
913// FIXME: this implementation should be micro-architecture dependent, so a
914// micro-architecture target hook should be introduced here in future.
915bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
916 if (Subtarget.hasExynosCheapAsMoveHandling()) {
917 if (isExynosCheapAsMove(MI))
918 return true;
919 return MI.isAsCheapAsAMove();
920 }
921
922 switch (MI.getOpcode()) {
923 default:
924 return MI.isAsCheapAsAMove();
925
926 case AArch64::ADDWrs:
927 case AArch64::ADDXrs:
928 case AArch64::SUBWrs:
929 case AArch64::SUBXrs:
930 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
931
932 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
933 // ORRXri, it is as cheap as MOV.
934 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
935 case AArch64::MOVi32imm:
936 return isCheapImmediate(MI, 32);
937 case AArch64::MOVi64imm:
938 return isCheapImmediate(MI, 64);
939 }
940}
941
942bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
943 switch (MI.getOpcode()) {
944 default:
945 return false;
946
947 case AArch64::ADDWrs:
948 case AArch64::ADDXrs:
949 case AArch64::ADDSWrs:
950 case AArch64::ADDSXrs: {
951 unsigned Imm = MI.getOperand(3).getImm();
952 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
953 if (ShiftVal == 0)
954 return true;
955 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
956 }
957
958 case AArch64::ADDWrx:
959 case AArch64::ADDXrx:
960 case AArch64::ADDXrx64:
961 case AArch64::ADDSWrx:
962 case AArch64::ADDSXrx:
963 case AArch64::ADDSXrx64: {
964 unsigned Imm = MI.getOperand(3).getImm();
965 switch (AArch64_AM::getArithExtendType(Imm)) {
966 default:
967 return false;
968 case AArch64_AM::UXTB:
969 case AArch64_AM::UXTH:
970 case AArch64_AM::UXTW:
971 case AArch64_AM::UXTX:
972 return AArch64_AM::getArithShiftValue(Imm) <= 4;
973 }
974 }
975
976 case AArch64::SUBWrs:
977 case AArch64::SUBSWrs: {
978 unsigned Imm = MI.getOperand(3).getImm();
979 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
980 return ShiftVal == 0 ||
981 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
982 }
983
984 case AArch64::SUBXrs:
985 case AArch64::SUBSXrs: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
988 return ShiftVal == 0 ||
989 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
990 }
991
992 case AArch64::SUBWrx:
993 case AArch64::SUBXrx:
994 case AArch64::SUBXrx64:
995 case AArch64::SUBSWrx:
996 case AArch64::SUBSXrx:
997 case AArch64::SUBSXrx64: {
998 unsigned Imm = MI.getOperand(3).getImm();
999 switch (AArch64_AM::getArithExtendType(Imm)) {
1000 default:
1001 return false;
1002 case AArch64_AM::UXTB:
1003 case AArch64_AM::UXTH:
1004 case AArch64_AM::UXTW:
1005 case AArch64_AM::UXTX:
1006 return AArch64_AM::getArithShiftValue(Imm) == 0;
1007 }
1008 }
1009
1010 case AArch64::LDRBBroW:
1011 case AArch64::LDRBBroX:
1012 case AArch64::LDRBroW:
1013 case AArch64::LDRBroX:
1014 case AArch64::LDRDroW:
1015 case AArch64::LDRDroX:
1016 case AArch64::LDRHHroW:
1017 case AArch64::LDRHHroX:
1018 case AArch64::LDRHroW:
1019 case AArch64::LDRHroX:
1020 case AArch64::LDRQroW:
1021 case AArch64::LDRQroX:
1022 case AArch64::LDRSBWroW:
1023 case AArch64::LDRSBWroX:
1024 case AArch64::LDRSBXroW:
1025 case AArch64::LDRSBXroX:
1026 case AArch64::LDRSHWroW:
1027 case AArch64::LDRSHWroX:
1028 case AArch64::LDRSHXroW:
1029 case AArch64::LDRSHXroX:
1030 case AArch64::LDRSWroW:
1031 case AArch64::LDRSWroX:
1032 case AArch64::LDRSroW:
1033 case AArch64::LDRSroX:
1034 case AArch64::LDRWroW:
1035 case AArch64::LDRWroX:
1036 case AArch64::LDRXroW:
1037 case AArch64::LDRXroX:
1038 case AArch64::PRFMroW:
1039 case AArch64::PRFMroX:
1040 case AArch64::STRBBroW:
1041 case AArch64::STRBBroX:
1042 case AArch64::STRBroW:
1043 case AArch64::STRBroX:
1044 case AArch64::STRDroW:
1045 case AArch64::STRDroX:
1046 case AArch64::STRHHroW:
1047 case AArch64::STRHHroX:
1048 case AArch64::STRHroW:
1049 case AArch64::STRHroX:
1050 case AArch64::STRQroW:
1051 case AArch64::STRQroX:
1052 case AArch64::STRSroW:
1053 case AArch64::STRSroX:
1054 case AArch64::STRWroW:
1055 case AArch64::STRWroX:
1056 case AArch64::STRXroW:
1057 case AArch64::STRXroX: {
1058 unsigned IsSigned = MI.getOperand(3).getImm();
1059 return !IsSigned;
1060 }
1061 }
1062}
1063
1064bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1065 unsigned Opc = MI.getOpcode();
1066 switch (Opc) {
1067 default:
1068 return false;
1069 case AArch64::SEH_StackAlloc:
1070 case AArch64::SEH_SaveFPLR:
1071 case AArch64::SEH_SaveFPLR_X:
1072 case AArch64::SEH_SaveReg:
1073 case AArch64::SEH_SaveReg_X:
1074 case AArch64::SEH_SaveRegP:
1075 case AArch64::SEH_SaveRegP_X:
1076 case AArch64::SEH_SaveFReg:
1077 case AArch64::SEH_SaveFReg_X:
1078 case AArch64::SEH_SaveFRegP:
1079 case AArch64::SEH_SaveFRegP_X:
1080 case AArch64::SEH_SetFP:
1081 case AArch64::SEH_AddFP:
1082 case AArch64::SEH_Nop:
1083 case AArch64::SEH_PrologEnd:
1084 case AArch64::SEH_EpilogStart:
1085 case AArch64::SEH_EpilogEnd:
1086 case AArch64::SEH_PACSignLR:
1087 case AArch64::SEH_SaveAnyRegQP:
1088 case AArch64::SEH_SaveAnyRegQPX:
1089 return true;
1090 }
1091}
1092
1093bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1094 Register &SrcReg, Register &DstReg,
1095 unsigned &SubIdx) const {
1096 switch (MI.getOpcode()) {
1097 default:
1098 return false;
1099 case AArch64::SBFMXri: // aka sxtw
1100 case AArch64::UBFMXri: // aka uxtw
1101 // Check for the 32 -> 64 bit extension case, these instructions can do
1102 // much more.
1103 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1104 return false;
1105 // This is a signed or unsigned 32 -> 64 bit extension.
1106 SrcReg = MI.getOperand(1).getReg();
1107 DstReg = MI.getOperand(0).getReg();
1108 SubIdx = AArch64::sub_32;
1109 return true;
1110 }
1111}
1112
1113bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1114 const MachineInstr &MIa, const MachineInstr &MIb) const {
1115 const TargetRegisterInfo *TRI = &getRegisterInfo();
1116 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1117 int64_t OffsetA = 0, OffsetB = 0;
1118 TypeSize WidthA(0, false), WidthB(0, false);
1119 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1120
1121 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1122 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1123
1124 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1125 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1126 return false;
1127
1128 // Retrieve the base, offset from the base and width. Width
1129 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1130 // the bases are identical, and the offset of the lower memory access plus
1131 // the width doesn't overlap the offset of a higher memory access,
1132 // then the memory accesses are different.
1133 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1134 // are assumed to have the same scale (vscale).
1135 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1136 WidthA, TRI) &&
1137 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1138 WidthB, TRI)) {
1139 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1140 OffsetAIsScalable == OffsetBIsScalable) {
1141 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1142 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1143 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1144 if (LowWidth.isScalable() == OffsetAIsScalable &&
1145 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1146 return true;
1147 }
1148 }
1149 return false;
1150}
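// Illustrative disjointness check (a sketch): for
//   ldr x0, [x1]         ; base x1, offset 0,  width 8
//   str x2, [x1, #16]    ; base x1, offset 16, width 8
// the bases match and 0 + 8 <= 16, so the accesses are reported disjoint;
// with offsets 0 and 4 they would not be.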
1151
1152bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1153 const MachineBasicBlock *MBB,
1154 const MachineFunction &MF) const {
1155 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1156 return true;
1157
1158 // Do not move an instruction that can be recognized as a branch target.
1159 if (hasBTISemantics(MI))
1160 return true;
1161
1162 switch (MI.getOpcode()) {
1163 case AArch64::HINT:
1164 // CSDB hints are scheduling barriers.
1165 if (MI.getOperand(0).getImm() == 0x14)
1166 return true;
1167 break;
1168 case AArch64::DSB:
1169 case AArch64::ISB:
1170 // DSB and ISB also are scheduling barriers.
1171 return true;
1172 case AArch64::MSRpstatesvcrImm1:
1173 // SMSTART and SMSTOP are also scheduling barriers.
1174 return true;
1175 default:;
1176 }
1177 if (isSEHInstruction(MI))
1178 return true;
1179 auto Next = std::next(MI.getIterator());
1180 return Next != MBB->end() && Next->isCFIInstruction();
1181}
1182
1183/// analyzeCompare - For a comparison instruction, return the source registers
1184/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1185/// Return true if the comparison instruction can be analyzed.
1186bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1187 Register &SrcReg2, int64_t &CmpMask,
1188 int64_t &CmpValue) const {
1189 // The first operand can be a frame index where we'd normally expect a
1190 // register.
1191 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1192 if (!MI.getOperand(1).isReg())
1193 return false;
1194
1195 switch (MI.getOpcode()) {
1196 default:
1197 break;
1198 case AArch64::PTEST_PP:
1199 case AArch64::PTEST_PP_ANY:
1200 SrcReg = MI.getOperand(0).getReg();
1201 SrcReg2 = MI.getOperand(1).getReg();
1202 // Not sure about the mask and value for now...
1203 CmpMask = ~0;
1204 CmpValue = 0;
1205 return true;
1206 case AArch64::SUBSWrr:
1207 case AArch64::SUBSWrs:
1208 case AArch64::SUBSWrx:
1209 case AArch64::SUBSXrr:
1210 case AArch64::SUBSXrs:
1211 case AArch64::SUBSXrx:
1212 case AArch64::ADDSWrr:
1213 case AArch64::ADDSWrs:
1214 case AArch64::ADDSWrx:
1215 case AArch64::ADDSXrr:
1216 case AArch64::ADDSXrs:
1217 case AArch64::ADDSXrx:
1218 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1219 SrcReg = MI.getOperand(1).getReg();
1220 SrcReg2 = MI.getOperand(2).getReg();
1221 CmpMask = ~0;
1222 CmpValue = 0;
1223 return true;
1224 case AArch64::SUBSWri:
1225 case AArch64::ADDSWri:
1226 case AArch64::SUBSXri:
1227 case AArch64::ADDSXri:
1228 SrcReg = MI.getOperand(1).getReg();
1229 SrcReg2 = 0;
1230 CmpMask = ~0;
1231 CmpValue = MI.getOperand(2).getImm();
1232 return true;
1233 case AArch64::ANDSWri:
1234 case AArch64::ANDSXri:
1235 // ANDS does not use the same encoding scheme as the other xxxS
1236 // instructions.
1237 SrcReg = MI.getOperand(1).getReg();
1238 SrcReg2 = 0;
1239 CmpMask = ~0;
1240 CmpValue = AArch64_AM::decodeLogicalImmediate(
1241 MI.getOperand(2).getImm(),
1242 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1243 return true;
1244 }
1245
1246 return false;
1247}
1248
1249static bool UpdateOperandRegClass(MachineInstr &Instr) {
1250 MachineBasicBlock *MBB = Instr.getParent();
1251 assert(MBB && "Can't get MachineBasicBlock here");
1252 MachineFunction *MF = MBB->getParent();
1253 assert(MF && "Can't get MachineFunction here");
1254 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1255 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1256 MachineRegisterInfo *MRI = &MF->getRegInfo();
1257
1258 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1259 ++OpIdx) {
1260 MachineOperand &MO = Instr.getOperand(OpIdx);
1261 const TargetRegisterClass *OpRegCstraints =
1262 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1263
1264 // If there's no constraint, there's nothing to do.
1265 if (!OpRegCstraints)
1266 continue;
1267 // If the operand is a frame index, there's nothing to do here.
1268 // A frame index operand will resolve correctly during PEI.
1269 if (MO.isFI())
1270 continue;
1271
1272 assert(MO.isReg() &&
1273 "Operand has register constraints without being a register!");
1274
1275 Register Reg = MO.getReg();
1276 if (Reg.isPhysical()) {
1277 if (!OpRegCstraints->contains(Reg))
1278 return false;
1279 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1280 !MRI->constrainRegClass(Reg, OpRegCstraints))
1281 return false;
1282 }
1283
1284 return true;
1285}
1286
1287/// Return the opcode that does not set flags when possible - otherwise
1288/// return the original opcode. The caller is responsible for doing the actual
1289/// substitution and legality checking.
1290unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1291 // Don't convert all compare instructions, because for some the zero register
1292 // encoding becomes the sp register.
1293 bool MIDefinesZeroReg = false;
1294 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1295 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1296 MIDefinesZeroReg = true;
1297
1298 switch (MI.getOpcode()) {
1299 default:
1300 return MI.getOpcode();
1301 case AArch64::ADDSWrr:
1302 return AArch64::ADDWrr;
1303 case AArch64::ADDSWri:
1304 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1305 case AArch64::ADDSWrs:
1306 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1307 case AArch64::ADDSWrx:
1308 return AArch64::ADDWrx;
1309 case AArch64::ADDSXrr:
1310 return AArch64::ADDXrr;
1311 case AArch64::ADDSXri:
1312 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1313 case AArch64::ADDSXrs:
1314 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1315 case AArch64::ADDSXrx:
1316 return AArch64::ADDXrx;
1317 case AArch64::SUBSWrr:
1318 return AArch64::SUBWrr;
1319 case AArch64::SUBSWri:
1320 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1321 case AArch64::SUBSWrs:
1322 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1323 case AArch64::SUBSWrx:
1324 return AArch64::SUBWrx;
1325 case AArch64::SUBSXrr:
1326 return AArch64::SUBXrr;
1327 case AArch64::SUBSXri:
1328 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1329 case AArch64::SUBSXrs:
1330 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1331 case AArch64::SUBSXrx:
1332 return AArch64::SUBXrx;
1333 }
1334}
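// Illustrative use (a sketch): when the NZCV def of a SUBSWrr is dead,
// optimizeCompareInstr switches it to SUBWrr via this mapping, i.e.
//   subs w0, w1, w2   ; flags unused
// effectively becomes
//   sub  w0, w1, w2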
1335
1336enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1337
1338/// True when condition flags are accessed (either by writing or reading)
1339/// on the instruction trace starting at From and ending at To.
1340///
1341/// Note: If From and To are from different blocks it's assumed CC are accessed
1342/// on the path.
1343static bool areCFlagsAccessedBetweenInstrs(
1344 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1345 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1346 // Early exit if To is at the beginning of the BB.
1347 if (To == To->getParent()->begin())
1348 return true;
1349
1350 // Check whether the instructions are in the same basic block
1351 // If not, assume the condition flags might get modified somewhere.
1352 if (To->getParent() != From->getParent())
1353 return true;
1354
1355 // From must be above To.
1356 assert(std::any_of(
1357 ++To.getReverse(), To->getParent()->rend(),
1358 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1359
1360 // We iterate backward starting at \p To until we hit \p From.
1361 for (const MachineInstr &Instr :
1362 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1363 if (((AccessToCheck & AK_Write) &&
1364 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1365 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1366 return true;
1367 }
1368 return false;
1369}
1370
1371std::optional<unsigned>
1372AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1373 MachineInstr *Pred,
1374 const MachineRegisterInfo *MRI) const {
1375 unsigned MaskOpcode = Mask->getOpcode();
1376 unsigned PredOpcode = Pred->getOpcode();
1377 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1378 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1379
1380 if (PredIsWhileLike) {
1381 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1382 // instruction and the condition is "any" since WHILEcc does an implicit
1383 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1384 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1385 return PredOpcode;
1386
1387 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1388 // redundant since WHILE performs an implicit PTEST with an all active
1389 // mask.
1390 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1391 getElementSizeForOpcode(MaskOpcode) ==
1392 getElementSizeForOpcode(PredOpcode))
1393 return PredOpcode;
1394
1395 return {};
1396 }
1397
1398 if (PredIsPTestLike) {
1399 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1400 // instruction that sets the flags as PTEST would and the condition is
1401 // "any" since PG is always a subset of the governing predicate of the
1402 // ptest-like instruction.
1403 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1404 return PredOpcode;
1405
1406 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if
1407 // the element size matches and either the PTEST_LIKE instruction uses
1408 // the same all active mask or the condition is "any".
1409 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1410 getElementSizeForOpcode(MaskOpcode) ==
1411 getElementSizeForOpcode(PredOpcode)) {
1412 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1413 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1414 return PredOpcode;
1415 }
1416
1417 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1418 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1419 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1420 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1421 // performed by the compare could consider fewer lanes for these element
1422 // sizes.
1423 //
1424 // For example, consider
1425 //
1426 // ptrue p0.b ; P0=1111-1111-1111-1111
1427 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1428 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1429 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1430 // ; ^ last active
1431 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1432 // ; ^ last active
1433 //
1434 // where the compare generates a canonical all active 32-bit predicate
1435 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1436 // active flag, whereas the PTEST instruction with the same mask doesn't.
1437 // For PTEST_ANY this doesn't apply as the flags in this case would be
1438 // identical regardless of element size.
1439 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1440 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1441 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1442 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1443 return PredOpcode;
1444
1445 return {};
1446 }
1447
1448 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1449 // opcode so the PTEST becomes redundant.
1450 switch (PredOpcode) {
1451 case AArch64::AND_PPzPP:
1452 case AArch64::BIC_PPzPP:
1453 case AArch64::EOR_PPzPP:
1454 case AArch64::NAND_PPzPP:
1455 case AArch64::NOR_PPzPP:
1456 case AArch64::ORN_PPzPP:
1457 case AArch64::ORR_PPzPP:
1458 case AArch64::BRKA_PPzP:
1459 case AArch64::BRKPA_PPzPP:
1460 case AArch64::BRKB_PPzP:
1461 case AArch64::BRKPB_PPzPP:
1462 case AArch64::RDFFR_PPz: {
1463 // Check to see if our mask is the same. If not, the resulting flag bits
1464 // may be different and we can't remove the ptest.
1465 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1466 if (Mask != PredMask)
1467 return {};
1468 break;
1469 }
1470 case AArch64::BRKN_PPzP: {
1471 // BRKN uses an all active implicit mask to set flags unlike the other
1472 // flag-setting instructions.
1473 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1474 if ((MaskOpcode != AArch64::PTRUE_B) ||
1475 (Mask->getOperand(1).getImm() != 31))
1476 return {};
1477 break;
1478 }
1479 case AArch64::PTRUE_B:
1480 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1481 break;
1482 default:
1483 // Bail out if we don't recognize the input
1484 return {};
1485 }
1486
1487 return convertToFlagSettingOpc(PredOpcode);
1488}
1489
1490/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1491/// operation which could set the flags in an identical manner
1492bool AArch64InstrInfo::optimizePTestInstr(
1493 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1494 const MachineRegisterInfo *MRI) const {
1495 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1496 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1497 unsigned PredOpcode = Pred->getOpcode();
1498 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1499 if (!NewOp)
1500 return false;
1501
1502 const TargetRegisterInfo *TRI = &getRegisterInfo();
1503
1504 // If another instruction between Pred and PTest accesses flags, don't remove
1505 // the ptest or update the earlier instruction to modify them.
1506 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1507 return false;
1508
1509 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1510 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1511 // operand to be replaced with an equivalent instruction that also sets the
1512 // flags.
1513 PTest->eraseFromParent();
1514 if (*NewOp != PredOpcode) {
1515 Pred->setDesc(get(*NewOp));
1516 bool succeeded = UpdateOperandRegClass(*Pred);
1517 (void)succeeded;
1518 assert(succeeded && "Operands have incompatible register classes!");
1519 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1520 }
1521
1522 // Ensure that the flags def is live.
1523 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1524 unsigned i = 0, e = Pred->getNumOperands();
1525 for (; i != e; ++i) {
1526 MachineOperand &MO = Pred->getOperand(i);
1527 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1528 MO.setIsDead(false);
1529 break;
1530 }
1531 }
1532 }
1533 return true;
1534}
1535
1536/// Try to optimize a compare instruction. A compare instruction is an
1537/// instruction which produces AArch64::NZCV. It is treated as a true compare
1538/// instruction when there are no uses of its destination register.
1540///
1541/// The following steps are tried in order:
1542/// 1. Convert CmpInstr into an unconditional version.
1543/// 2. Remove CmpInstr if above there is an instruction producing a needed
1544/// condition code or an instruction which can be converted into such an
1545/// instruction.
1546/// Only comparison with zero is supported.
1547bool AArch64InstrInfo::optimizeCompareInstr(
1548 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1549 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1550 assert(CmpInstr.getParent());
1551 assert(MRI);
1552
1553 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1554 int DeadNZCVIdx =
1555 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1556 if (DeadNZCVIdx != -1) {
1557 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1558 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1559 CmpInstr.eraseFromParent();
1560 return true;
1561 }
1562 unsigned Opc = CmpInstr.getOpcode();
1563 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1564 if (NewOpc == Opc)
1565 return false;
1566 const MCInstrDesc &MCID = get(NewOpc);
1567 CmpInstr.setDesc(MCID);
1568 CmpInstr.removeOperand(DeadNZCVIdx);
1569 bool succeeded = UpdateOperandRegClass(CmpInstr);
1570 (void)succeeded;
1571 assert(succeeded && "Some operands reg class are incompatible!");
1572 return true;
1573 }
1574
1575 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1576 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1577 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1578
1579 if (SrcReg2 != 0)
1580 return false;
1581
1582 // CmpInstr is a Compare instruction if destination register is not used.
1583 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1584 return false;
1585
1586 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1587 return true;
1588 return (CmpValue == 0 || CmpValue == 1) &&
1589 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1590}
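// Illustrative outcomes of optimizeCompareInstr (a sketch):
//   subs w8, w1, w2      ; NZCV dead        ->  sub w8, w1, w2
//   add w8, w9, w10 ; cmp w8, #0 ; b.eq ... ->  adds w8, w9, w10 ; b.eq ...
// (the second rewrite is performed by substituteCmpToZero below).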
1591
1592/// Get the opcode of the S (flag-setting) version of Instr.
1593/// If Instr already is the S version, its opcode is returned.
1594/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1595/// or we are not interested in it.
1596static unsigned sForm(MachineInstr &Instr) {
1597 switch (Instr.getOpcode()) {
1598 default:
1599 return AArch64::INSTRUCTION_LIST_END;
1600
1601 case AArch64::ADDSWrr:
1602 case AArch64::ADDSWri:
1603 case AArch64::ADDSXrr:
1604 case AArch64::ADDSXri:
1605 case AArch64::SUBSWrr:
1606 case AArch64::SUBSWri:
1607 case AArch64::SUBSXrr:
1608 case AArch64::SUBSXri:
1609 return Instr.getOpcode();
1610
1611 case AArch64::ADDWrr:
1612 return AArch64::ADDSWrr;
1613 case AArch64::ADDWri:
1614 return AArch64::ADDSWri;
1615 case AArch64::ADDXrr:
1616 return AArch64::ADDSXrr;
1617 case AArch64::ADDXri:
1618 return AArch64::ADDSXri;
1619 case AArch64::ADCWr:
1620 return AArch64::ADCSWr;
1621 case AArch64::ADCXr:
1622 return AArch64::ADCSXr;
1623 case AArch64::SUBWrr:
1624 return AArch64::SUBSWrr;
1625 case AArch64::SUBWri:
1626 return AArch64::SUBSWri;
1627 case AArch64::SUBXrr:
1628 return AArch64::SUBSXrr;
1629 case AArch64::SUBXri:
1630 return AArch64::SUBSXri;
1631 case AArch64::SBCWr:
1632 return AArch64::SBCSWr;
1633 case AArch64::SBCXr:
1634 return AArch64::SBCSXr;
1635 case AArch64::ANDWri:
1636 return AArch64::ANDSWri;
1637 case AArch64::ANDXri:
1638 return AArch64::ANDSXri;
1639 }
1640}
1641
1642/// Check if AArch64::NZCV should be alive in successors of MBB.
1643static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1644 for (auto *BB : MBB->successors())
1645 if (BB->isLiveIn(AArch64::NZCV))
1646 return true;
1647 return false;
1648}
1649
1650/// \returns The condition code operand index for \p Instr if it is a branch
1651/// or select and -1 otherwise.
1652static int
1653findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1654 switch (Instr.getOpcode()) {
1655 default:
1656 return -1;
1657
1658 case AArch64::Bcc: {
1659 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1660 assert(Idx >= 2);
1661 return Idx - 2;
1662 }
1663
1664 case AArch64::CSINVWr:
1665 case AArch64::CSINVXr:
1666 case AArch64::CSINCWr:
1667 case AArch64::CSINCXr:
1668 case AArch64::CSELWr:
1669 case AArch64::CSELXr:
1670 case AArch64::CSNEGWr:
1671 case AArch64::CSNEGXr:
1672 case AArch64::FCSELSrrr:
1673 case AArch64::FCSELDrrr: {
1674 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1675 assert(Idx >= 1);
1676 return Idx - 1;
1677 }
1678 }
1679}
1680
1681/// Find a condition code used by the instruction.
1682/// Returns AArch64CC::Invalid if either the instruction does not use condition
1683/// codes or we don't optimize CmpInstr in the presence of such instructions.
1684static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1685 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1686 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1687 Instr.getOperand(CCIdx).getImm())
1688 : AArch64CC::Invalid;
1689}
1690
1692static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1693 UsedNZCV UsedFlags;
1694 switch (CC) {
1695 default:
1696 break;
1697
1698 case AArch64CC::EQ: // Z set
1699 case AArch64CC::NE: // Z clear
1700 UsedFlags.Z = true;
1701 break;
1702
1703 case AArch64CC::HI: // Z clear and C set
1704 case AArch64CC::LS: // Z set or C clear
1705 UsedFlags.Z = true;
1706 [[fallthrough]];
1707 case AArch64CC::HS: // C set
1708 case AArch64CC::LO: // C clear
1709 UsedFlags.C = true;
1710 break;
1711
1712 case AArch64CC::MI: // N set
1713 case AArch64CC::PL: // N clear
1714 UsedFlags.N = true;
1715 break;
1716
1717 case AArch64CC::VS: // V set
1718 case AArch64CC::VC: // V clear
1719 UsedFlags.V = true;
1720 break;
1721
1722 case AArch64CC::GT: // Z clear, N and V the same
1723 case AArch64CC::LE: // Z set, N and V differ
1724 UsedFlags.Z = true;
1725 [[fallthrough]];
1726 case AArch64CC::GE: // N and V the same
1727 case AArch64CC::LT: // N and V differ
1728 UsedFlags.N = true;
1729 UsedFlags.V = true;
1730 break;
1731 }
1732 return UsedFlags;
1733}
1734
1735/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1736/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1737/// \returns std::nullopt otherwise.
1738///
1739/// Collect instructions using that flags in \p CCUseInstrs if provided.
1740std::optional<UsedNZCV>
1742 const TargetRegisterInfo &TRI,
1743 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1744 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1745 if (MI.getParent() != CmpParent)
1746 return std::nullopt;
1747
1748 if (areCFlagsAliveInSuccessors(CmpParent))
1749 return std::nullopt;
1750
1751 UsedNZCV NZCVUsedAfterCmp;
1752 for (MachineInstr &Instr : instructionsWithoutDebug(
1753 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1754 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1755 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1756 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1757 return std::nullopt;
1758 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1759 if (CCUseInstrs)
1760 CCUseInstrs->push_back(&Instr);
1761 }
1762 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1763 break;
1764 }
1765 return NZCVUsedAfterCmp;
1766}
1767
1768static bool isADDSRegImm(unsigned Opcode) {
1769 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1770}
1771
1772static bool isSUBSRegImm(unsigned Opcode) {
1773 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1774}
1775
1776/// Check if CmpInstr can be substituted by MI.
1777///
1778/// CmpInstr can be substituted:
1779/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1780/// - and, MI and CmpInstr are from the same MachineBB
1781/// - and, condition flags are not alive in successors of the CmpInstr parent
1782/// - and, if MI opcode is the S form there must be no defs of flags between
1783/// MI and CmpInstr
1784/// or if MI opcode is not the S form there must be neither defs of flags
1785/// nor uses of flags between MI and CmpInstr.
1786/// - and, if C/V flags are not used after CmpInstr
1787/// or if the V flag is used but MI produces a poison value when signed
1788/// overflow occurs.
1789static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1790 const TargetRegisterInfo &TRI) {
1791 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1792 // that may or may not set flags.
1793 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1794
1795 const unsigned CmpOpcode = CmpInstr.getOpcode();
1796 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1797 return false;
1798
1799 assert((CmpInstr.getOperand(2).isImm() &&
1800 CmpInstr.getOperand(2).getImm() == 0) &&
1801 "Caller guarantees that CmpInstr compares with constant 0");
1802
1803 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1804 if (!NZVCUsed || NZVCUsed->C)
1805 return false;
1806
1807 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1808 // '%vreg = add ...' or '%vreg = sub ...'.
1809 // Condition flag V is used to indicate signed overflow.
1810 // 1) MI and CmpInstr set N and V to the same value.
1811 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1812 // signed overflow occurs, so CmpInstr could still be simplified away.
1813 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1814 return false;
1815
1816 AccessKind AccessToCheck = AK_Write;
1817 if (sForm(MI) != MI.getOpcode())
1818 AccessToCheck = AK_All;
1819 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1820}
1821
1822/// Substitute an instruction comparing to zero with another instruction
1823/// which produces needed condition flags.
1824///
1825/// Return true on success.
1826bool AArch64InstrInfo::substituteCmpToZero(
1827 MachineInstr &CmpInstr, unsigned SrcReg,
1828 const MachineRegisterInfo &MRI) const {
1829 // Get the unique definition of SrcReg.
1830 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1831 if (!MI)
1832 return false;
1833
1834 const TargetRegisterInfo &TRI = getRegisterInfo();
1835
1836 unsigned NewOpc = sForm(*MI);
1837 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1838 return false;
1839
1840 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1841 return false;
1842
1843 // Update the instruction to set NZCV.
1844 MI->setDesc(get(NewOpc));
1845 CmpInstr.eraseFromParent();
1846 bool succeeded = UpdateOperandRegClass(*MI);
1847 (void)succeeded;
1848 assert(succeeded && "Some operands reg class are incompatible!");
1849 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1850 return true;
1851}
1852
1853/// \returns True if \p CmpInstr can be removed.
1854///
1855/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1856/// codes used in \p CCUseInstrs must be inverted.
1857static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1858 int CmpValue, const TargetRegisterInfo &TRI,
1859 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1860 bool &IsInvertCC) {
1861 assert((CmpValue == 0 || CmpValue == 1) &&
1862 "Only comparisons to 0 or 1 considered for removal!");
1863
1864 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1865 unsigned MIOpc = MI.getOpcode();
1866 if (MIOpc == AArch64::CSINCWr) {
1867 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1868 MI.getOperand(2).getReg() != AArch64::WZR)
1869 return false;
1870 } else if (MIOpc == AArch64::CSINCXr) {
1871 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1872 MI.getOperand(2).getReg() != AArch64::XZR)
1873 return false;
1874 } else {
1875 return false;
1876 }
1877 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1878 if (MICC == AArch64CC::Invalid)
1879 return false;
1880
1881 // NZCV needs to be defined
1882 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1883 return false;
1884
1885 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1886 const unsigned CmpOpcode = CmpInstr.getOpcode();
1887 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1888 if (CmpValue && !IsSubsRegImm)
1889 return false;
1890 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1891 return false;
1892
1893 // MI conditions allowed: eq, ne, mi, pl
1894 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1895 if (MIUsedNZCV.C || MIUsedNZCV.V)
1896 return false;
1897
1898 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1899 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1900 // Condition flags are not used in CmpInstr basic block successors and only
1901 // Z or N flags allowed to be used after CmpInstr within its basic block
1902 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1903 return false;
1904 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1905 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1906 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1907 return false;
1908 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1909 if (MIUsedNZCV.N && !CmpValue)
1910 return false;
1911
1912 // There must be no defs of flags between MI and CmpInstr
1913 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1914 return false;
1915
1916 // Condition code is inverted in the following cases:
1917 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1918 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1919 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1920 (!CmpValue && MICC == AArch64CC::NE);
1921 return true;
1922}
1923
1924/// Remove comparison in csinc-cmp sequence
1925///
1926/// Examples:
1927/// 1. \code
1928/// csinc w9, wzr, wzr, ne
1929/// cmp w9, #0
1930/// b.eq
1931/// \endcode
1932/// to
1933/// \code
1934/// csinc w9, wzr, wzr, ne
1935/// b.ne
1936/// \endcode
1937///
1938/// 2. \code
1939/// csinc x2, xzr, xzr, mi
1940/// cmp x2, #1
1941/// b.pl
1942/// \endcode
1943/// to
1944/// \code
1945/// csinc x2, xzr, xzr, mi
1946/// b.pl
1947/// \endcode
1948///
1949/// \param CmpInstr comparison instruction
1950/// \return True when comparison removed
1951bool AArch64InstrInfo::removeCmpToZeroOrOne(
1952 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1953 const MachineRegisterInfo &MRI) const {
1954 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1955 if (!MI)
1956 return false;
1957 const TargetRegisterInfo &TRI = getRegisterInfo();
1958 SmallVector<MachineInstr *, 4> CCUseInstrs;
1959 bool IsInvertCC = false;
1960 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1961 IsInvertCC))
1962 return false;
1963 // Make transformation
1964 CmpInstr.eraseFromParent();
1965 if (IsInvertCC) {
1966 // Invert condition codes in CmpInstr CC users
1967 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1968 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1969 assert(Idx >= 0 && "Unexpected instruction using CC.");
1970 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1971 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1972 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1973 CCOperand.setImm(CCUse);
1974 }
1975 }
1976 return true;
1977}
1978
1979bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1980 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1981 MI.getOpcode() != AArch64::CATCHRET)
1982 return false;
1983
1984 MachineBasicBlock &MBB = *MI.getParent();
1985 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1986 auto TRI = Subtarget.getRegisterInfo();
1987 DebugLoc DL = MI.getDebugLoc();
1988
1989 if (MI.getOpcode() == AArch64::CATCHRET) {
1990 // Skip to the first instruction before the epilog.
1991 const TargetInstrInfo *TII =
1992 MBB.getParent()->getSubtarget().getInstrInfo();
1993 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1994 MachineBasicBlock::iterator MBBI = MI.getIterator();
1995 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1996 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1997 FirstEpilogSEH != MBB.begin())
1998 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1999 if (FirstEpilogSEH != MBB.begin())
2000 FirstEpilogSEH = std::next(FirstEpilogSEH);
2001 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2002 .addReg(AArch64::X0, RegState::Define)
2003 .addMBB(TargetMBB);
2004 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2005 .addReg(AArch64::X0, RegState::Define)
2006 .addReg(AArch64::X0)
2007 .addMBB(TargetMBB)
2008 .addImm(0);
2009 TargetMBB->setMachineBlockAddressTaken();
2010 return true;
2011 }
2012
2013 Register Reg = MI.getOperand(0).getReg();
2014 const Module &M = *MBB.getParent()->getFunction().getParent();
2015 if (M.getStackProtectorGuard() == "sysreg") {
2016 const AArch64SysReg::SysReg *SrcReg =
2017 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2018 if (!SrcReg)
2019 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2020
2021 // mrs xN, sysreg
2022 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2023 .addDef(Reg, RegState::Renamable)
2024 .addImm(SrcReg->Encoding);
2025 int Offset = M.getStackProtectorGuardOffset();
2026 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2027 // ldr xN, [xN, #offset]
2028 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset / 8);
2032 } else if (Offset >= -256 && Offset <= 255) {
2033 // ldur xN, [xN, #offset]
2034 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2035 .addDef(Reg)
2036 .addUse(Reg, RegState::Kill)
2037 .addImm(Offset);
2038 } else if (Offset >= -4095 && Offset <= 4095) {
2039 if (Offset > 0) {
2040 // add xN, xN, #offset
2041 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2042 .addDef(Reg)
2043 .addUse(Reg, RegState::Kill)
2044 .addImm(Offset)
2045 .addImm(0);
2046 } else {
2047 // sub xN, xN, #offset
2048 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2049 .addDef(Reg)
2050 .addUse(Reg, RegState::Kill)
2051 .addImm(-Offset)
2052 .addImm(0);
2053 }
2054 // ldr xN, [xN]
2055 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2056 .addDef(Reg)
2057 .addUse(Reg, RegState::Kill)
2058 .addImm(0);
2059 } else {
2060 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2061 // than 32760.
2062 // It might be nice to use AArch64::MOVi32imm here, which would get
2063 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2064 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2065 // AArch64FrameLowering might help us find such a scratch register
2066 // though. If we failed to find a scratch register, we could emit a
2067 // stream of add instructions to build up the immediate. Or, we could try
2068 // to insert a AArch64::MOVi32imm before register allocation so that we
2069 // didn't need to scavenge for a scratch register.
2070 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2071 }
2072 MBB.erase(MI);
2073 return true;
2074 }
2075
2076 const GlobalValue *GV =
2077 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2078 const TargetMachine &TM = MBB.getParent()->getTarget();
2079 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2080 const unsigned char MO_NC = AArch64II::MO_NC;
2081
2082 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2083 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2084 .addGlobalAddress(GV, 0, OpFlags);
2085 if (Subtarget.isTargetILP32()) {
2086 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2087 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2088 .addDef(Reg32, RegState::Dead)
2089 .addUse(Reg, RegState::Kill)
2090 .addImm(0)
2091 .addMemOperand(*MI.memoperands_begin())
2092 .addDef(Reg, RegState::Implicit);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2095 .addReg(Reg, RegState::Kill)
2096 .addImm(0)
2097 .addMemOperand(*MI.memoperands_begin());
2098 }
2099 } else if (TM.getCodeModel() == CodeModel::Large) {
2100 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2101 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2102 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2103 .addImm(0);
2104 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2105 .addReg(Reg, RegState::Kill)
2106 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2107 .addImm(16);
2108 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2109 .addReg(Reg, RegState::Kill)
2110 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2111 .addImm(32);
2112 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2113 .addReg(Reg, RegState::Kill)
2114 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2115 .addImm(48);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2117 .addReg(Reg, RegState::Kill)
2118 .addImm(0)
2119 .addMemOperand(*MI.memoperands_begin());
2120 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2121 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2122 .addGlobalAddress(GV, 0, OpFlags);
2123 } else {
2124 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2125 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2126 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2127 if (Subtarget.isTargetILP32()) {
2128 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2129 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2130 .addDef(Reg32, RegState::Dead)
2131 .addUse(Reg, RegState::Kill)
2132 .addGlobalAddress(GV, 0, LoFlags)
2133 .addMemOperand(*MI.memoperands_begin())
2134 .addDef(Reg, RegState::Implicit);
2135 } else {
2136 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2137 .addReg(Reg, RegState::Kill)
2138 .addGlobalAddress(GV, 0, LoFlags)
2139 .addMemOperand(*MI.memoperands_begin());
2140 }
2141 }
2142
2143 MBB.erase(MI);
2144
2145 return true;
2146}
2147
2148// Return true if this instruction simply sets its single destination register
2149// to zero. This is equivalent to a register rename of the zero-register.
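// Illustrative cases (not from the original source): 'movz w0, #0',
// 'and w0, wzr, #0xff' and 'mov w0, wzr' (a COPY from WZR) all qualify,
// since each only materialises the value of the zero register.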
2150static bool isGPRZero(const MachineInstr &MI) {
2151 switch (MI.getOpcode()) {
2152 default:
2153 break;
2154 case AArch64::MOVZWi:
2155 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2156 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2157 assert(MI.getDesc().getNumOperands() == 3 &&
2158 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2159 return true;
2160 }
2161 break;
2162 case AArch64::ANDWri: // and Rd, Rzr, #imm
2163 return MI.getOperand(1).getReg() == AArch64::WZR;
2164 case AArch64::ANDXri:
2165 return MI.getOperand(1).getReg() == AArch64::XZR;
2166 case TargetOpcode::COPY:
2167 return MI.getOperand(1).getReg() == AArch64::WZR;
2168 }
2169 return false;
2170}
2171
2172// Return true if this instruction simply renames a general register without
2173// modifying bits.
2174static bool isGPRCopy(const MachineInstr &MI) {
2175 switch (MI.getOpcode()) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 // GPR32 copies will be lowered to ORRXrs
2180 Register DstReg = MI.getOperand(0).getReg();
2181 return (AArch64::GPR32RegClass.contains(DstReg) ||
2182 AArch64::GPR64RegClass.contains(DstReg));
2183 }
2184 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2185 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2186 assert(MI.getDesc().getNumOperands() == 4 &&
2187 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2188 return true;
2189 }
2190 break;
2191 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2192 if (MI.getOperand(2).getImm() == 0) {
2193 assert(MI.getDesc().getNumOperands() == 4 &&
2194 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2195 return true;
2196 }
2197 break;
2198 }
2199 return false;
2200}
2201
2202// Return true if this instruction simply renames a floating-point/SIMD
2203// register without modifying bits.
2204static bool isFPRCopy(const MachineInstr &MI) {
2205 switch (MI.getOpcode()) {
2206 default:
2207 break;
2208 case TargetOpcode::COPY: {
2209 Register DstReg = MI.getOperand(0).getReg();
2210 return AArch64::FPR128RegClass.contains(DstReg);
2211 }
2212 case AArch64::ORRv16i8:
2213 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2214 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2215 "invalid ORRv16i8 operands");
2216 return true;
2217 }
2218 break;
2219 }
2220 return false;
2221}
2222
2223Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2224 int &FrameIndex) const {
2225 switch (MI.getOpcode()) {
2226 default:
2227 break;
2228 case AArch64::LDRWui:
2229 case AArch64::LDRXui:
2230 case AArch64::LDRBui:
2231 case AArch64::LDRHui:
2232 case AArch64::LDRSui:
2233 case AArch64::LDRDui:
2234 case AArch64::LDRQui:
2235 case AArch64::LDR_PXI:
2236 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2237 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2238 FrameIndex = MI.getOperand(1).getIndex();
2239 return MI.getOperand(0).getReg();
2240 }
2241 break;
2242 }
2243
2244 return 0;
2245}
2246
2247Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2248 int &FrameIndex) const {
2249 switch (MI.getOpcode()) {
2250 default:
2251 break;
2252 case AArch64::STRWui:
2253 case AArch64::STRXui:
2254 case AArch64::STRBui:
2255 case AArch64::STRHui:
2256 case AArch64::STRSui:
2257 case AArch64::STRDui:
2258 case AArch64::STRQui:
2259 case AArch64::STR_PXI:
2260 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2261 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2262 FrameIndex = MI.getOperand(1).getIndex();
2263 return MI.getOperand(0).getReg();
2264 }
2265 break;
2266 }
2267 return 0;
2268}
2269
2270/// Check all MachineMemOperands for a hint to suppress pairing.
2271bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2272 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2273 return MMO->getFlags() & MOSuppressPair;
2274 });
2275}
2276
2277/// Set a flag on the first MachineMemOperand to suppress pairing.
2278void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2279 if (MI.memoperands_empty())
2280 return;
2281 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2282}
2283
2284/// Check all MachineMemOperands for a hint that the load/store is strided.
2285bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2286 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2287 return MMO->getFlags() & MOStridedAccess;
2288 });
2289}
2290
2291bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2292 switch (Opc) {
2293 default:
2294 return false;
2295 case AArch64::STURSi:
2296 case AArch64::STRSpre:
2297 case AArch64::STURDi:
2298 case AArch64::STRDpre:
2299 case AArch64::STURQi:
2300 case AArch64::STRQpre:
2301 case AArch64::STURBBi:
2302 case AArch64::STURHHi:
2303 case AArch64::STURWi:
2304 case AArch64::STRWpre:
2305 case AArch64::STURXi:
2306 case AArch64::STRXpre:
2307 case AArch64::LDURSi:
2308 case AArch64::LDRSpre:
2309 case AArch64::LDURDi:
2310 case AArch64::LDRDpre:
2311 case AArch64::LDURQi:
2312 case AArch64::LDRQpre:
2313 case AArch64::LDURWi:
2314 case AArch64::LDRWpre:
2315 case AArch64::LDURXi:
2316 case AArch64::LDRXpre:
2317 case AArch64::LDRSWpre:
2318 case AArch64::LDURSWi:
2319 case AArch64::LDURHHi:
2320 case AArch64::LDURBBi:
2321 case AArch64::LDURSBWi:
2322 case AArch64::LDURSHWi:
2323 return true;
2324 }
2325}
2326
2327std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2328 switch (Opc) {
2329 default: return {};
2330 case AArch64::PRFMui: return AArch64::PRFUMi;
2331 case AArch64::LDRXui: return AArch64::LDURXi;
2332 case AArch64::LDRWui: return AArch64::LDURWi;
2333 case AArch64::LDRBui: return AArch64::LDURBi;
2334 case AArch64::LDRHui: return AArch64::LDURHi;
2335 case AArch64::LDRSui: return AArch64::LDURSi;
2336 case AArch64::LDRDui: return AArch64::LDURDi;
2337 case AArch64::LDRQui: return AArch64::LDURQi;
2338 case AArch64::LDRBBui: return AArch64::LDURBBi;
2339 case AArch64::LDRHHui: return AArch64::LDURHHi;
2340 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2341 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2342 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2343 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2344 case AArch64::LDRSWui: return AArch64::LDURSWi;
2345 case AArch64::STRXui: return AArch64::STURXi;
2346 case AArch64::STRWui: return AArch64::STURWi;
2347 case AArch64::STRBui: return AArch64::STURBi;
2348 case AArch64::STRHui: return AArch64::STURHi;
2349 case AArch64::STRSui: return AArch64::STURSi;
2350 case AArch64::STRDui: return AArch64::STURDi;
2351 case AArch64::STRQui: return AArch64::STURQi;
2352 case AArch64::STRBBui: return AArch64::STURBBi;
2353 case AArch64::STRHHui: return AArch64::STURHHi;
2354 }
2355}
2356
2357unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2358 switch (Opc) {
2359 default:
2360 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2361 case AArch64::ADDG:
2362 case AArch64::LDAPURBi:
2363 case AArch64::LDAPURHi:
2364 case AArch64::LDAPURi:
2365 case AArch64::LDAPURSBWi:
2366 case AArch64::LDAPURSBXi:
2367 case AArch64::LDAPURSHWi:
2368 case AArch64::LDAPURSHXi:
2369 case AArch64::LDAPURSWi:
2370 case AArch64::LDAPURXi:
2371 case AArch64::LDR_PPXI:
2372 case AArch64::LDR_PXI:
2373 case AArch64::LDR_ZXI:
2374 case AArch64::LDR_ZZXI:
2375 case AArch64::LDR_ZZZXI:
2376 case AArch64::LDR_ZZZZXI:
2377 case AArch64::LDRBBui:
2378 case AArch64::LDRBui:
2379 case AArch64::LDRDui:
2380 case AArch64::LDRHHui:
2381 case AArch64::LDRHui:
2382 case AArch64::LDRQui:
2383 case AArch64::LDRSBWui:
2384 case AArch64::LDRSBXui:
2385 case AArch64::LDRSHWui:
2386 case AArch64::LDRSHXui:
2387 case AArch64::LDRSui:
2388 case AArch64::LDRSWui:
2389 case AArch64::LDRWui:
2390 case AArch64::LDRXui:
2391 case AArch64::LDURBBi:
2392 case AArch64::LDURBi:
2393 case AArch64::LDURDi:
2394 case AArch64::LDURHHi:
2395 case AArch64::LDURHi:
2396 case AArch64::LDURQi:
2397 case AArch64::LDURSBWi:
2398 case AArch64::LDURSBXi:
2399 case AArch64::LDURSHWi:
2400 case AArch64::LDURSHXi:
2401 case AArch64::LDURSi:
2402 case AArch64::LDURSWi:
2403 case AArch64::LDURWi:
2404 case AArch64::LDURXi:
2405 case AArch64::PRFMui:
2406 case AArch64::PRFUMi:
2407 case AArch64::ST2Gi:
2408 case AArch64::STGi:
2409 case AArch64::STLURBi:
2410 case AArch64::STLURHi:
2411 case AArch64::STLURWi:
2412 case AArch64::STLURXi:
2413 case AArch64::StoreSwiftAsyncContext:
2414 case AArch64::STR_PPXI:
2415 case AArch64::STR_PXI:
2416 case AArch64::STR_ZXI:
2417 case AArch64::STR_ZZXI:
2418 case AArch64::STR_ZZZXI:
2419 case AArch64::STR_ZZZZXI:
2420 case AArch64::STRBBui:
2421 case AArch64::STRBui:
2422 case AArch64::STRDui:
2423 case AArch64::STRHHui:
2424 case AArch64::STRHui:
2425 case AArch64::STRQui:
2426 case AArch64::STRSui:
2427 case AArch64::STRWui:
2428 case AArch64::STRXui:
2429 case AArch64::STURBBi:
2430 case AArch64::STURBi:
2431 case AArch64::STURDi:
2432 case AArch64::STURHHi:
2433 case AArch64::STURHi:
2434 case AArch64::STURQi:
2435 case AArch64::STURSi:
2436 case AArch64::STURWi:
2437 case AArch64::STURXi:
2438 case AArch64::STZ2Gi:
2439 case AArch64::STZGi:
2440 case AArch64::TAGPstack:
2441 return 2;
2442 case AArch64::LD1B_D_IMM:
2443 case AArch64::LD1B_H_IMM:
2444 case AArch64::LD1B_IMM:
2445 case AArch64::LD1B_S_IMM:
2446 case AArch64::LD1D_IMM:
2447 case AArch64::LD1H_D_IMM:
2448 case AArch64::LD1H_IMM:
2449 case AArch64::LD1H_S_IMM:
2450 case AArch64::LD1RB_D_IMM:
2451 case AArch64::LD1RB_H_IMM:
2452 case AArch64::LD1RB_IMM:
2453 case AArch64::LD1RB_S_IMM:
2454 case AArch64::LD1RD_IMM:
2455 case AArch64::LD1RH_D_IMM:
2456 case AArch64::LD1RH_IMM:
2457 case AArch64::LD1RH_S_IMM:
2458 case AArch64::LD1RSB_D_IMM:
2459 case AArch64::LD1RSB_H_IMM:
2460 case AArch64::LD1RSB_S_IMM:
2461 case AArch64::LD1RSH_D_IMM:
2462 case AArch64::LD1RSH_S_IMM:
2463 case AArch64::LD1RSW_IMM:
2464 case AArch64::LD1RW_D_IMM:
2465 case AArch64::LD1RW_IMM:
2466 case AArch64::LD1SB_D_IMM:
2467 case AArch64::LD1SB_H_IMM:
2468 case AArch64::LD1SB_S_IMM:
2469 case AArch64::LD1SH_D_IMM:
2470 case AArch64::LD1SH_S_IMM:
2471 case AArch64::LD1SW_D_IMM:
2472 case AArch64::LD1W_D_IMM:
2473 case AArch64::LD1W_IMM:
2474 case AArch64::LD2B_IMM:
2475 case AArch64::LD2D_IMM:
2476 case AArch64::LD2H_IMM:
2477 case AArch64::LD2W_IMM:
2478 case AArch64::LD3B_IMM:
2479 case AArch64::LD3D_IMM:
2480 case AArch64::LD3H_IMM:
2481 case AArch64::LD3W_IMM:
2482 case AArch64::LD4B_IMM:
2483 case AArch64::LD4D_IMM:
2484 case AArch64::LD4H_IMM:
2485 case AArch64::LD4W_IMM:
2486 case AArch64::LDG:
2487 case AArch64::LDNF1B_D_IMM:
2488 case AArch64::LDNF1B_H_IMM:
2489 case AArch64::LDNF1B_IMM:
2490 case AArch64::LDNF1B_S_IMM:
2491 case AArch64::LDNF1D_IMM:
2492 case AArch64::LDNF1H_D_IMM:
2493 case AArch64::LDNF1H_IMM:
2494 case AArch64::LDNF1H_S_IMM:
2495 case AArch64::LDNF1SB_D_IMM:
2496 case AArch64::LDNF1SB_H_IMM:
2497 case AArch64::LDNF1SB_S_IMM:
2498 case AArch64::LDNF1SH_D_IMM:
2499 case AArch64::LDNF1SH_S_IMM:
2500 case AArch64::LDNF1SW_D_IMM:
2501 case AArch64::LDNF1W_D_IMM:
2502 case AArch64::LDNF1W_IMM:
2503 case AArch64::LDNPDi:
2504 case AArch64::LDNPQi:
2505 case AArch64::LDNPSi:
2506 case AArch64::LDNPWi:
2507 case AArch64::LDNPXi:
2508 case AArch64::LDNT1B_ZRI:
2509 case AArch64::LDNT1D_ZRI:
2510 case AArch64::LDNT1H_ZRI:
2511 case AArch64::LDNT1W_ZRI:
2512 case AArch64::LDPDi:
2513 case AArch64::LDPQi:
2514 case AArch64::LDPSi:
2515 case AArch64::LDPWi:
2516 case AArch64::LDPXi:
2517 case AArch64::LDRBBpost:
2518 case AArch64::LDRBBpre:
2519 case AArch64::LDRBpost:
2520 case AArch64::LDRBpre:
2521 case AArch64::LDRDpost:
2522 case AArch64::LDRDpre:
2523 case AArch64::LDRHHpost:
2524 case AArch64::LDRHHpre:
2525 case AArch64::LDRHpost:
2526 case AArch64::LDRHpre:
2527 case AArch64::LDRQpost:
2528 case AArch64::LDRQpre:
2529 case AArch64::LDRSpost:
2530 case AArch64::LDRSpre:
2531 case AArch64::LDRWpost:
2532 case AArch64::LDRWpre:
2533 case AArch64::LDRXpost:
2534 case AArch64::LDRXpre:
2535 case AArch64::ST1B_D_IMM:
2536 case AArch64::ST1B_H_IMM:
2537 case AArch64::ST1B_IMM:
2538 case AArch64::ST1B_S_IMM:
2539 case AArch64::ST1D_IMM:
2540 case AArch64::ST1H_D_IMM:
2541 case AArch64::ST1H_IMM:
2542 case AArch64::ST1H_S_IMM:
2543 case AArch64::ST1W_D_IMM:
2544 case AArch64::ST1W_IMM:
2545 case AArch64::ST2B_IMM:
2546 case AArch64::ST2D_IMM:
2547 case AArch64::ST2H_IMM:
2548 case AArch64::ST2W_IMM:
2549 case AArch64::ST3B_IMM:
2550 case AArch64::ST3D_IMM:
2551 case AArch64::ST3H_IMM:
2552 case AArch64::ST3W_IMM:
2553 case AArch64::ST4B_IMM:
2554 case AArch64::ST4D_IMM:
2555 case AArch64::ST4H_IMM:
2556 case AArch64::ST4W_IMM:
2557 case AArch64::STGPi:
2558 case AArch64::STGPreIndex:
2559 case AArch64::STZGPreIndex:
2560 case AArch64::ST2GPreIndex:
2561 case AArch64::STZ2GPreIndex:
2562 case AArch64::STGPostIndex:
2563 case AArch64::STZGPostIndex:
2564 case AArch64::ST2GPostIndex:
2565 case AArch64::STZ2GPostIndex:
2566 case AArch64::STNPDi:
2567 case AArch64::STNPQi:
2568 case AArch64::STNPSi:
2569 case AArch64::STNPWi:
2570 case AArch64::STNPXi:
2571 case AArch64::STNT1B_ZRI:
2572 case AArch64::STNT1D_ZRI:
2573 case AArch64::STNT1H_ZRI:
2574 case AArch64::STNT1W_ZRI:
2575 case AArch64::STPDi:
2576 case AArch64::STPQi:
2577 case AArch64::STPSi:
2578 case AArch64::STPWi:
2579 case AArch64::STPXi:
2580 case AArch64::STRBBpost:
2581 case AArch64::STRBBpre:
2582 case AArch64::STRBpost:
2583 case AArch64::STRBpre:
2584 case AArch64::STRDpost:
2585 case AArch64::STRDpre:
2586 case AArch64::STRHHpost:
2587 case AArch64::STRHHpre:
2588 case AArch64::STRHpost:
2589 case AArch64::STRHpre:
2590 case AArch64::STRQpost:
2591 case AArch64::STRQpre:
2592 case AArch64::STRSpost:
2593 case AArch64::STRSpre:
2594 case AArch64::STRWpost:
2595 case AArch64::STRWpre:
2596 case AArch64::STRXpost:
2597 case AArch64::STRXpre:
2598 return 3;
2599 case AArch64::LDPDpost:
2600 case AArch64::LDPDpre:
2601 case AArch64::LDPQpost:
2602 case AArch64::LDPQpre:
2603 case AArch64::LDPSpost:
2604 case AArch64::LDPSpre:
2605 case AArch64::LDPWpost:
2606 case AArch64::LDPWpre:
2607 case AArch64::LDPXpost:
2608 case AArch64::LDPXpre:
2609 case AArch64::STGPpre:
2610 case AArch64::STGPpost:
2611 case AArch64::STPDpost:
2612 case AArch64::STPDpre:
2613 case AArch64::STPQpost:
2614 case AArch64::STPQpre:
2615 case AArch64::STPSpost:
2616 case AArch64::STPSpre:
2617 case AArch64::STPWpost:
2618 case AArch64::STPWpre:
2619 case AArch64::STPXpost:
2620 case AArch64::STPXpre:
2621 return 4;
2622 }
2623}
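// Illustrative reading of the returned index (not part of the original
// source): for 'LDRXui $x0, $x1, 4' the scaled immediate is operand 2, for
// 'LDPXi $x0, $x1, $sp, 2' it is operand 3, and for the writeback form
// 'LDPXpre $sp, $x0, $x1, $sp, 2' it is operand 4.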
2624
2625bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2626 switch (MI.getOpcode()) {
2627 default:
2628 return false;
2629 // Scaled instructions.
2630 case AArch64::STRSui:
2631 case AArch64::STRDui:
2632 case AArch64::STRQui:
2633 case AArch64::STRXui:
2634 case AArch64::STRWui:
2635 case AArch64::LDRSui:
2636 case AArch64::LDRDui:
2637 case AArch64::LDRQui:
2638 case AArch64::LDRXui:
2639 case AArch64::LDRWui:
2640 case AArch64::LDRSWui:
2641 // Unscaled instructions.
2642 case AArch64::STURSi:
2643 case AArch64::STRSpre:
2644 case AArch64::STURDi:
2645 case AArch64::STRDpre:
2646 case AArch64::STURQi:
2647 case AArch64::STRQpre:
2648 case AArch64::STURWi:
2649 case AArch64::STRWpre:
2650 case AArch64::STURXi:
2651 case AArch64::STRXpre:
2652 case AArch64::LDURSi:
2653 case AArch64::LDRSpre:
2654 case AArch64::LDURDi:
2655 case AArch64::LDRDpre:
2656 case AArch64::LDURQi:
2657 case AArch64::LDRQpre:
2658 case AArch64::LDURWi:
2659 case AArch64::LDRWpre:
2660 case AArch64::LDURXi:
2661 case AArch64::LDRXpre:
2662 case AArch64::LDURSWi:
2663 case AArch64::LDRSWpre:
2664 return true;
2665 }
2666}
2667
2668bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2669 switch (MI.getOpcode()) {
2670 default:
2671 assert((!MI.isCall() || !MI.isReturn()) &&
2672 "Unexpected instruction - was a new tail call opcode introduced?");
2673 return false;
2674 case AArch64::TCRETURNdi:
2675 case AArch64::TCRETURNri:
2676 case AArch64::TCRETURNrix16x17:
2677 case AArch64::TCRETURNrix17:
2678 case AArch64::TCRETURNrinotx16:
2679 case AArch64::TCRETURNriALL:
2680 case AArch64::AUTH_TCRETURN:
2681 case AArch64::AUTH_TCRETURN_BTI:
2682 return true;
2683 }
2684}
2685
2686unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2687 switch (Opc) {
2688 default:
2689 llvm_unreachable("Opcode has no flag setting equivalent!");
2690 // 32-bit cases:
2691 case AArch64::ADDWri:
2692 return AArch64::ADDSWri;
2693 case AArch64::ADDWrr:
2694 return AArch64::ADDSWrr;
2695 case AArch64::ADDWrs:
2696 return AArch64::ADDSWrs;
2697 case AArch64::ADDWrx:
2698 return AArch64::ADDSWrx;
2699 case AArch64::ANDWri:
2700 return AArch64::ANDSWri;
2701 case AArch64::ANDWrr:
2702 return AArch64::ANDSWrr;
2703 case AArch64::ANDWrs:
2704 return AArch64::ANDSWrs;
2705 case AArch64::BICWrr:
2706 return AArch64::BICSWrr;
2707 case AArch64::BICWrs:
2708 return AArch64::BICSWrs;
2709 case AArch64::SUBWri:
2710 return AArch64::SUBSWri;
2711 case AArch64::SUBWrr:
2712 return AArch64::SUBSWrr;
2713 case AArch64::SUBWrs:
2714 return AArch64::SUBSWrs;
2715 case AArch64::SUBWrx:
2716 return AArch64::SUBSWrx;
2717 // 64-bit cases:
2718 case AArch64::ADDXri:
2719 return AArch64::ADDSXri;
2720 case AArch64::ADDXrr:
2721 return AArch64::ADDSXrr;
2722 case AArch64::ADDXrs:
2723 return AArch64::ADDSXrs;
2724 case AArch64::ADDXrx:
2725 return AArch64::ADDSXrx;
2726 case AArch64::ANDXri:
2727 return AArch64::ANDSXri;
2728 case AArch64::ANDXrr:
2729 return AArch64::ANDSXrr;
2730 case AArch64::ANDXrs:
2731 return AArch64::ANDSXrs;
2732 case AArch64::BICXrr:
2733 return AArch64::BICSXrr;
2734 case AArch64::BICXrs:
2735 return AArch64::BICSXrs;
2736 case AArch64::SUBXri:
2737 return AArch64::SUBSXri;
2738 case AArch64::SUBXrr:
2739 return AArch64::SUBSXrr;
2740 case AArch64::SUBXrs:
2741 return AArch64::SUBSXrs;
2742 case AArch64::SUBXrx:
2743 return AArch64::SUBSXrx;
2744 // SVE instructions:
2745 case AArch64::AND_PPzPP:
2746 return AArch64::ANDS_PPzPP;
2747 case AArch64::BIC_PPzPP:
2748 return AArch64::BICS_PPzPP;
2749 case AArch64::EOR_PPzPP:
2750 return AArch64::EORS_PPzPP;
2751 case AArch64::NAND_PPzPP:
2752 return AArch64::NANDS_PPzPP;
2753 case AArch64::NOR_PPzPP:
2754 return AArch64::NORS_PPzPP;
2755 case AArch64::ORN_PPzPP:
2756 return AArch64::ORNS_PPzPP;
2757 case AArch64::ORR_PPzPP:
2758 return AArch64::ORRS_PPzPP;
2759 case AArch64::BRKA_PPzP:
2760 return AArch64::BRKAS_PPzP;
2761 case AArch64::BRKPA_PPzPP:
2762 return AArch64::BRKPAS_PPzPP;
2763 case AArch64::BRKB_PPzP:
2764 return AArch64::BRKBS_PPzP;
2765 case AArch64::BRKPB_PPzPP:
2766 return AArch64::BRKPBS_PPzPP;
2767 case AArch64::BRKN_PPzP:
2768 return AArch64::BRKNS_PPzP;
2769 case AArch64::RDFFR_PPz:
2770 return AArch64::RDFFRS_PPz;
2771 case AArch64::PTRUE_B:
2772 return AArch64::PTRUES_B;
2773 }
2774}
2775
2776// Is this a candidate for ld/st merging or pairing? For example, we don't
2777// touch volatiles or load/stores that have a hint to avoid pair formation.
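// For illustration (not from the original source), the kind of rewrite this
// gate protects is:
//   ldr x0, [sp, #8]
//   ldr x1, [sp, #16]
// being combined by the load/store optimizer into
//   ldp x0, x1, [sp, #8]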
2778bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2779
2780 bool IsPreLdSt = isPreLdSt(MI);
2781
2782 // If this is a volatile load/store, don't mess with it.
2783 if (MI.hasOrderedMemoryRef())
2784 return false;
2785
2786 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2787 // For Pre-inc LD/ST, the operand is shifted by one.
2788 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2789 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2790 "Expected a reg or frame index operand.");
2791
2792 // For Pre-indexed addressing quadword instructions, the third operand is the
2793 // immediate value.
2794 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2795
2796 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2797 return false;
2798
2799 // Can't merge/pair if the instruction modifies the base register.
2800 // e.g., ldr x0, [x0]
2801 // This case will never occur with an FI base.
2802 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2803 // STR<S,D,Q,W,X>pre, it can be merged.
2804 // For example:
2805 // ldr q0, [x11, #32]!
2806 // ldr q1, [x11, #16]
2807 // to
2808 // ldp q0, q1, [x11, #32]!
2809 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2810 Register BaseReg = MI.getOperand(1).getReg();
2811 const TargetRegisterInfo *TRI = &getRegisterInfo();
2812 if (MI.modifiesRegister(BaseReg, TRI))
2813 return false;
2814 }
2815
2816 // Check if this load/store has a hint to avoid pair formation.
2817 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2818 if (isLdStPairSuppressed(MI))
2819 return false;
2820
2821 // Do not pair any callee-save store/reload instructions in the
2822 // prologue/epilogue if the CFI information encoded the operations as separate
2823 // instructions, as that will cause the size of the actual prologue to mismatch
2824 // with the prologue size recorded in the Windows CFI.
2825 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2826 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2827 MI.getMF()->getFunction().needsUnwindTableEntry();
2828 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2829 MI.getFlag(MachineInstr::FrameDestroy)))
2830 return false;
2831
2832 // On some CPUs quad load/store pairs are slower than two single load/stores.
2833 if (Subtarget.isPaired128Slow()) {
2834 switch (MI.getOpcode()) {
2835 default:
2836 break;
2837 case AArch64::LDURQi:
2838 case AArch64::STURQi:
2839 case AArch64::LDRQui:
2840 case AArch64::STRQui:
2841 return false;
2842 }
2843 }
2844
2845 return true;
2846}
2847
2848bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2849 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2850 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2851 const TargetRegisterInfo *TRI) const {
2852 if (!LdSt.mayLoadOrStore())
2853 return false;
2854
2855 const MachineOperand *BaseOp;
2856 TypeSize WidthN(0, false);
2857 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2858 WidthN, TRI))
2859 return false;
2860 // The maximum vscale is 16 under AArch64, return the maximal extent for the
2861 // vector.
2862 Width = LocationSize::precise(WidthN);
2863 BaseOps.push_back(BaseOp);
2864 return true;
2865}
2866
2867std::optional<ExtAddrMode>
2868AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2869 const TargetRegisterInfo *TRI) const {
2870 const MachineOperand *Base; // Filled with the base operand of MI.
2871 int64_t Offset; // Filled with the offset of MI.
2872 bool OffsetIsScalable;
2873 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2874 return std::nullopt;
2875
2876 if (!Base->isReg())
2877 return std::nullopt;
2878 ExtAddrMode AM;
2879 AM.BaseReg = Base->getReg();
2880 AM.Displacement = Offset;
2881 AM.ScaledReg = 0;
2882 AM.Scale = 0;
2883 return AM;
2884}
2885
2886bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2887 Register Reg,
2888 const MachineInstr &AddrI,
2889 ExtAddrMode &AM) const {
2890 // Filter out instructions into which we cannot fold.
2891 unsigned NumBytes;
2892 int64_t OffsetScale = 1;
2893 switch (MemI.getOpcode()) {
2894 default:
2895 return false;
2896
2897 case AArch64::LDURQi:
2898 case AArch64::STURQi:
2899 NumBytes = 16;
2900 break;
2901
2902 case AArch64::LDURDi:
2903 case AArch64::STURDi:
2904 case AArch64::LDURXi:
2905 case AArch64::STURXi:
2906 NumBytes = 8;
2907 break;
2908
2909 case AArch64::LDURWi:
2910 case AArch64::LDURSWi:
2911 case AArch64::STURWi:
2912 NumBytes = 4;
2913 break;
2914
2915 case AArch64::LDURHi:
2916 case AArch64::STURHi:
2917 case AArch64::LDURHHi:
2918 case AArch64::STURHHi:
2919 case AArch64::LDURSHXi:
2920 case AArch64::LDURSHWi:
2921 NumBytes = 2;
2922 break;
2923
2924 case AArch64::LDRBroX:
2925 case AArch64::LDRBBroX:
2926 case AArch64::LDRSBXroX:
2927 case AArch64::LDRSBWroX:
2928 case AArch64::STRBroX:
2929 case AArch64::STRBBroX:
2930 case AArch64::LDURBi:
2931 case AArch64::LDURBBi:
2932 case AArch64::LDURSBXi:
2933 case AArch64::LDURSBWi:
2934 case AArch64::STURBi:
2935 case AArch64::STURBBi:
2936 case AArch64::LDRBui:
2937 case AArch64::LDRBBui:
2938 case AArch64::LDRSBXui:
2939 case AArch64::LDRSBWui:
2940 case AArch64::STRBui:
2941 case AArch64::STRBBui:
2942 NumBytes = 1;
2943 break;
2944
2945 case AArch64::LDRQroX:
2946 case AArch64::STRQroX:
2947 case AArch64::LDRQui:
2948 case AArch64::STRQui:
2949 NumBytes = 16;
2950 OffsetScale = 16;
2951 break;
2952
2953 case AArch64::LDRDroX:
2954 case AArch64::STRDroX:
2955 case AArch64::LDRXroX:
2956 case AArch64::STRXroX:
2957 case AArch64::LDRDui:
2958 case AArch64::STRDui:
2959 case AArch64::LDRXui:
2960 case AArch64::STRXui:
2961 NumBytes = 8;
2962 OffsetScale = 8;
2963 break;
2964
2965 case AArch64::LDRWroX:
2966 case AArch64::LDRSWroX:
2967 case AArch64::STRWroX:
2968 case AArch64::LDRWui:
2969 case AArch64::LDRSWui:
2970 case AArch64::STRWui:
2971 NumBytes = 4;
2972 OffsetScale = 4;
2973 break;
2974
2975 case AArch64::LDRHroX:
2976 case AArch64::STRHroX:
2977 case AArch64::LDRHHroX:
2978 case AArch64::STRHHroX:
2979 case AArch64::LDRSHXroX:
2980 case AArch64::LDRSHWroX:
2981 case AArch64::LDRHui:
2982 case AArch64::STRHui:
2983 case AArch64::LDRHHui:
2984 case AArch64::STRHHui:
2985 case AArch64::LDRSHXui:
2986 case AArch64::LDRSHWui:
2987 NumBytes = 2;
2988 OffsetScale = 2;
2989 break;
2990 }
2991
2992 // Check the fold operand is not the loaded/stored value.
2993 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2994 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2995 return false;
2996
2997 // Handle memory instructions with a [Reg, Reg] addressing mode.
2998 if (MemI.getOperand(2).isReg()) {
2999 // Bail if the addressing mode already includes extension of the offset
3000 // register.
3001 if (MemI.getOperand(3).getImm())
3002 return false;
3003
3004 // Check if we actually have a scaled offset.
3005 if (MemI.getOperand(4).getImm() == 0)
3006 OffsetScale = 1;
3007
3008 // If the address instructions is folded into the base register, then the
3009 // addressing mode must not have a scale. Then we can swap the base and the
3010 // scaled registers.
3011 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3012 return false;
3013
3014 switch (AddrI.getOpcode()) {
3015 default:
3016 return false;
3017
3018 case AArch64::SBFMXri:
3019 // sxtw Xa, Wm
3020 // ldr Xd, [Xn, Xa, lsl #N]
3021 // ->
3022 // ldr Xd, [Xn, Wm, sxtw #N]
3023 if (AddrI.getOperand(2).getImm() != 0 ||
3024 AddrI.getOperand(3).getImm() != 31)
3025 return false;
3026
3027 AM.BaseReg = MemI.getOperand(1).getReg();
3028 if (AM.BaseReg == Reg)
3029 AM.BaseReg = MemI.getOperand(2).getReg();
3030 AM.ScaledReg = AddrI.getOperand(1).getReg();
3031 AM.Scale = OffsetScale;
3032 AM.Displacement = 0;
3033 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3034 return true;
3035
3036 case TargetOpcode::SUBREG_TO_REG: {
3037 // mov Wa, Wm
3038 // ldr Xd, [Xn, Xa, lsl #N]
3039 // ->
3040 // ldr Xd, [Xn, Wm, uxtw #N]
3041
3042 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3043 if (AddrI.getOperand(1).getImm() != 0 ||
3044 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3045 return false;
3046
3047 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3048 Register OffsetReg = AddrI.getOperand(2).getReg();
3049 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3050 return false;
3051
3052 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3053 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3054 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3055 DefMI.getOperand(3).getImm() != 0)
3056 return false;
3057
3058 AM.BaseReg = MemI.getOperand(1).getReg();
3059 if (AM.BaseReg == Reg)
3060 AM.BaseReg = MemI.getOperand(2).getReg();
3061 AM.ScaledReg = DefMI.getOperand(2).getReg();
3062 AM.Scale = OffsetScale;
3063 AM.Displacement = 0;
3064 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3065 return true;
3066 }
3067 }
3068 }
3069
3070 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3071
3072 // Check we are not breaking a potential conversion to an LDP.
3073 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3074 int64_t NewOffset) -> bool {
3075 int64_t MinOffset, MaxOffset;
3076 switch (NumBytes) {
3077 default:
3078 return true;
3079 case 4:
3080 MinOffset = -256;
3081 MaxOffset = 252;
3082 break;
3083 case 8:
3084 MinOffset = -512;
3085 MaxOffset = 504;
3086 break;
3087 case 16:
3088 MinOffset = -1024;
3089 MaxOffset = 1008;
3090 break;
3091 }
3092 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3093 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3094 };
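  // Worked example (illustrative, not from the original source): for an
  // 8-byte access the LDP immediate range is [-512, 504]. Folding
  // 'add x8, x9, #1024' into 'ldr x0, [x8, #16]' would move the offset from
  // 16 (LDP-encodable) to 1040 (not encodable), so the fold is rejected; if
  // the old offset were already out of range (say 4096), the fold cannot make
  // things worse and is allowed by this check.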
3095 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3096 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3097 int64_t NewOffset = OldOffset + Disp;
3098 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3099 return false;
3100 // If the old offset would fit into an LDP, but the new offset wouldn't,
3101 // bail out.
3102 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3103 return false;
3104 AM.BaseReg = AddrI.getOperand(1).getReg();
3105 AM.ScaledReg = 0;
3106 AM.Scale = 0;
3107 AM.Displacement = NewOffset;
3108 AM.Form = ExtAddrMode::Formula::Basic;
3109 return true;
3110 };
3111
3112 auto canFoldAddRegIntoAddrMode =
3113 [&](int64_t Scale,
3114 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3115 if (MemI.getOperand(2).getImm() != 0)
3116 return false;
3117 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3118 return false;
3119 AM.BaseReg = AddrI.getOperand(1).getReg();
3120 AM.ScaledReg = AddrI.getOperand(2).getReg();
3121 AM.Scale = Scale;
3122 AM.Displacement = 0;
3123 AM.Form = Form;
3124 return true;
3125 };
3126
3127 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3128 unsigned Opcode = MemI.getOpcode();
3129 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3130 Subtarget.isSTRQroSlow();
3131 };
3132
3133 int64_t Disp = 0;
3134 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3135 switch (AddrI.getOpcode()) {
3136 default:
3137 return false;
3138
3139 case AArch64::ADDXri:
3140 // add Xa, Xn, #N
3141 // ldr Xd, [Xa, #M]
3142 // ->
3143 // ldr Xd, [Xn, #N'+M]
3144 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3145 return canFoldAddSubImmIntoAddrMode(Disp);
3146
3147 case AArch64::SUBXri:
3148 // sub Xa, Xn, #N
3149 // ldr Xd, [Xa, #M]
3150 // ->
3151 // ldr Xd, [Xn, #N'+M]
3152 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3153 return canFoldAddSubImmIntoAddrMode(-Disp);
3154
3155 case AArch64::ADDXrs: {
3156 // add Xa, Xn, Xm, lsl #N
3157 // ldr Xd, [Xa]
3158 // ->
3159 // ldr Xd, [Xn, Xm, lsl #N]
3160
3161 // Don't fold the add if the result would be slower, unless optimising for
3162 // size.
3163 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3164 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3165 return false;
3166 Shift = AArch64_AM::getShiftValue(Shift);
3167 if (!OptSize) {
3168 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3169 return false;
3170 if (avoidSlowSTRQ(MemI))
3171 return false;
3172 }
3173 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3174 }
3175
3176 case AArch64::ADDXrr:
3177 // add Xa, Xn, Xm
3178 // ldr Xd, [Xa]
3179 // ->
3180 // ldr Xd, [Xn, Xm, lsl #0]
3181
3182 // Don't fold the add if the result would be slower, unless optimising for
3183 // size.
3184 if (!OptSize && avoidSlowSTRQ(MemI))
3185 return false;
3186 return canFoldAddRegIntoAddrMode(1);
3187
3188 case AArch64::ADDXrx:
3189 // add Xa, Xn, Wm, {s,u}xtw #N
3190 // ldr Xd, [Xa]
3191 // ->
3192 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3193
3194 // Don't fold the add if the result would be slower, unless optimising for
3195 // size.
3196 if (!OptSize && avoidSlowSTRQ(MemI))
3197 return false;
3198
3199 // Can fold only sign-/zero-extend of a word.
3200 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3201 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3202 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3203 return false;
3204
3205 return canFoldAddRegIntoAddrMode(
3206 1ULL << AArch64_AM::getArithShiftValue(Imm),
3207 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3208 : ExtAddrMode::Formula::ZExtScaledReg);
3209 }
3210}
3211
3212// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3213// return the opcode of an instruction performing the same operation, but using
3214// the [Reg, Reg] addressing mode.
3215static unsigned regOffsetOpcode(unsigned Opcode) {
3216 switch (Opcode) {
3217 default:
3218 llvm_unreachable("Address folding not implemented for instruction");
3219
3220 case AArch64::LDURQi:
3221 case AArch64::LDRQui:
3222 return AArch64::LDRQroX;
3223 case AArch64::STURQi:
3224 case AArch64::STRQui:
3225 return AArch64::STRQroX;
3226 case AArch64::LDURDi:
3227 case AArch64::LDRDui:
3228 return AArch64::LDRDroX;
3229 case AArch64::STURDi:
3230 case AArch64::STRDui:
3231 return AArch64::STRDroX;
3232 case AArch64::LDURXi:
3233 case AArch64::LDRXui:
3234 return AArch64::LDRXroX;
3235 case AArch64::STURXi:
3236 case AArch64::STRXui:
3237 return AArch64::STRXroX;
3238 case AArch64::LDURWi:
3239 case AArch64::LDRWui:
3240 return AArch64::LDRWroX;
3241 case AArch64::LDURSWi:
3242 case AArch64::LDRSWui:
3243 return AArch64::LDRSWroX;
3244 case AArch64::STURWi:
3245 case AArch64::STRWui:
3246 return AArch64::STRWroX;
3247 case AArch64::LDURHi:
3248 case AArch64::LDRHui:
3249 return AArch64::LDRHroX;
3250 case AArch64::STURHi:
3251 case AArch64::STRHui:
3252 return AArch64::STRHroX;
3253 case AArch64::LDURHHi:
3254 case AArch64::LDRHHui:
3255 return AArch64::LDRHHroX;
3256 case AArch64::STURHHi:
3257 case AArch64::STRHHui:
3258 return AArch64::STRHHroX;
3259 case AArch64::LDURSHXi:
3260 case AArch64::LDRSHXui:
3261 return AArch64::LDRSHXroX;
3262 case AArch64::LDURSHWi:
3263 case AArch64::LDRSHWui:
3264 return AArch64::LDRSHWroX;
3265 case AArch64::LDURBi:
3266 case AArch64::LDRBui:
3267 return AArch64::LDRBroX;
3268 case AArch64::LDURBBi:
3269 case AArch64::LDRBBui:
3270 return AArch64::LDRBBroX;
3271 case AArch64::LDURSBXi:
3272 case AArch64::LDRSBXui:
3273 return AArch64::LDRSBXroX;
3274 case AArch64::LDURSBWi:
3275 case AArch64::LDRSBWui:
3276 return AArch64::LDRSBWroX;
3277 case AArch64::STURBi:
3278 case AArch64::STRBui:
3279 return AArch64::STRBroX;
3280 case AArch64::STURBBi:
3281 case AArch64::STRBBui:
3282 return AArch64::STRBBroX;
3283 }
3284}
3285
3286// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3287// the opcode of an instruction performing the same operation, but using the
3288// [Reg, #Imm] addressing mode with scaled offset.
3289unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3290 switch (Opcode) {
3291 default:
3292 llvm_unreachable("Address folding not implemented for instruction");
3293
3294 case AArch64::LDURQi:
3295 Scale = 16;
3296 return AArch64::LDRQui;
3297 case AArch64::STURQi:
3298 Scale = 16;
3299 return AArch64::STRQui;
3300 case AArch64::LDURDi:
3301 Scale = 8;
3302 return AArch64::LDRDui;
3303 case AArch64::STURDi:
3304 Scale = 8;
3305 return AArch64::STRDui;
3306 case AArch64::LDURXi:
3307 Scale = 8;
3308 return AArch64::LDRXui;
3309 case AArch64::STURXi:
3310 Scale = 8;
3311 return AArch64::STRXui;
3312 case AArch64::LDURWi:
3313 Scale = 4;
3314 return AArch64::LDRWui;
3315 case AArch64::LDURSWi:
3316 Scale = 4;
3317 return AArch64::LDRSWui;
3318 case AArch64::STURWi:
3319 Scale = 4;
3320 return AArch64::STRWui;
3321 case AArch64::LDURHi:
3322 Scale = 2;
3323 return AArch64::LDRHui;
3324 case AArch64::STURHi:
3325 Scale = 2;
3326 return AArch64::STRHui;
3327 case AArch64::LDURHHi:
3328 Scale = 2;
3329 return AArch64::LDRHHui;
3330 case AArch64::STURHHi:
3331 Scale = 2;
3332 return AArch64::STRHHui;
3333 case AArch64::LDURSHXi:
3334 Scale = 2;
3335 return AArch64::LDRSHXui;
3336 case AArch64::LDURSHWi:
3337 Scale = 2;
3338 return AArch64::LDRSHWui;
3339 case AArch64::LDURBi:
3340 Scale = 1;
3341 return AArch64::LDRBui;
3342 case AArch64::LDURBBi:
3343 Scale = 1;
3344 return AArch64::LDRBBui;
3345 case AArch64::LDURSBXi:
3346 Scale = 1;
3347 return AArch64::LDRSBXui;
3348 case AArch64::LDURSBWi:
3349 Scale = 1;
3350 return AArch64::LDRSBWui;
3351 case AArch64::STURBi:
3352 Scale = 1;
3353 return AArch64::STRBui;
3354 case AArch64::STURBBi:
3355 Scale = 1;
3356 return AArch64::STRBBui;
3357 case AArch64::LDRQui:
3358 case AArch64::STRQui:
3359 Scale = 16;
3360 return Opcode;
3361 case AArch64::LDRDui:
3362 case AArch64::STRDui:
3363 case AArch64::LDRXui:
3364 case AArch64::STRXui:
3365 Scale = 8;
3366 return Opcode;
3367 case AArch64::LDRWui:
3368 case AArch64::LDRSWui:
3369 case AArch64::STRWui:
3370 Scale = 4;
3371 return Opcode;
3372 case AArch64::LDRHui:
3373 case AArch64::STRHui:
3374 case AArch64::LDRHHui:
3375 case AArch64::STRHHui:
3376 case AArch64::LDRSHXui:
3377 case AArch64::LDRSHWui:
3378 Scale = 2;
3379 return Opcode;
3380 case AArch64::LDRBui:
3381 case AArch64::LDRBBui:
3382 case AArch64::LDRSBXui:
3383 case AArch64::LDRSBWui:
3384 case AArch64::STRBui:
3385 case AArch64::STRBBui:
3386 Scale = 1;
3387 return Opcode;
3388 }
3389}
3390
3391// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3392// the opcode of an instruction performing the same operation, but using the
3393// [Reg, #Imm] addressing mode with unscaled offset.
3394unsigned unscaledOffsetOpcode(unsigned Opcode) {
3395 switch (Opcode) {
3396 default:
3397 llvm_unreachable("Address folding not implemented for instruction");
3398
3399 case AArch64::LDURQi:
3400 case AArch64::STURQi:
3401 case AArch64::LDURDi:
3402 case AArch64::STURDi:
3403 case AArch64::LDURXi:
3404 case AArch64::STURXi:
3405 case AArch64::LDURWi:
3406 case AArch64::LDURSWi:
3407 case AArch64::STURWi:
3408 case AArch64::LDURHi:
3409 case AArch64::STURHi:
3410 case AArch64::LDURHHi:
3411 case AArch64::STURHHi:
3412 case AArch64::LDURSHXi:
3413 case AArch64::LDURSHWi:
3414 case AArch64::LDURBi:
3415 case AArch64::STURBi:
3416 case AArch64::LDURBBi:
3417 case AArch64::STURBBi:
3418 case AArch64::LDURSBWi:
3419 case AArch64::LDURSBXi:
3420 return Opcode;
3421 case AArch64::LDRQui:
3422 return AArch64::LDURQi;
3423 case AArch64::STRQui:
3424 return AArch64::STURQi;
3425 case AArch64::LDRDui:
3426 return AArch64::LDURDi;
3427 case AArch64::STRDui:
3428 return AArch64::STURDi;
3429 case AArch64::LDRXui:
3430 return AArch64::LDURXi;
3431 case AArch64::STRXui:
3432 return AArch64::STURXi;
3433 case AArch64::LDRWui:
3434 return AArch64::LDURWi;
3435 case AArch64::LDRSWui:
3436 return AArch64::LDURSWi;
3437 case AArch64::STRWui:
3438 return AArch64::STURWi;
3439 case AArch64::LDRHui:
3440 return AArch64::LDURHi;
3441 case AArch64::STRHui:
3442 return AArch64::STURHi;
3443 case AArch64::LDRHHui:
3444 return AArch64::LDURHHi;
3445 case AArch64::STRHHui:
3446 return AArch64::STURHHi;
3447 case AArch64::LDRSHXui:
3448 return AArch64::LDURSHXi;
3449 case AArch64::LDRSHWui:
3450 return AArch64::LDURSHWi;
3451 case AArch64::LDRBBui:
3452 return AArch64::LDURBBi;
3453 case AArch64::LDRBui:
3454 return AArch64::LDURBi;
3455 case AArch64::STRBBui:
3456 return AArch64::STURBBi;
3457 case AArch64::STRBui:
3458 return AArch64::STURBi;
3459 case AArch64::LDRSBWui:
3460 return AArch64::LDURSBWi;
3461 case AArch64::LDRSBXui:
3462 return AArch64::LDURSBXi;
3463 }
3464}
3465
3466// Given the opcode of a memory load/store instruction, return the opcode of an
3467// instruction performing the same operation, but using
3468// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3469// offset register.
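// As an illustration (not from the original source), this is the final step
// of folding a sign-extend into the address:
//   sxtw x1, w2
//   ldr  x0, [x3, x1, lsl #3]
// can become
//   ldr  x0, [x3, w2, sxtw #3]
// with the load switched from its roX form to the corresponding roW opcode.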
3470static unsigned offsetExtendOpcode(unsigned Opcode) {
3471 switch (Opcode) {
3472 default:
3473 llvm_unreachable("Address folding not implemented for instruction");
3474
3475 case AArch64::LDRQroX:
3476 case AArch64::LDURQi:
3477 case AArch64::LDRQui:
3478 return AArch64::LDRQroW;
3479 case AArch64::STRQroX:
3480 case AArch64::STURQi:
3481 case AArch64::STRQui:
3482 return AArch64::STRQroW;
3483 case AArch64::LDRDroX:
3484 case AArch64::LDURDi:
3485 case AArch64::LDRDui:
3486 return AArch64::LDRDroW;
3487 case AArch64::STRDroX:
3488 case AArch64::STURDi:
3489 case AArch64::STRDui:
3490 return AArch64::STRDroW;
3491 case AArch64::LDRXroX:
3492 case AArch64::LDURXi:
3493 case AArch64::LDRXui:
3494 return AArch64::LDRXroW;
3495 case AArch64::STRXroX:
3496 case AArch64::STURXi:
3497 case AArch64::STRXui:
3498 return AArch64::STRXroW;
3499 case AArch64::LDRWroX:
3500 case AArch64::LDURWi:
3501 case AArch64::LDRWui:
3502 return AArch64::LDRWroW;
3503 case AArch64::LDRSWroX:
3504 case AArch64::LDURSWi:
3505 case AArch64::LDRSWui:
3506 return AArch64::LDRSWroW;
3507 case AArch64::STRWroX:
3508 case AArch64::STURWi:
3509 case AArch64::STRWui:
3510 return AArch64::STRWroW;
3511 case AArch64::LDRHroX:
3512 case AArch64::LDURHi:
3513 case AArch64::LDRHui:
3514 return AArch64::LDRHroW;
3515 case AArch64::STRHroX:
3516 case AArch64::STURHi:
3517 case AArch64::STRHui:
3518 return AArch64::STRHroW;
3519 case AArch64::LDRHHroX:
3520 case AArch64::LDURHHi:
3521 case AArch64::LDRHHui:
3522 return AArch64::LDRHHroW;
3523 case AArch64::STRHHroX:
3524 case AArch64::STURHHi:
3525 case AArch64::STRHHui:
3526 return AArch64::STRHHroW;
3527 case AArch64::LDRSHXroX:
3528 case AArch64::LDURSHXi:
3529 case AArch64::LDRSHXui:
3530 return AArch64::LDRSHXroW;
3531 case AArch64::LDRSHWroX:
3532 case AArch64::LDURSHWi:
3533 case AArch64::LDRSHWui:
3534 return AArch64::LDRSHWroW;
3535 case AArch64::LDRBroX:
3536 case AArch64::LDURBi:
3537 case AArch64::LDRBui:
3538 return AArch64::LDRBroW;
3539 case AArch64::LDRBBroX:
3540 case AArch64::LDURBBi:
3541 case AArch64::LDRBBui:
3542 return AArch64::LDRBBroW;
3543 case AArch64::LDRSBXroX:
3544 case AArch64::LDURSBXi:
3545 case AArch64::LDRSBXui:
3546 return AArch64::LDRSBXroW;
3547 case AArch64::LDRSBWroX:
3548 case AArch64::LDURSBWi:
3549 case AArch64::LDRSBWui:
3550 return AArch64::LDRSBWroW;
3551 case AArch64::STRBroX:
3552 case AArch64::STURBi:
3553 case AArch64::STRBui:
3554 return AArch64::STRBroW;
3555 case AArch64::STRBBroX:
3556 case AArch64::STURBBi:
3557 case AArch64::STRBBui:
3558 return AArch64::STRBBroW;
3559 }
3560}
3561
3562MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3563 const ExtAddrMode &AM) const {
3564
3565 const DebugLoc &DL = MemI.getDebugLoc();
3566 MachineBasicBlock &MBB = *MemI.getParent();
3567 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3568
3569 if (AM.Form == ExtAddrMode::Formula::Basic) {
3570 if (AM.ScaledReg) {
3571 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3572 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3573 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3574 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3575 .addReg(MemI.getOperand(0).getReg(),
3576 MemI.mayLoad() ? RegState::Define : 0)
3577 .addReg(AM.BaseReg)
3578 .addReg(AM.ScaledReg)
3579 .addImm(0)
3580 .addImm(AM.Scale > 1)
3581 .setMemRefs(MemI.memoperands())
3582 .setMIFlags(MemI.getFlags());
3583 return B.getInstr();
3584 }
3585
3586 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3587 "Addressing mode not supported for folding");
3588
3589 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3590 unsigned Scale = 1;
3591 unsigned Opcode = MemI.getOpcode();
3592 if (isInt<9>(AM.Displacement))
3593 Opcode = unscaledOffsetOpcode(Opcode);
3594 else
3595 Opcode = scaledOffsetOpcode(Opcode, Scale);
3596
3597 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3598 .addReg(MemI.getOperand(0).getReg(),
3599 MemI.mayLoad() ? RegState::Define : 0)
3600 .addReg(AM.BaseReg)
3601 .addImm(AM.Displacement / Scale)
3602 .setMemRefs(MemI.memoperands())
3603 .setMIFlags(MemI.getFlags());
3604 return B.getInstr();
3605 }
3606
3607 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3608 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3609 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3610 assert(AM.ScaledReg && !AM.Displacement &&
3611 "Address offset can be a register or an immediate, but not both");
3612 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3613 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3614 // Make sure the offset register is in the correct register class.
3615 Register OffsetReg = AM.ScaledReg;
3616 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3617 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3618 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3619 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3620 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3621 }
3622 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3623 .addReg(MemI.getOperand(0).getReg(),
3624 MemI.mayLoad() ? RegState::Define : 0)
3625 .addReg(AM.BaseReg)
3626 .addReg(OffsetReg)
3627 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3628 .addImm(AM.Scale != 1)
3629 .setMemRefs(MemI.memoperands())
3630 .setMIFlags(MemI.getFlags());
3631
3632 return B.getInstr();
3633 }
3634
3636 "Function must not be called with an addressing mode it can't handle");
3637}
3638
3639/// Return true if the opcode is a post-index ld/st instruction, which really
3640/// loads or stores from base+0.
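///
/// For example (illustrative, not from the original source),
/// 'ldr x0, [x1], #8' reads from the unmodified x1 and only then increments
/// x1 by 8, so the memory address it accesses is simply base+0.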
3641static bool isPostIndexLdStOpcode(unsigned Opcode) {
3642 switch (Opcode) {
3643 default:
3644 return false;
3645 case AArch64::LD1Fourv16b_POST:
3646 case AArch64::LD1Fourv1d_POST:
3647 case AArch64::LD1Fourv2d_POST:
3648 case AArch64::LD1Fourv2s_POST:
3649 case AArch64::LD1Fourv4h_POST:
3650 case AArch64::LD1Fourv4s_POST:
3651 case AArch64::LD1Fourv8b_POST:
3652 case AArch64::LD1Fourv8h_POST:
3653 case AArch64::LD1Onev16b_POST:
3654 case AArch64::LD1Onev1d_POST:
3655 case AArch64::LD1Onev2d_POST:
3656 case AArch64::LD1Onev2s_POST:
3657 case AArch64::LD1Onev4h_POST:
3658 case AArch64::LD1Onev4s_POST:
3659 case AArch64::LD1Onev8b_POST:
3660 case AArch64::LD1Onev8h_POST:
3661 case AArch64::LD1Rv16b_POST:
3662 case AArch64::LD1Rv1d_POST:
3663 case AArch64::LD1Rv2d_POST:
3664 case AArch64::LD1Rv2s_POST:
3665 case AArch64::LD1Rv4h_POST:
3666 case AArch64::LD1Rv4s_POST:
3667 case AArch64::LD1Rv8b_POST:
3668 case AArch64::LD1Rv8h_POST:
3669 case AArch64::LD1Threev16b_POST:
3670 case AArch64::LD1Threev1d_POST:
3671 case AArch64::LD1Threev2d_POST:
3672 case AArch64::LD1Threev2s_POST:
3673 case AArch64::LD1Threev4h_POST:
3674 case AArch64::LD1Threev4s_POST:
3675 case AArch64::LD1Threev8b_POST:
3676 case AArch64::LD1Threev8h_POST:
3677 case AArch64::LD1Twov16b_POST:
3678 case AArch64::LD1Twov1d_POST:
3679 case AArch64::LD1Twov2d_POST:
3680 case AArch64::LD1Twov2s_POST:
3681 case AArch64::LD1Twov4h_POST:
3682 case AArch64::LD1Twov4s_POST:
3683 case AArch64::LD1Twov8b_POST:
3684 case AArch64::LD1Twov8h_POST:
3685 case AArch64::LD1i16_POST:
3686 case AArch64::LD1i32_POST:
3687 case AArch64::LD1i64_POST:
3688 case AArch64::LD1i8_POST:
3689 case AArch64::LD2Rv16b_POST:
3690 case AArch64::LD2Rv1d_POST:
3691 case AArch64::LD2Rv2d_POST:
3692 case AArch64::LD2Rv2s_POST:
3693 case AArch64::LD2Rv4h_POST:
3694 case AArch64::LD2Rv4s_POST:
3695 case AArch64::LD2Rv8b_POST:
3696 case AArch64::LD2Rv8h_POST:
3697 case AArch64::LD2Twov16b_POST:
3698 case AArch64::LD2Twov2d_POST:
3699 case AArch64::LD2Twov2s_POST:
3700 case AArch64::LD2Twov4h_POST:
3701 case AArch64::LD2Twov4s_POST:
3702 case AArch64::LD2Twov8b_POST:
3703 case AArch64::LD2Twov8h_POST:
3704 case AArch64::LD2i16_POST:
3705 case AArch64::LD2i32_POST:
3706 case AArch64::LD2i64_POST:
3707 case AArch64::LD2i8_POST:
3708 case AArch64::LD3Rv16b_POST:
3709 case AArch64::LD3Rv1d_POST:
3710 case AArch64::LD3Rv2d_POST:
3711 case AArch64::LD3Rv2s_POST:
3712 case AArch64::LD3Rv4h_POST:
3713 case AArch64::LD3Rv4s_POST:
3714 case AArch64::LD3Rv8b_POST:
3715 case AArch64::LD3Rv8h_POST:
3716 case AArch64::LD3Threev16b_POST:
3717 case AArch64::LD3Threev2d_POST:
3718 case AArch64::LD3Threev2s_POST:
3719 case AArch64::LD3Threev4h_POST:
3720 case AArch64::LD3Threev4s_POST:
3721 case AArch64::LD3Threev8b_POST:
3722 case AArch64::LD3Threev8h_POST:
3723 case AArch64::LD3i16_POST:
3724 case AArch64::LD3i32_POST:
3725 case AArch64::LD3i64_POST:
3726 case AArch64::LD3i8_POST:
3727 case AArch64::LD4Fourv16b_POST:
3728 case AArch64::LD4Fourv2d_POST:
3729 case AArch64::LD4Fourv2s_POST:
3730 case AArch64::LD4Fourv4h_POST:
3731 case AArch64::LD4Fourv4s_POST:
3732 case AArch64::LD4Fourv8b_POST:
3733 case AArch64::LD4Fourv8h_POST:
3734 case AArch64::LD4Rv16b_POST:
3735 case AArch64::LD4Rv1d_POST:
3736 case AArch64::LD4Rv2d_POST:
3737 case AArch64::LD4Rv2s_POST:
3738 case AArch64::LD4Rv4h_POST:
3739 case AArch64::LD4Rv4s_POST:
3740 case AArch64::LD4Rv8b_POST:
3741 case AArch64::LD4Rv8h_POST:
3742 case AArch64::LD4i16_POST:
3743 case AArch64::LD4i32_POST:
3744 case AArch64::LD4i64_POST:
3745 case AArch64::LD4i8_POST:
3746 case AArch64::LDAPRWpost:
3747 case AArch64::LDAPRXpost:
3748 case AArch64::LDIAPPWpost:
3749 case AArch64::LDIAPPXpost:
3750 case AArch64::LDPDpost:
3751 case AArch64::LDPQpost:
3752 case AArch64::LDPSWpost:
3753 case AArch64::LDPSpost:
3754 case AArch64::LDPWpost:
3755 case AArch64::LDPXpost:
3756 case AArch64::LDRBBpost:
3757 case AArch64::LDRBpost:
3758 case AArch64::LDRDpost:
3759 case AArch64::LDRHHpost:
3760 case AArch64::LDRHpost:
3761 case AArch64::LDRQpost:
3762 case AArch64::LDRSBWpost:
3763 case AArch64::LDRSBXpost:
3764 case AArch64::LDRSHWpost:
3765 case AArch64::LDRSHXpost:
3766 case AArch64::LDRSWpost:
3767 case AArch64::LDRSpost:
3768 case AArch64::LDRWpost:
3769 case AArch64::LDRXpost:
3770 case AArch64::ST1Fourv16b_POST:
3771 case AArch64::ST1Fourv1d_POST:
3772 case AArch64::ST1Fourv2d_POST:
3773 case AArch64::ST1Fourv2s_POST:
3774 case AArch64::ST1Fourv4h_POST:
3775 case AArch64::ST1Fourv4s_POST:
3776 case AArch64::ST1Fourv8b_POST:
3777 case AArch64::ST1Fourv8h_POST:
3778 case AArch64::ST1Onev16b_POST:
3779 case AArch64::ST1Onev1d_POST:
3780 case AArch64::ST1Onev2d_POST:
3781 case AArch64::ST1Onev2s_POST:
3782 case AArch64::ST1Onev4h_POST:
3783 case AArch64::ST1Onev4s_POST:
3784 case AArch64::ST1Onev8b_POST:
3785 case AArch64::ST1Onev8h_POST:
3786 case AArch64::ST1Threev16b_POST:
3787 case AArch64::ST1Threev1d_POST:
3788 case AArch64::ST1Threev2d_POST:
3789 case AArch64::ST1Threev2s_POST:
3790 case AArch64::ST1Threev4h_POST:
3791 case AArch64::ST1Threev4s_POST:
3792 case AArch64::ST1Threev8b_POST:
3793 case AArch64::ST1Threev8h_POST:
3794 case AArch64::ST1Twov16b_POST:
3795 case AArch64::ST1Twov1d_POST:
3796 case AArch64::ST1Twov2d_POST:
3797 case AArch64::ST1Twov2s_POST:
3798 case AArch64::ST1Twov4h_POST:
3799 case AArch64::ST1Twov4s_POST:
3800 case AArch64::ST1Twov8b_POST:
3801 case AArch64::ST1Twov8h_POST:
3802 case AArch64::ST1i16_POST:
3803 case AArch64::ST1i32_POST:
3804 case AArch64::ST1i64_POST:
3805 case AArch64::ST1i8_POST:
3806 case AArch64::ST2GPostIndex:
3807 case AArch64::ST2Twov16b_POST:
3808 case AArch64::ST2Twov2d_POST:
3809 case AArch64::ST2Twov2s_POST:
3810 case AArch64::ST2Twov4h_POST:
3811 case AArch64::ST2Twov4s_POST:
3812 case AArch64::ST2Twov8b_POST:
3813 case AArch64::ST2Twov8h_POST:
3814 case AArch64::ST2i16_POST:
3815 case AArch64::ST2i32_POST:
3816 case AArch64::ST2i64_POST:
3817 case AArch64::ST2i8_POST:
3818 case AArch64::ST3Threev16b_POST:
3819 case AArch64::ST3Threev2d_POST:
3820 case AArch64::ST3Threev2s_POST:
3821 case AArch64::ST3Threev4h_POST:
3822 case AArch64::ST3Threev4s_POST:
3823 case AArch64::ST3Threev8b_POST:
3824 case AArch64::ST3Threev8h_POST:
3825 case AArch64::ST3i16_POST:
3826 case AArch64::ST3i32_POST:
3827 case AArch64::ST3i64_POST:
3828 case AArch64::ST3i8_POST:
3829 case AArch64::ST4Fourv16b_POST:
3830 case AArch64::ST4Fourv2d_POST:
3831 case AArch64::ST4Fourv2s_POST:
3832 case AArch64::ST4Fourv4h_POST:
3833 case AArch64::ST4Fourv4s_POST:
3834 case AArch64::ST4Fourv8b_POST:
3835 case AArch64::ST4Fourv8h_POST:
3836 case AArch64::ST4i16_POST:
3837 case AArch64::ST4i32_POST:
3838 case AArch64::ST4i64_POST:
3839 case AArch64::ST4i8_POST:
3840 case AArch64::STGPostIndex:
3841 case AArch64::STGPpost:
3842 case AArch64::STPDpost:
3843 case AArch64::STPQpost:
3844 case AArch64::STPSpost:
3845 case AArch64::STPWpost:
3846 case AArch64::STPXpost:
3847 case AArch64::STRBBpost:
3848 case AArch64::STRBpost:
3849 case AArch64::STRDpost:
3850 case AArch64::STRHHpost:
3851 case AArch64::STRHpost:
3852 case AArch64::STRQpost:
3853 case AArch64::STRSpost:
3854 case AArch64::STRWpost:
3855 case AArch64::STRXpost:
3856 case AArch64::STZ2GPostIndex:
3857 case AArch64::STZGPostIndex:
3858 return true;
3859 }
3860}
3861
3862bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3863 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3864 bool &OffsetIsScalable, TypeSize &Width,
3865 const TargetRegisterInfo *TRI) const {
3866 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3867 // Handle only loads/stores with base register followed by immediate offset.
3868 if (LdSt.getNumExplicitOperands() == 3) {
3869 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3870 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3871 !LdSt.getOperand(2).isImm())
3872 return false;
3873 } else if (LdSt.getNumExplicitOperands() == 4) {
3874 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3875 if (!LdSt.getOperand(1).isReg() ||
3876 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3877 !LdSt.getOperand(3).isImm())
3878 return false;
3879 } else
3880 return false;
3881
3882 // Get the scaling factor for the instruction and set the width for the
3883 // instruction.
3884 TypeSize Scale(0U, false);
3885 int64_t Dummy1, Dummy2;
3886
3887 // If this returns false, then it's an instruction we don't want to handle.
3888 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3889 return false;
3890
3891 // Compute the offset. The offset is the immediate operand multiplied by the
3892 // scaling factor; unscaled instructions have a scaling factor of 1.
3893 // Post-index instructions are a special case and report an offset of 0.
3894 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
3895 BaseOp = &LdSt.getOperand(2);
3896 Offset = 0;
3897 } else if (LdSt.getNumExplicitOperands() == 3) {
3898 BaseOp = &LdSt.getOperand(1);
3899 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3900 } else {
3901 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3902 BaseOp = &LdSt.getOperand(2);
3903 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3904 }
3905 OffsetIsScalable = Scale.isScalable();
3906
3907 return BaseOp->isReg() || BaseOp->isFI();
3908}
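// Illustrative example, editorial, not in the upstream source: for a scaled
// unsigned-immediate load such as
//
//   %x1 = LDRXui %x0, 1          ; assembly: ldr x1, [x0, #8]
//
// the hook above reports BaseOp = %x0, Offset = 1 * 8 = 8 and Width = 8 bytes,
// because getMemOpInfo gives LDRXui a scale of 8.  For the post-index opcodes
// listed in isPostIndexLdStOpcode the reported Offset is 0, since the access
// itself is made at base+0 and only the write-back adds the immediate.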
3909
3912 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3913 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3914 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3915 return OfsOp;
3916}
3917
3918bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3919 TypeSize &Width, int64_t &MinOffset,
3920 int64_t &MaxOffset) {
3921 switch (Opcode) {
3922 // Not a memory operation or something we want to handle.
3923 default:
3924 Scale = TypeSize::getFixed(0);
3925 Width = TypeSize::getFixed(0);
3926 MinOffset = MaxOffset = 0;
3927 return false;
3928 // LDR / STR
3929 case AArch64::LDRQui:
3930 case AArch64::STRQui:
3931 Scale = TypeSize::getFixed(16);
3932 Width = TypeSize::getFixed(16);
3933 MinOffset = 0;
3934 MaxOffset = 4095;
3935 break;
3936 case AArch64::LDRXui:
3937 case AArch64::LDRDui:
3938 case AArch64::STRXui:
3939 case AArch64::STRDui:
3940 case AArch64::PRFMui:
3941 Scale = TypeSize::getFixed(8);
3942 Width = TypeSize::getFixed(8);
3943 MinOffset = 0;
3944 MaxOffset = 4095;
3945 break;
3946 case AArch64::LDRWui:
3947 case AArch64::LDRSui:
3948 case AArch64::LDRSWui:
3949 case AArch64::STRWui:
3950 case AArch64::STRSui:
3951 Scale = TypeSize::getFixed(4);
3952 Width = TypeSize::getFixed(4);
3953 MinOffset = 0;
3954 MaxOffset = 4095;
3955 break;
3956 case AArch64::LDRHui:
3957 case AArch64::LDRHHui:
3958 case AArch64::LDRSHWui:
3959 case AArch64::LDRSHXui:
3960 case AArch64::STRHui:
3961 case AArch64::STRHHui:
3962 Scale = TypeSize::getFixed(2);
3963 Width = TypeSize::getFixed(2);
3964 MinOffset = 0;
3965 MaxOffset = 4095;
3966 break;
3967 case AArch64::LDRBui:
3968 case AArch64::LDRBBui:
3969 case AArch64::LDRSBWui:
3970 case AArch64::LDRSBXui:
3971 case AArch64::STRBui:
3972 case AArch64::STRBBui:
3973 Scale = TypeSize::getFixed(1);
3974 Width = TypeSize::getFixed(1);
3975 MinOffset = 0;
3976 MaxOffset = 4095;
3977 break;
3978 // post/pre inc
3979 case AArch64::STRQpre:
3980 case AArch64::LDRQpost:
3981 Scale = TypeSize::getFixed(1);
3982 Width = TypeSize::getFixed(16);
3983 MinOffset = -256;
3984 MaxOffset = 255;
3985 break;
3986 case AArch64::LDRDpost:
3987 case AArch64::LDRDpre:
3988 case AArch64::LDRXpost:
3989 case AArch64::LDRXpre:
3990 case AArch64::STRDpost:
3991 case AArch64::STRDpre:
3992 case AArch64::STRXpost:
3993 case AArch64::STRXpre:
3994 Scale = TypeSize::getFixed(1);
3995 Width = TypeSize::getFixed(8);
3996 MinOffset = -256;
3997 MaxOffset = 255;
3998 break;
3999 case AArch64::STRWpost:
4000 case AArch64::STRWpre:
4001 case AArch64::LDRWpost:
4002 case AArch64::LDRWpre:
4003 case AArch64::STRSpost:
4004 case AArch64::STRSpre:
4005 case AArch64::LDRSpost:
4006 case AArch64::LDRSpre:
4007 Scale = TypeSize::getFixed(1);
4008 Width = TypeSize::getFixed(4);
4009 MinOffset = -256;
4010 MaxOffset = 255;
4011 break;
4012 case AArch64::LDRHpost:
4013 case AArch64::LDRHpre:
4014 case AArch64::STRHpost:
4015 case AArch64::STRHpre:
4016 case AArch64::LDRHHpost:
4017 case AArch64::LDRHHpre:
4018 case AArch64::STRHHpost:
4019 case AArch64::STRHHpre:
4020 Scale = TypeSize::getFixed(1);
4021 Width = TypeSize::getFixed(2);
4022 MinOffset = -256;
4023 MaxOffset = 255;
4024 break;
4025 case AArch64::LDRBpost:
4026 case AArch64::LDRBpre:
4027 case AArch64::STRBpost:
4028 case AArch64::STRBpre:
4029 case AArch64::LDRBBpost:
4030 case AArch64::LDRBBpre:
4031 case AArch64::STRBBpost:
4032 case AArch64::STRBBpre:
4033 Scale = TypeSize::getFixed(1);
4034 Width = TypeSize::getFixed(1);
4035 MinOffset = -256;
4036 MaxOffset = 255;
4037 break;
4038 // Unscaled
4039 case AArch64::LDURQi:
4040 case AArch64::STURQi:
4041 Scale = TypeSize::getFixed(1);
4042 Width = TypeSize::getFixed(16);
4043 MinOffset = -256;
4044 MaxOffset = 255;
4045 break;
4046 case AArch64::LDURXi:
4047 case AArch64::LDURDi:
4048 case AArch64::LDAPURXi:
4049 case AArch64::STURXi:
4050 case AArch64::STURDi:
4051 case AArch64::STLURXi:
4052 case AArch64::PRFUMi:
4053 Scale = TypeSize::getFixed(1);
4054 Width = TypeSize::getFixed(8);
4055 MinOffset = -256;
4056 MaxOffset = 255;
4057 break;
4058 case AArch64::LDURWi:
4059 case AArch64::LDURSi:
4060 case AArch64::LDURSWi:
4061 case AArch64::LDAPURi:
4062 case AArch64::LDAPURSWi:
4063 case AArch64::STURWi:
4064 case AArch64::STURSi:
4065 case AArch64::STLURWi:
4066 Scale = TypeSize::getFixed(1);
4067 Width = TypeSize::getFixed(4);
4068 MinOffset = -256;
4069 MaxOffset = 255;
4070 break;
4071 case AArch64::LDURHi:
4072 case AArch64::LDURHHi:
4073 case AArch64::LDURSHXi:
4074 case AArch64::LDURSHWi:
4075 case AArch64::LDAPURHi:
4076 case AArch64::LDAPURSHWi:
4077 case AArch64::LDAPURSHXi:
4078 case AArch64::STURHi:
4079 case AArch64::STURHHi:
4080 case AArch64::STLURHi:
4081 Scale = TypeSize::getFixed(1);
4082 Width = TypeSize::getFixed(2);
4083 MinOffset = -256;
4084 MaxOffset = 255;
4085 break;
4086 case AArch64::LDURBi:
4087 case AArch64::LDURBBi:
4088 case AArch64::LDURSBXi:
4089 case AArch64::LDURSBWi:
4090 case AArch64::LDAPURBi:
4091 case AArch64::LDAPURSBWi:
4092 case AArch64::LDAPURSBXi:
4093 case AArch64::STURBi:
4094 case AArch64::STURBBi:
4095 case AArch64::STLURBi:
4096 Scale = TypeSize::getFixed(1);
4097 Width = TypeSize::getFixed(1);
4098 MinOffset = -256;
4099 MaxOffset = 255;
4100 break;
4101 // LDP / STP (including pre/post inc)
4102 case AArch64::LDPQi:
4103 case AArch64::LDNPQi:
4104 case AArch64::STPQi:
4105 case AArch64::STNPQi:
4106 case AArch64::LDPQpost:
4107 case AArch64::LDPQpre:
4108 case AArch64::STPQpost:
4109 case AArch64::STPQpre:
4110 Scale = TypeSize::getFixed(16);
4111 Width = TypeSize::getFixed(16 * 2);
4112 MinOffset = -64;
4113 MaxOffset = 63;
4114 break;
4115 case AArch64::LDPXi:
4116 case AArch64::LDPDi:
4117 case AArch64::LDNPXi:
4118 case AArch64::LDNPDi:
4119 case AArch64::STPXi:
4120 case AArch64::STPDi:
4121 case AArch64::STNPXi:
4122 case AArch64::STNPDi:
4123 case AArch64::LDPDpost:
4124 case AArch64::LDPDpre:
4125 case AArch64::LDPXpost:
4126 case AArch64::LDPXpre:
4127 case AArch64::STPDpost:
4128 case AArch64::STPDpre:
4129 case AArch64::STPXpost:
4130 case AArch64::STPXpre:
4131 Scale = TypeSize::getFixed(8);
4132 Width = TypeSize::getFixed(8 * 2);
4133 MinOffset = -64;
4134 MaxOffset = 63;
4135 break;
4136 case AArch64::LDPWi:
4137 case AArch64::LDPSi:
4138 case AArch64::LDNPWi:
4139 case AArch64::LDNPSi:
4140 case AArch64::STPWi:
4141 case AArch64::STPSi:
4142 case AArch64::STNPWi:
4143 case AArch64::STNPSi:
4144 case AArch64::LDPSpost:
4145 case AArch64::LDPSpre:
4146 case AArch64::LDPWpost:
4147 case AArch64::LDPWpre:
4148 case AArch64::STPSpost:
4149 case AArch64::STPSpre:
4150 case AArch64::STPWpost:
4151 case AArch64::STPWpre:
4152 Scale = TypeSize::getFixed(4);
4153 Width = TypeSize::getFixed(4 * 2);
4154 MinOffset = -64;
4155 MaxOffset = 63;
4156 break;
4157 case AArch64::StoreSwiftAsyncContext:
4158 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4159 Scale = TypeSize::getFixed(1);
4160 Width = TypeSize::getFixed(8);
4161 MinOffset = 0;
4162 MaxOffset = 4095;
4163 break;
4164 case AArch64::ADDG:
4165 Scale = TypeSize::getFixed(16);
4166 Width = TypeSize::getFixed(0);
4167 MinOffset = 0;
4168 MaxOffset = 63;
4169 break;
4170 case AArch64::TAGPstack:
4171 Scale = TypeSize::getFixed(16);
4172 Width = TypeSize::getFixed(0);
4173 // TAGP with a negative offset turns into SUBG, which has a maximum offset
4174 // of 63 (not 64!).
4175 MinOffset = -63;
4176 MaxOffset = 63;
4177 break;
4178 case AArch64::LDG:
4179 case AArch64::STGi:
4180 case AArch64::STGPreIndex:
4181 case AArch64::STGPostIndex:
4182 case AArch64::STZGi:
4183 case AArch64::STZGPreIndex:
4184 case AArch64::STZGPostIndex:
4185 Scale = TypeSize::getFixed(16);
4186 Width = TypeSize::getFixed(16);
4187 MinOffset = -256;
4188 MaxOffset = 255;
4189 break;
4190 // SVE
4191 case AArch64::STR_ZZZZXI:
4192 case AArch64::LDR_ZZZZXI:
4193 Scale = TypeSize::getScalable(16);
4194 Width = TypeSize::getScalable(16 * 4);
4195 MinOffset = -256;
4196 MaxOffset = 252;
4197 break;
4198 case AArch64::STR_ZZZXI:
4199 case AArch64::LDR_ZZZXI:
4200 Scale = TypeSize::getScalable(16);
4201 Width = TypeSize::getScalable(16 * 3);
4202 MinOffset = -256;
4203 MaxOffset = 253;
4204 break;
4205 case AArch64::STR_ZZXI:
4206 case AArch64::LDR_ZZXI:
4207 Scale = TypeSize::getScalable(16);
4208 Width = TypeSize::getScalable(16 * 2);
4209 MinOffset = -256;
4210 MaxOffset = 254;
4211 break;
4212 case AArch64::LDR_PXI:
4213 case AArch64::STR_PXI:
4214 Scale = TypeSize::getScalable(2);
4215 Width = TypeSize::getScalable(2);
4216 MinOffset = -256;
4217 MaxOffset = 255;
4218 break;
4219 case AArch64::LDR_PPXI:
4220 case AArch64::STR_PPXI:
4221 Scale = TypeSize::getScalable(2);
4222 Width = TypeSize::getScalable(2 * 2);
4223 MinOffset = -256;
4224 MaxOffset = 254;
4225 break;
4226 case AArch64::LDR_ZXI:
4227 case AArch64::STR_ZXI:
4228 Scale = TypeSize::getScalable(16);
4229 Width = TypeSize::getScalable(16);
4230 MinOffset = -256;
4231 MaxOffset = 255;
4232 break;
4233 case AArch64::LD1B_IMM:
4234 case AArch64::LD1H_IMM:
4235 case AArch64::LD1W_IMM:
4236 case AArch64::LD1D_IMM:
4237 case AArch64::LDNT1B_ZRI:
4238 case AArch64::LDNT1H_ZRI:
4239 case AArch64::LDNT1W_ZRI:
4240 case AArch64::LDNT1D_ZRI:
4241 case AArch64::ST1B_IMM:
4242 case AArch64::ST1H_IMM:
4243 case AArch64::ST1W_IMM:
4244 case AArch64::ST1D_IMM:
4245 case AArch64::STNT1B_ZRI:
4246 case AArch64::STNT1H_ZRI:
4247 case AArch64::STNT1W_ZRI:
4248 case AArch64::STNT1D_ZRI:
4249 case AArch64::LDNF1B_IMM:
4250 case AArch64::LDNF1H_IMM:
4251 case AArch64::LDNF1W_IMM:
4252 case AArch64::LDNF1D_IMM:
4253 // A full vector's worth of data
4254 // Width = mbytes * elements
4255 Scale = TypeSize::getScalable(16);
4256 Width = TypeSize::getScalable(16);
4257 MinOffset = -8;
4258 MaxOffset = 7;
4259 break;
4260 case AArch64::LD2B_IMM:
4261 case AArch64::LD2H_IMM:
4262 case AArch64::LD2W_IMM:
4263 case AArch64::LD2D_IMM:
4264 case AArch64::ST2B_IMM:
4265 case AArch64::ST2H_IMM:
4266 case AArch64::ST2W_IMM:
4267 case AArch64::ST2D_IMM:
4268 Scale = TypeSize::getScalable(32);
4269 Width = TypeSize::getScalable(16 * 2);
4270 MinOffset = -8;
4271 MaxOffset = 7;
4272 break;
4273 case AArch64::LD3B_IMM:
4274 case AArch64::LD3H_IMM:
4275 case AArch64::LD3W_IMM:
4276 case AArch64::LD3D_IMM:
4277 case AArch64::ST3B_IMM:
4278 case AArch64::ST3H_IMM:
4279 case AArch64::ST3W_IMM:
4280 case AArch64::ST3D_IMM:
4281 Scale = TypeSize::getScalable(48);
4282 Width = TypeSize::getScalable(16 * 3);
4283 MinOffset = -8;
4284 MaxOffset = 7;
4285 break;
4286 case AArch64::LD4B_IMM:
4287 case AArch64::LD4H_IMM:
4288 case AArch64::LD4W_IMM:
4289 case AArch64::LD4D_IMM:
4290 case AArch64::ST4B_IMM:
4291 case AArch64::ST4H_IMM:
4292 case AArch64::ST4W_IMM:
4293 case AArch64::ST4D_IMM:
4294 Scale = TypeSize::getScalable(64);
4295 Width = TypeSize::getScalable(16 * 4);
4296 MinOffset = -8;
4297 MaxOffset = 7;
4298 break;
4299 case AArch64::LD1B_H_IMM:
4300 case AArch64::LD1SB_H_IMM:
4301 case AArch64::LD1H_S_IMM:
4302 case AArch64::LD1SH_S_IMM:
4303 case AArch64::LD1W_D_IMM:
4304 case AArch64::LD1SW_D_IMM:
4305 case AArch64::ST1B_H_IMM:
4306 case AArch64::ST1H_S_IMM:
4307 case AArch64::ST1W_D_IMM:
4308 case AArch64::LDNF1B_H_IMM:
4309 case AArch64::LDNF1SB_H_IMM:
4310 case AArch64::LDNF1H_S_IMM:
4311 case AArch64::LDNF1SH_S_IMM:
4312 case AArch64::LDNF1W_D_IMM:
4313 case AArch64::LDNF1SW_D_IMM:
4314 // A half vector's worth of data
4315 // Width = mbytes * elements
4316 Scale = TypeSize::getScalable(8);
4317 Width = TypeSize::getScalable(8);
4318 MinOffset = -8;
4319 MaxOffset = 7;
4320 break;
4321 case AArch64::LD1B_S_IMM:
4322 case AArch64::LD1SB_S_IMM:
4323 case AArch64::LD1H_D_IMM:
4324 case AArch64::LD1SH_D_IMM:
4325 case AArch64::ST1B_S_IMM:
4326 case AArch64::ST1H_D_IMM:
4327 case AArch64::LDNF1B_S_IMM:
4328 case AArch64::LDNF1SB_S_IMM:
4329 case AArch64::LDNF1H_D_IMM:
4330 case AArch64::LDNF1SH_D_IMM:
4331 // A quarter vector's worth of data
4332 // Width = mbytes * elements
4333 Scale = TypeSize::getScalable(4);
4334 Width = TypeSize::getScalable(4);
4335 MinOffset = -8;
4336 MaxOffset = 7;
4337 break;
4338 case AArch64::LD1B_D_IMM:
4339 case AArch64::LD1SB_D_IMM:
4340 case AArch64::ST1B_D_IMM:
4341 case AArch64::LDNF1B_D_IMM:
4342 case AArch64::LDNF1SB_D_IMM:
4343 // An eighth vector's worth of data
4344 // Width = mbytes * elements
4345 Scale = TypeSize::getScalable(2);
4346 Width = TypeSize::getScalable(2);
4347 MinOffset = -8;
4348 MaxOffset = 7;
4349 break;
4350 case AArch64::ST2Gi:
4351 case AArch64::ST2GPreIndex:
4352 case AArch64::ST2GPostIndex:
4353 case AArch64::STZ2Gi:
4354 case AArch64::STZ2GPreIndex:
4355 case AArch64::STZ2GPostIndex:
4356 Scale = TypeSize::getFixed(16);
4357 Width = TypeSize::getFixed(32);
4358 MinOffset = -256;
4359 MaxOffset = 255;
4360 break;
4361 case AArch64::STGPi:
4362 case AArch64::STGPpost:
4363 case AArch64::STGPpre:
4364 Scale = TypeSize::getFixed(16);
4365 Width = TypeSize::getFixed(16);
4366 MinOffset = -64;
4367 MaxOffset = 63;
4368 break;
4369 case AArch64::LD1RB_IMM:
4370 case AArch64::LD1RB_H_IMM:
4371 case AArch64::LD1RB_S_IMM:
4372 case AArch64::LD1RB_D_IMM:
4373 case AArch64::LD1RSB_H_IMM:
4374 case AArch64::LD1RSB_S_IMM:
4375 case AArch64::LD1RSB_D_IMM:
4376 Scale = TypeSize::getFixed(1);
4377 Width = TypeSize::getFixed(1);
4378 MinOffset = 0;
4379 MaxOffset = 63;
4380 break;
4381 case AArch64::LD1RH_IMM:
4382 case AArch64::LD1RH_S_IMM:
4383 case AArch64::LD1RH_D_IMM:
4384 case AArch64::LD1RSH_S_IMM:
4385 case AArch64::LD1RSH_D_IMM:
4386 Scale = TypeSize::getFixed(2);
4387 Width = TypeSize::getFixed(2);
4388 MinOffset = 0;
4389 MaxOffset = 63;
4390 break;
4391 case AArch64::LD1RW_IMM:
4392 case AArch64::LD1RW_D_IMM:
4393 case AArch64::LD1RSW_IMM:
4394 Scale = TypeSize::getFixed(4);
4395 Width = TypeSize::getFixed(4);
4396 MinOffset = 0;
4397 MaxOffset = 63;
4398 break;
4399 case AArch64::LD1RD_IMM:
4400 Scale = TypeSize::getFixed(8);
4401 Width = TypeSize::getFixed(8);
4402 MinOffset = 0;
4403 MaxOffset = 63;
4404 break;
4405 }
4406
4407 return true;
4408}
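// Editorial sketch, not part of the upstream source: one way a caller might
// use getMemOpInfo to check whether a byte offset is encodable in the
// immediate field of a fixed-size access.  The helper name is illustrative.
static bool exampleIsLegalByteOffset(unsigned Opcode, int64_t ByteOffset) {
  TypeSize Scale(0U, false), Width(0U, false);
  int64_t MinOff, MaxOff;
  if (!AArch64InstrInfo::getMemOpInfo(Opcode, Scale, Width, MinOff, MaxOff))
    return false;
  // The immediate is expressed in units of Scale, so the byte offset must be
  // a multiple of Scale and the scaled value must lie in [MinOff, MaxOff].
  // E.g. LDRXui: Scale = 8, range [0, 4095] -> byte offsets 0..32760 step 8.
  int64_t ScaleVal = (int64_t)Scale.getKnownMinValue();
  return ByteOffset % ScaleVal == 0 && ByteOffset / ScaleVal >= MinOff &&
         ByteOffset / ScaleVal <= MaxOff;
}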
4409
4410// Scaling factor for unscaled load or store.
4411int AArch64InstrInfo::getMemScale(unsigned Opc) {
4412 switch (Opc) {
4413 default:
4414 llvm_unreachable("Opcode has unknown scale!");
4415 case AArch64::LDRBBui:
4416 case AArch64::LDURBBi:
4417 case AArch64::LDRSBWui:
4418 case AArch64::LDURSBWi:
4419 case AArch64::STRBBui:
4420 case AArch64::STURBBi:
4421 return 1;
4422 case AArch64::LDRHHui:
4423 case AArch64::LDURHHi:
4424 case AArch64::LDRSHWui:
4425 case AArch64::LDURSHWi:
4426 case AArch64::STRHHui:
4427 case AArch64::STURHHi:
4428 return 2;
4429 case AArch64::LDRSui:
4430 case AArch64::LDURSi:
4431 case AArch64::LDRSpre:
4432 case AArch64::LDRSWui:
4433 case AArch64::LDURSWi:
4434 case AArch64::LDRSWpre:
4435 case AArch64::LDRWpre:
4436 case AArch64::LDRWui:
4437 case AArch64::LDURWi:
4438 case AArch64::STRSui:
4439 case AArch64::STURSi:
4440 case AArch64::STRSpre:
4441 case AArch64::STRWui:
4442 case AArch64::STURWi:
4443 case AArch64::STRWpre:
4444 case AArch64::LDPSi:
4445 case AArch64::LDPSWi:
4446 case AArch64::LDPWi:
4447 case AArch64::STPSi:
4448 case AArch64::STPWi:
4449 return 4;
4450 case AArch64::LDRDui:
4451 case AArch64::LDURDi:
4452 case AArch64::LDRDpre:
4453 case AArch64::LDRXui:
4454 case AArch64::LDURXi:
4455 case AArch64::LDRXpre:
4456 case AArch64::STRDui:
4457 case AArch64::STURDi:
4458 case AArch64::STRDpre:
4459 case AArch64::STRXui:
4460 case AArch64::STURXi:
4461 case AArch64::STRXpre:
4462 case AArch64::LDPDi:
4463 case AArch64::LDPXi:
4464 case AArch64::STPDi:
4465 case AArch64::STPXi:
4466 return 8;
4467 case AArch64::LDRQui:
4468 case AArch64::LDURQi:
4469 case AArch64::STRQui:
4470 case AArch64::STURQi:
4471 case AArch64::STRQpre:
4472 case AArch64::LDPQi:
4473 case AArch64::LDRQpre:
4474 case AArch64::STPQi:
4475 case AArch64::STGi:
4476 case AArch64::STZGi:
4477 case AArch64::ST2Gi:
4478 case AArch64::STZ2Gi:
4479 case AArch64::STGPi:
4480 return 16;
4481 }
4482}
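// Illustrative examples, editorial: getMemScale returns the access size in
// bytes for both the scaled and the unscaled addressing forms, e.g.
//
//   getMemScale(AArch64::LDRXui) == 8
//   getMemScale(AArch64::LDURXi) == 8
//   getMemScale(AArch64::STRQui) == 16
//
// The load/store optimizer uses this value to turn unscaled byte offsets into
// the element offsets expected by the paired LDP/STP forms (see scaleOffset
// below).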
4483
4484bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4485 switch (MI.getOpcode()) {
4486 default:
4487 return false;
4488 case AArch64::LDRWpre:
4489 case AArch64::LDRXpre:
4490 case AArch64::LDRSWpre:
4491 case AArch64::LDRSpre:
4492 case AArch64::LDRDpre:
4493 case AArch64::LDRQpre:
4494 return true;
4495 }
4496}
4497
4498bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4499 switch (MI.getOpcode()) {
4500 default:
4501 return false;
4502 case AArch64::STRWpre:
4503 case AArch64::STRXpre:
4504 case AArch64::STRSpre:
4505 case AArch64::STRDpre:
4506 case AArch64::STRQpre:
4507 return true;
4508 }
4509}
4510
4511bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4512 return isPreLd(MI) || isPreSt(MI);
4513}
4514
4515bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4516 switch (MI.getOpcode()) {
4517 default:
4518 return false;
4519 case AArch64::LDPSi:
4520 case AArch64::LDPSWi:
4521 case AArch64::LDPDi:
4522 case AArch64::LDPQi:
4523 case AArch64::LDPWi:
4524 case AArch64::LDPXi:
4525 case AArch64::STPSi:
4526 case AArch64::STPDi:
4527 case AArch64::STPQi:
4528 case AArch64::STPWi:
4529 case AArch64::STPXi:
4530 case AArch64::STGPi:
4531 return true;
4532 }
4533}
4534
4535const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4536 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4537 unsigned Idx =
4538 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4539 : 1;
4540 return MI.getOperand(Idx);
4541}
4542
4543const MachineOperand &
4544AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4545 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4546 unsigned Idx =
4547 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4548 : 2;
4549 return MI.getOperand(Idx);
4550}
4551
4552const MachineOperand &
4553AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4554 switch (MI.getOpcode()) {
4555 default:
4556 llvm_unreachable("Unexpected opcode");
4557 case AArch64::LDRBroX:
4558 case AArch64::LDRBBroX:
4559 case AArch64::LDRSBXroX:
4560 case AArch64::LDRSBWroX:
4561 case AArch64::LDRHroX:
4562 case AArch64::LDRHHroX:
4563 case AArch64::LDRSHXroX:
4564 case AArch64::LDRSHWroX:
4565 case AArch64::LDRWroX:
4566 case AArch64::LDRSroX:
4567 case AArch64::LDRSWroX:
4568 case AArch64::LDRDroX:
4569 case AArch64::LDRXroX:
4570 case AArch64::LDRQroX:
4571 return MI.getOperand(4);
4572 }
4573}
4574
4575static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4576 Register Reg) {
4577 if (MI.getParent() == nullptr)
4578 return nullptr;
4579 const MachineFunction *MF = MI.getParent()->getParent();
4580 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4581}
4582
4583bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4584 auto IsHFPR = [&](const MachineOperand &Op) {
4585 if (!Op.isReg())
4586 return false;
4587 auto Reg = Op.getReg();
4588 if (Reg.isPhysical())
4589 return AArch64::FPR16RegClass.contains(Reg);
4590 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4591 return TRC == &AArch64::FPR16RegClass ||
4592 TRC == &AArch64::FPR16_loRegClass;
4593 };
4594 return llvm::any_of(MI.operands(), IsHFPR);
4595}
4596
4597bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4598 auto IsQFPR = [&](const MachineOperand &Op) {
4599 if (!Op.isReg())
4600 return false;
4601 auto Reg = Op.getReg();
4602 if (Reg.isPhysical())
4603 return AArch64::FPR128RegClass.contains(Reg);
4604 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4605 return TRC == &AArch64::FPR128RegClass ||
4606 TRC == &AArch64::FPR128_loRegClass;
4607 };
4608 return llvm::any_of(MI.operands(), IsQFPR);
4609}
4610
4611bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4612 switch (MI.getOpcode()) {
4613 case AArch64::BRK:
4614 case AArch64::HLT:
4615 case AArch64::PACIASP:
4616 case AArch64::PACIBSP:
4617 // Implicit BTI behavior.
4618 return true;
4619 case AArch64::PAUTH_PROLOGUE:
4620 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4621 return true;
4622 case AArch64::HINT: {
4623 unsigned Imm = MI.getOperand(0).getImm();
4624 // Explicit BTI instruction.
4625 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4626 return true;
4627 // PACI(A|B)SP instructions.
4628 if (Imm == 25 || Imm == 27)
4629 return true;
4630 return false;
4631 }
4632 default:
4633 return false;
4634 }
4635}
4636
4637bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4638 if (Reg == 0)
4639 return false;
4640 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4641 return AArch64::FPR128RegClass.contains(Reg) ||
4642 AArch64::FPR64RegClass.contains(Reg) ||
4643 AArch64::FPR32RegClass.contains(Reg) ||
4644 AArch64::FPR16RegClass.contains(Reg) ||
4645 AArch64::FPR8RegClass.contains(Reg);
4646}
4647
4648bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4649 auto IsFPR = [&](const MachineOperand &Op) {
4650 if (!Op.isReg())
4651 return false;
4652 auto Reg = Op.getReg();
4653 if (Reg.isPhysical())
4654 return isFpOrNEON(Reg);
4655
4656 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4657 return TRC == &AArch64::FPR128RegClass ||
4658 TRC == &AArch64::FPR128_loRegClass ||
4659 TRC == &AArch64::FPR64RegClass ||
4660 TRC == &AArch64::FPR64_loRegClass ||
4661 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4662 TRC == &AArch64::FPR8RegClass;
4663 };
4664 return llvm::any_of(MI.operands(), IsFPR);
4665}
4666
4667// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4668// scaled.
4669static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4670 int Scale = AArch64InstrInfo::getMemScale(Opc);
4671
4672 // If the byte-offset isn't a multiple of the stride, we can't scale this
4673 // offset.
4674 if (Offset % Scale != 0)
4675 return false;
4676
4677 // Convert the byte-offset used by unscaled into an "element" offset used
4678 // by the scaled pair load/store instructions.
4679 Offset /= Scale;
4680 return true;
4681}
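// Worked example, editorial: for an unscaled 8-byte access such as STURXi,
// getMemScale returns 8, so
//
//   Offset = 16  ->  scaled to 2, usable as an LDP/STP element offset
//   Offset = 12  ->  not a multiple of 8, scaleOffset returns false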
4682
4683static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4684 if (FirstOpc == SecondOpc)
4685 return true;
4686 // We can also pair sign-ext and zero-ext instructions.
4687 switch (FirstOpc) {
4688 default:
4689 return false;
4690 case AArch64::STRSui:
4691 case AArch64::STURSi:
4692 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4693 case AArch64::STRDui:
4694 case AArch64::STURDi:
4695 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4696 case AArch64::STRQui:
4697 case AArch64::STURQi:
4698 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4699 case AArch64::STRWui:
4700 case AArch64::STURWi:
4701 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4702 case AArch64::STRXui:
4703 case AArch64::STURXi:
4704 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4705 case AArch64::LDRSui:
4706 case AArch64::LDURSi:
4707 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4708 case AArch64::LDRDui:
4709 case AArch64::LDURDi:
4710 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4711 case AArch64::LDRQui:
4712 case AArch64::LDURQi:
4713 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4714 case AArch64::LDRWui:
4715 case AArch64::LDURWi:
4716 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4717 case AArch64::LDRSWui:
4718 case AArch64::LDURSWi:
4719 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4720 case AArch64::LDRXui:
4721 case AArch64::LDURXi:
4722 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4723 }
4724 // These instructions can't be paired based on their opcodes.
4725 return false;
4726}
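// Illustrative note, editorial: besides identical opcodes, the table above
// also lets a plain 32-bit load pair with its sign-extending counterpart:
//
//   canPairLdStOpc(AArch64::LDRWui, AArch64::LDRSWui)  -> true
//   canPairLdStOpc(AArch64::STRWui, AArch64::STRXui)   -> false (different
//                                                         access sizes)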
4727
4728static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4729 int64_t Offset1, unsigned Opcode1, int FI2,
4730 int64_t Offset2, unsigned Opcode2) {
4731 // Accesses through fixed stack object frame indices may access a different
4732 // fixed stack slot. Check that the object offsets + offsets match.
4733 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4734 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4735 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4736 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4737 // Convert to scaled object offsets.
4738 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4739 if (ObjectOffset1 % Scale1 != 0)
4740 return false;
4741 ObjectOffset1 /= Scale1;
4742 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4743 if (ObjectOffset2 % Scale2 != 0)
4744 return false;
4745 ObjectOffset2 /= Scale2;
4746 ObjectOffset1 += Offset1;
4747 ObjectOffset2 += Offset2;
4748 return ObjectOffset1 + 1 == ObjectOffset2;
4749 }
4750
4751 return FI1 == FI2;
4752}
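// Worked example, editorial, assuming two fixed stack objects: with
// ObjectOffset1 = -16, ObjectOffset2 = -8, Opcode1 == Opcode2 == LDRXui
// (scale 8) and instruction offsets 0 and 0, the scaled object offsets become
// -2 and -1, so ObjectOffset1 + Offset1 + 1 == ObjectOffset2 + Offset2 holds
// and the two accesses are allowed to cluster.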
4753
4754/// Detect opportunities for ldp/stp formation.
4755///
4756/// Only called for LdSt for which getMemOperandWithOffset returns true.
4757bool AArch64InstrInfo::shouldClusterMemOps(
4758 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4759 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4760 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4761 unsigned NumBytes) const {
4762 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4763 const MachineOperand &BaseOp1 = *BaseOps1.front();
4764 const MachineOperand &BaseOp2 = *BaseOps2.front();
4765 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4766 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4767 if (BaseOp1.getType() != BaseOp2.getType())
4768 return false;
4769
4770 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4771 "Only base registers and frame indices are supported.");
4772
4773 // Check for both base regs and base FI.
4774 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4775 return false;
4776
4777 // Only cluster up to a single pair.
4778 if (ClusterSize > 2)
4779 return false;
4780
4781 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4782 return false;
4783
4784 // Can we pair these instructions based on their opcodes?
4785 unsigned FirstOpc = FirstLdSt.getOpcode();
4786 unsigned SecondOpc = SecondLdSt.getOpcode();
4787 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4788 return false;
4789
4790 // Can't merge volatiles or load/stores that have a hint to avoid pair
4791 // formation, for example.
4792 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4793 !isCandidateToMergeOrPair(SecondLdSt))
4794 return false;
4795
4796 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4797 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4798 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4799 return false;
4800
4801 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4802 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4803 return false;
4804
4805 // Pairwise instructions have a 7-bit signed offset field.
4806 if (Offset1 > 63 || Offset1 < -64)
4807 return false;
4808
4809 // The caller should already have ordered First/SecondLdSt by offset.
4810 // Note: except for non-equal frame index bases
4811 if (BaseOp1.isFI()) {
4812 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4813 "Caller should have ordered offsets.");
4814
4815 const MachineFrameInfo &MFI =
4816 FirstLdSt.getParent()->getParent()->getFrameInfo();
4817 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4818 BaseOp2.getIndex(), Offset2, SecondOpc);
4819 }
4820
4821 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4822
4823 return Offset1 + 1 == Offset2;
4824}
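// Illustrative example, editorial: two adjacent 64-bit loads
//
//   %x1 = LDRXui %x0, 0          ; ldr x1, [x0]
//   %x2 = LDRXui %x0, 1          ; ldr x2, [x0, #8]
//
// share a base register, have scaled offsets 0 and 1 (so Offset1 + 1 ==
// Offset2), and both fit the 7-bit signed LDP immediate, so the hook returns
// true and the scheduler may keep them adjacent for later ldp formation.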
4825
4826static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4827 MCRegister Reg, unsigned SubIdx,
4828 unsigned State,
4829 const TargetRegisterInfo *TRI) {
4830 if (!SubIdx)
4831 return MIB.addReg(Reg, State);
4832
4834 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4835 return MIB.addReg(Reg, State, SubIdx);
4836}
4837
4838static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4839 unsigned NumRegs) {
4840 // We really want the positive remainder mod 32 here, which happens to be
4841 // easily obtainable with a mask.
4842 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4843}
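// Worked example, editorial: for a two-register D tuple copy with DestReg
// encoding 1 (D1_D2) and SrcReg encoding 2 (D2_D3), ((1 - 2) & 0x1f) == 31,
// and 31 < 2 is false, so copying sub-registers in forward order is safe.
// With DestReg encoding 2 and SrcReg encoding 1, ((2 - 1) & 0x1f) == 1 < 2,
// so a forward copy would overwrite D2 before it is read as a source, and
// copyPhysRegTuple below iterates over the sub-registers in reverse instead.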
4844
4845void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4846 MachineBasicBlock::iterator I,
4847 const DebugLoc &DL, MCRegister DestReg,
4848 MCRegister SrcReg, bool KillSrc,
4849 unsigned Opcode,
4850 ArrayRef<unsigned> Indices) const {
4851 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4853 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4854 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4855 unsigned NumRegs = Indices.size();
4856
4857 int SubReg = 0, End = NumRegs, Incr = 1;
4858 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4859 SubReg = NumRegs - 1;
4860 End = -1;
4861 Incr = -1;
4862 }
4863
4864 for (; SubReg != End; SubReg += Incr) {
4865 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4866 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4867 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4868 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4869 }
4870}
4871
4872void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4873 MachineBasicBlock::iterator I,
4874 const DebugLoc &DL, MCRegister DestReg,
4875 MCRegister SrcReg, bool KillSrc,
4876 unsigned Opcode, unsigned ZeroReg,
4877 llvm::ArrayRef<unsigned> Indices) const {
4879 unsigned NumRegs = Indices.size();
4880
4881#ifndef NDEBUG
4882 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4883 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4884 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4885 "GPR reg sequences should not be able to overlap");
4886#endif
4887
4888 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4889 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4890 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4891 MIB.addReg(ZeroReg);
4892 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4893 MIB.addImm(0);
4894 }
4895}
4896
4897void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4898 MachineBasicBlock::iterator I,
4899 const DebugLoc &DL, MCRegister DestReg,
4900 MCRegister SrcReg, bool KillSrc,
4901 bool RenamableDest,
4902 bool RenamableSrc) const {
4903 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4904 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4906
4907 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4908 // If either operand is WSP, expand to ADD #0.
4909 if (Subtarget.hasZeroCycleRegMove()) {
4910 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4911 MCRegister DestRegX = TRI->getMatchingSuperReg(
4912 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4913 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4914 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4915 // This instruction is reading and writing X registers. This may upset
4916 // the register scavenger and machine verifier, so we need to indicate
4917 // that we are reading an undefined value from SrcRegX, but a proper
4918 // value from SrcReg.
4919 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4920 .addReg(SrcRegX, RegState::Undef)
4921 .addImm(0)
4923 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4924 } else {
4925 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4926 .addReg(SrcReg, getKillRegState(KillSrc))
4927 .addImm(0)
4929 }
4930 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4931 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4932 .addImm(0)
4934 } else {
4935 if (Subtarget.hasZeroCycleRegMove()) {
4936 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4937 MCRegister DestRegX = TRI->getMatchingSuperReg(
4938 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4939 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4940 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4941 // This instruction is reading and writing X registers. This may upset
4942 // the register scavenger and machine verifier, so we need to indicate
4943 // that we are reading an undefined value from SrcRegX, but a proper
4944 // value from SrcReg.
4945 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4946 .addReg(AArch64::XZR)
4947 .addReg(SrcRegX, RegState::Undef)
4948 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4949 } else {
4950 // Otherwise, expand to ORR WZR.
4951 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4952 .addReg(AArch64::WZR)
4953 .addReg(SrcReg, getKillRegState(KillSrc));
4954 }
4955 }
4956 return;
4957 }
4958
4959 // Copy a Predicate register by ORRing with itself.
4960 if (AArch64::PPRRegClass.contains(DestReg) &&
4961 AArch64::PPRRegClass.contains(SrcReg)) {
4963 "Unexpected SVE register.");
4964 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4965 .addReg(SrcReg) // Pg
4966 .addReg(SrcReg)
4967 .addReg(SrcReg, getKillRegState(KillSrc));
4968 return;
4969 }
4970
4971 // Copy a predicate-as-counter register by ORRing with itself as if it
4972 // were a regular predicate (mask) register.
4973 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4974 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4975 if (DestIsPNR || SrcIsPNR) {
4976 auto ToPPR = [](MCRegister R) -> MCRegister {
4977 return (R - AArch64::PN0) + AArch64::P0;
4978 };
4979 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4980 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4981
4982 if (PPRSrcReg != PPRDestReg) {
4983 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4984 .addReg(PPRSrcReg) // Pg
4985 .addReg(PPRSrcReg)
4986 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4987 if (DestIsPNR)
4988 NewMI.addDef(DestReg, RegState::Implicit);
4989 }
4990 return;
4991 }
4992
4993 // Copy a Z register by ORRing with itself.
4994 if (AArch64::ZPRRegClass.contains(DestReg) &&
4995 AArch64::ZPRRegClass.contains(SrcReg)) {
4997 "Unexpected SVE register.");
4998 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4999 .addReg(SrcReg)
5000 .addReg(SrcReg, getKillRegState(KillSrc));
5001 return;
5002 }
5003
5004 // Copy a Z register pair by copying the individual sub-registers.
5005 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5006 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5007 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5008 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5010 "Unexpected SVE register.");
5011 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5012 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5013 Indices);
5014 return;
5015 }
5016
5017 // Copy a Z register triple by copying the individual sub-registers.
5018 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5019 AArch64::ZPR3RegClass.contains(SrcReg)) {
5021 "Unexpected SVE register.");
5022 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5023 AArch64::zsub2};
5024 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5025 Indices);
5026 return;
5027 }
5028
5029 // Copy a Z register quad by copying the individual sub-registers.
5030 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5031 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5032 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5033 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5035 "Unexpected SVE register.");
5036 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5037 AArch64::zsub2, AArch64::zsub3};
5038 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5039 Indices);
5040 return;
5041 }
5042
5043 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5044 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5045 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5046 // If either operand is SP, expand to ADD #0.
5047 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5048 .addReg(SrcReg, getKillRegState(KillSrc))
5049 .addImm(0)
5051 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5052 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5053 .addImm(0)
5055 } else {
5056 // Otherwise, expand to ORR XZR.
5057 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5058 .addReg(AArch64::XZR)
5059 .addReg(SrcReg, getKillRegState(KillSrc));
5060 }
5061 return;
5062 }
5063
5064 // Copy a DDDD register quad by copying the individual sub-registers.
5065 if (AArch64::DDDDRegClass.contains(DestReg) &&
5066 AArch64::DDDDRegClass.contains(SrcReg)) {
5067 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5068 AArch64::dsub2, AArch64::dsub3};
5069 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5070 Indices);
5071 return;
5072 }
5073
5074 // Copy a DDD register triple by copying the individual sub-registers.
5075 if (AArch64::DDDRegClass.contains(DestReg) &&
5076 AArch64::DDDRegClass.contains(SrcReg)) {
5077 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5078 AArch64::dsub2};
5079 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5080 Indices);
5081 return;
5082 }
5083
5084 // Copy a DD register pair by copying the individual sub-registers.
5085 if (AArch64::DDRegClass.contains(DestReg) &&
5086 AArch64::DDRegClass.contains(SrcReg)) {
5087 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5088 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5089 Indices);
5090 return;
5091 }
5092
5093 // Copy a QQQQ register quad by copying the individual sub-registers.
5094 if (AArch64::QQQQRegClass.contains(DestReg) &&
5095 AArch64::QQQQRegClass.contains(SrcReg)) {
5096 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5097 AArch64::qsub2, AArch64::qsub3};
5098 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5099 Indices);
5100 return;
5101 }
5102
5103 // Copy a QQQ register triple by copying the individual sub-registers.
5104 if (AArch64::QQQRegClass.contains(DestReg) &&
5105 AArch64::QQQRegClass.contains(SrcReg)) {
5106 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5107 AArch64::qsub2};
5108 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5109 Indices);
5110 return;
5111 }
5112
5113 // Copy a QQ register pair by copying the individual sub-registers.
5114 if (AArch64::QQRegClass.contains(DestReg) &&
5115 AArch64::QQRegClass.contains(SrcReg)) {
5116 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5117 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5118 Indices);
5119 return;
5120 }
5121
5122 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5123 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5124 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5125 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5126 AArch64::XZR, Indices);
5127 return;
5128 }
5129
5130 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5131 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5132 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5133 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5134 AArch64::WZR, Indices);
5135 return;
5136 }
5137
5138 if (AArch64::FPR128RegClass.contains(DestReg) &&
5139 AArch64::FPR128RegClass.contains(SrcReg)) {
5140 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5141 !Subtarget.isNeonAvailable())
5142 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5143 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5144 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5145 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5146 else if (Subtarget.isNeonAvailable())
5147 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5148 .addReg(SrcReg)
5149 .addReg(SrcReg, getKillRegState(KillSrc));
5150 else {
5151 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5152 .addReg(AArch64::SP, RegState::Define)
5153 .addReg(SrcReg, getKillRegState(KillSrc))
5154 .addReg(AArch64::SP)
5155 .addImm(-16);
5156 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5157 .addReg(AArch64::SP, RegState::Define)
5158 .addReg(DestReg, RegState::Define)
5159 .addReg(AArch64::SP)
5160 .addImm(16);
5161 }
5162 return;
5163 }
5164
5165 if (AArch64::FPR64RegClass.contains(DestReg) &&
5166 AArch64::FPR64RegClass.contains(SrcReg)) {
5167 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5168 .addReg(SrcReg, getKillRegState(KillSrc));
5169 return;
5170 }
5171
5172 if (AArch64::FPR32RegClass.contains(DestReg) &&
5173 AArch64::FPR32RegClass.contains(SrcReg)) {
5174 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5175 .addReg(SrcReg, getKillRegState(KillSrc));
5176 return;
5177 }
5178
5179 if (AArch64::FPR16RegClass.contains(DestReg) &&
5180 AArch64::FPR16RegClass.contains(SrcReg)) {
5181 DestReg =
5182 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
5183 SrcReg =
5184 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
5185 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5186 .addReg(SrcReg, getKillRegState(KillSrc));
5187 return;
5188 }
5189
5190 if (AArch64::FPR8RegClass.contains(DestReg) &&
5191 AArch64::FPR8RegClass.contains(SrcReg)) {
5192 DestReg =
5193 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
5194 SrcReg =
5195 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
5196 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5197 .addReg(SrcReg, getKillRegState(KillSrc));
5198 return;
5199 }
5200
5201 // Copies between GPR64 and FPR64.
5202 if (AArch64::FPR64RegClass.contains(DestReg) &&
5203 AArch64::GPR64RegClass.contains(SrcReg)) {
5204 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5205 .addReg(SrcReg, getKillRegState(KillSrc));
5206 return;
5207 }
5208 if (AArch64::GPR64RegClass.contains(DestReg) &&
5209 AArch64::FPR64RegClass.contains(SrcReg)) {
5210 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5211 .addReg(SrcReg, getKillRegState(KillSrc));
5212 return;
5213 }
5214 // Copies between GPR32 and FPR32.
5215 if (AArch64::FPR32RegClass.contains(DestReg) &&
5216 AArch64::GPR32RegClass.contains(SrcReg)) {
5217 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5218 .addReg(SrcReg, getKillRegState(KillSrc));
5219 return;
5220 }
5221 if (AArch64::GPR32RegClass.contains(DestReg) &&
5222 AArch64::FPR32RegClass.contains(SrcReg)) {
5223 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5224 .addReg(SrcReg, getKillRegState(KillSrc));
5225 return;
5226 }
5227
5228 if (DestReg == AArch64::NZCV) {
5229 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5230 BuildMI(MBB, I, DL, get(AArch64::MSR))
5231 .addImm(AArch64SysReg::NZCV)
5232 .addReg(SrcReg, getKillRegState(KillSrc))
5233 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5234 return;
5235 }
5236
5237 if (SrcReg == AArch64::NZCV) {
5238 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5239 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5240 .addImm(AArch64SysReg::NZCV)
5241 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5242 return;
5243 }
5244
5245#ifndef NDEBUG
5247 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5248 << TRI.getRegAsmName(SrcReg) << "\n";
5249#endif
5250 llvm_unreachable("unimplemented reg-to-reg copy");
5251}
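// Illustrative examples, editorial: typical expansions chosen above are
//
//   $w1 = COPY $w2       ->  orr w1, wzr, w2   (add w1, w2, #0 if WSP is
//                             involved)
//   $q0 = COPY $q1       ->  orr v0.16b, v1.16b, v1.16b   (NEON available)
//   $d0 = COPY $d1       ->  fmov d0, d1
//   $nzcv = COPY $x0     ->  msr NZCV, x0
//
// On subtargets with zero-cycle register moves the 32-bit GPR copy is widened
// to the 64-bit ORR/ADD form so the core can treat it as a zero-cycle move.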
5252
5253static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
5254 MachineBasicBlock &MBB,
5255 MachineBasicBlock::iterator InsertBefore,
5256 const MCInstrDesc &MCID,
5257 Register SrcReg, bool IsKill,
5258 unsigned SubIdx0, unsigned SubIdx1, int FI,
5259 MachineMemOperand *MMO) {
5260 Register SrcReg0 = SrcReg;
5261 Register SrcReg1 = SrcReg;
5262 if (SrcReg.isPhysical()) {
5263 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5264 SubIdx0 = 0;
5265 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5266 SubIdx1 = 0;
5267 }
5268 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5269 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5270 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5271 .addFrameIndex(FI)
5272 .addImm(0)
5273 .addMemOperand(MMO);
5274}
5275
5276void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
5277 MachineBasicBlock::iterator MBBI,
5278 Register SrcReg, bool isKill, int FI,
5279 const TargetRegisterClass *RC,
5280 const TargetRegisterInfo *TRI,
5281 Register VReg) const {
5282 MachineFunction &MF = *MBB.getParent();
5283 MachineFrameInfo &MFI = MF.getFrameInfo();
5284
5285 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5286 MachineMemOperand *MMO =
5287 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
5288 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5289 unsigned Opc = 0;
5290 bool Offset = true;
5292 unsigned StackID = TargetStackID::Default;
5293 switch (TRI->getSpillSize(*RC)) {
5294 case 1:
5295 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5296 Opc = AArch64::STRBui;
5297 break;
5298 case 2: {
5299 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5300 Opc = AArch64::STRHui;
5301 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5302 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5304 "Unexpected register store without SVE store instructions");
5305 Opc = AArch64::STR_PXI;
5307 }
5308 break;
5309 }
5310 case 4:
5311 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5312 Opc = AArch64::STRWui;
5313 if (SrcReg.isVirtual())
5314 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5315 else
5316 assert(SrcReg != AArch64::WSP);
5317 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5318 Opc = AArch64::STRSui;
5319 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5320 Opc = AArch64::STR_PPXI;
5322 }
5323 break;
5324 case 8:
5325 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5326 Opc = AArch64::STRXui;
5327 if (SrcReg.isVirtual())
5328 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5329 else
5330 assert(SrcReg != AArch64::SP);
5331 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5332 Opc = AArch64::STRDui;
5333 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5335 get(AArch64::STPWi), SrcReg, isKill,
5336 AArch64::sube32, AArch64::subo32, FI, MMO);
5337 return;
5338 }
5339 break;
5340 case 16:
5341 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5342 Opc = AArch64::STRQui;
5343 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5344 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5345 Opc = AArch64::ST1Twov1d;
5346 Offset = false;
5347 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5349 get(AArch64::STPXi), SrcReg, isKill,
5350 AArch64::sube64, AArch64::subo64, FI, MMO);
5351 return;
5352 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5354 "Unexpected register store without SVE store instructions");
5355 Opc = AArch64::STR_ZXI;
5357 }
5358 break;
5359 case 24:
5360 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5361 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5362 Opc = AArch64::ST1Threev1d;
5363 Offset = false;
5364 }
5365 break;
5366 case 32:
5367 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5368 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5369 Opc = AArch64::ST1Fourv1d;
5370 Offset = false;
5371 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5372 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5373 Opc = AArch64::ST1Twov2d;
5374 Offset = false;
5375 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5376 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5378 "Unexpected register store without SVE store instructions");
5379 Opc = AArch64::STR_ZZXI;
5381 }
5382 break;
5383 case 48:
5384 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5385 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5386 Opc = AArch64::ST1Threev2d;
5387 Offset = false;
5388 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5390 "Unexpected register store without SVE store instructions");
5391 Opc = AArch64::STR_ZZZXI;
5393 }
5394 break;
5395 case 64:
5396 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5397 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5398 Opc = AArch64::ST1Fourv2d;
5399 Offset = false;
5400 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5401 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5403 "Unexpected register store without SVE store instructions");
5404 Opc = AArch64::STR_ZZZZXI;
5406 }
5407 break;
5408 }
5409 assert(Opc && "Unknown register class");
5410 MFI.setStackID(FI, StackID);
5411
5412 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5413 .addReg(SrcReg, getKillRegState(isKill))
5414 .addFrameIndex(FI);
5415
5416 if (Offset)
5417 MI.addImm(0);
5418 if (PNRReg.isValid())
5419 MI.addDef(PNRReg, RegState::Implicit);
5420 MI.addMemOperand(MMO);
5421}
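// Illustrative example, editorial: spilling a 128-bit FPR value to frame
// index 3 produces roughly
//
//   STRQui killed %src, %stack.3, 0 :: (store (s128) into %stack.3)
//
// Register tuples with no single store instruction (DD/QQ/DDD/...) fall back
// to the ST1 multi-vector forms with Offset = false, and the SVE classes
// switch the frame object to a scalable stack ID before using STR_ZXI etc.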
5422
5423static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
5424 MachineBasicBlock &MBB,
5425 MachineBasicBlock::iterator InsertBefore,
5426 const MCInstrDesc &MCID,
5427 Register DestReg, unsigned SubIdx0,
5428 unsigned SubIdx1, int FI,
5429 MachineMemOperand *MMO) {
5430 Register DestReg0 = DestReg;
5431 Register DestReg1 = DestReg;
5432 bool IsUndef = true;
5433 if (DestReg.isPhysical()) {
5434 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5435 SubIdx0 = 0;
5436 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5437 SubIdx1 = 0;
5438 IsUndef = false;
5439 }
5440 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5441 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5442 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5443 .addFrameIndex(FI)
5444 .addImm(0)
5445 .addMemOperand(MMO);
5446}
5447
5448void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
5449 MachineBasicBlock::iterator MBBI,
5450 Register DestReg, int FI,
5451 const TargetRegisterClass *RC,
5452 const TargetRegisterInfo *TRI,
5453 Register VReg) const {
5454 MachineFunction &MF = *MBB.getParent();
5455 MachineFrameInfo &MFI = MF.getFrameInfo();
5456 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
5457 MachineMemOperand *MMO =
5458 MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
5459 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5460
5461 unsigned Opc = 0;
5462 bool Offset = true;
5463 unsigned StackID = TargetStackID::Default;
5465 switch (TRI->getSpillSize(*RC)) {
5466 case 1:
5467 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5468 Opc = AArch64::LDRBui;
5469 break;
5470 case 2: {
5471 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5472 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5473 Opc = AArch64::LDRHui;
5474 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5476 "Unexpected register load without SVE load instructions");
5477 if (IsPNR)
5478 PNRReg = DestReg;
5479 Opc = AArch64::LDR_PXI;
5481 }
5482 break;
5483 }
5484 case 4:
5485 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5486 Opc = AArch64::LDRWui;
5487 if (DestReg.isVirtual())
5488 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5489 else
5490 assert(DestReg != AArch64::WSP);
5491 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5492 Opc = AArch64::LDRSui;
5493 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5494 Opc = AArch64::LDR_PPXI;
5496 }
5497 break;
5498 case 8:
5499 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5500 Opc = AArch64::LDRXui;
5501 if (DestReg.isVirtual())
5502 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5503 else
5504 assert(DestReg != AArch64::SP);
5505 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5506 Opc = AArch64::LDRDui;
5507 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5509 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5510 AArch64::subo32, FI, MMO);
5511 return;
5512 }
5513 break;
5514 case 16:
5515 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5516 Opc = AArch64::LDRQui;
5517 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5518 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5519 Opc = AArch64::LD1Twov1d;
5520 Offset = false;
5521 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5523 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5524 AArch64::subo64, FI, MMO);
5525 return;
5526 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5528 "Unexpected register load without SVE load instructions");
5529 Opc = AArch64::LDR_ZXI;
5531 }
5532 break;
5533 case 24:
5534 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5535 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5536 Opc = AArch64::LD1Threev1d;
5537 Offset = false;
5538 }
5539 break;
5540 case 32:
5541 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5542 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5543 Opc = AArch64::LD1Fourv1d;
5544 Offset = false;
5545 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5546 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5547 Opc = AArch64::LD1Twov2d;
5548 Offset = false;
5549 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5550 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5552 "Unexpected register load without SVE load instructions");
5553 Opc = AArch64::LDR_ZZXI;
5555 }
5556 break;
5557 case 48:
5558 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5559 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5560 Opc = AArch64::LD1Threev2d;
5561 Offset = false;
5562 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5564 "Unexpected register load without SVE load instructions");
5565 Opc = AArch64::LDR_ZZZXI;
5567 }
5568 break;
5569 case 64:
5570 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5571 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5572 Opc = AArch64::LD1Fourv2d;
5573 Offset = false;
5574 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5575 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5577 "Unexpected register load without SVE load instructions");
5578 Opc = AArch64::LDR_ZZZZXI;
5580 }
5581 break;
5582 }
5583
5584 assert(Opc && "Unknown register class");
5585 MFI.setStackID(FI, StackID);
5586
5587 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5588 .addReg(DestReg, getDefRegState(true))
5589 .addFrameIndex(FI);
5590 if (Offset)
5591 MI.addImm(0);
5592 if (PNRReg.isValid() && !PNRReg.isVirtual())
5593 MI.addDef(PNRReg, RegState::Implicit);
5594 MI.addMemOperand(MMO);
5595}
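// Illustrative sketch (not from the upstream source): for an FPR128 reload
// the code above selects LDRQui with an immediate offset of 0, so the
// resulting MIR looks roughly like
//   $q0 = LDRQui %stack.0, 0 :: (load (s128) from %stack.0)
// where the register and frame index are placeholders.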
5596
5598 const MachineInstr &UseMI,
5599 const TargetRegisterInfo *TRI) {
5600 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5601 UseMI.getIterator()),
5602 [TRI](const MachineInstr &I) {
5603 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5604 I.readsRegister(AArch64::NZCV, TRI);
5605 });
5606}
5607
5609 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5610 // The smallest scalable element supported by scaled SVE addressing
5611 // modes is a predicate, which is 2 scalable bytes in size, so the scalable
5612 // byte offset must always be a multiple of 2.
5613 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5614
5615 // VGSized offsets are divided by '2', because the VG register is the
5616 // number of 64-bit granules as opposed to 128-bit vector chunks,
5617 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5618 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5619 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5620 ByteSized = Offset.getFixed();
5621 VGSized = Offset.getScalable() / 2;
5622}
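// Worked example (illustrative): a StackOffset of {fixed: 16, scalable: 16},
// i.e. one full SVE data vector above a 16-byte area, decomposes into
// ByteSized = 16 and VGSized = 8, so the DWARF expression describes an
// offset of 16 + 8 * VG bytes.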
5623
5624/// Decomposes this frame offset into the parts needed to materialise it
5625/// with add/sub instructions (fixed bytes, predicate vectors, data vectors).
5626/// For non-scalable offsets this is simply the byte size.
5628 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5629 int64_t &NumDataVectors) {
5630 // The smallest scalable element supported by scaled SVE addressing
5631 // modes is a predicate, which is 2 scalable bytes in size, so the scalable
5632 // byte offset must always be a multiple of 2.
5633 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5634
5635 NumBytes = Offset.getFixed();
5636 NumDataVectors = 0;
5637 NumPredicateVectors = Offset.getScalable() / 2;
5638 // These values are later used to emit the frame offset adjustment.
5639 // If the adjustment requires ADDPL and would need more than two ADDPL
5640 // instructions, part of the offset is folded into NumDataVectors so that
5641 // ADDVL covers that part, reducing the number of ADDPL instructions.
5642 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5643 NumPredicateVectors > 62) {
5644 NumDataVectors = NumPredicateVectors / 8;
5645 NumPredicateVectors -= NumDataVectors * 8;
5646 }
5647}
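// Worked example (illustrative): a scalable offset of 146 bytes yields
// NumPredicateVectors = 73; since 73 > 62, it is split into NumDataVectors = 9
// and NumPredicateVectors = 1, which later lowers to a single ADDVL #9 plus a
// single ADDPL #1 rather than a chain of ADDPLs.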
5648
5649// Convenience function to create a DWARF expression for
5650// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5651static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5652 int NumVGScaledBytes, unsigned VG,
5653 llvm::raw_string_ostream &Comment) {
5654 uint8_t buffer[16];
5655
5656 if (NumBytes) {
5657 Expr.push_back(dwarf::DW_OP_consts);
5658 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5659 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5660 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5661 }
5662
5663 if (NumVGScaledBytes) {
5664 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5665 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5666
5667 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5668 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5669 Expr.push_back(0);
5670
5671 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5672 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5673
5674 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5675 << std::abs(NumVGScaledBytes) << " * VG";
5676 }
5677}
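// Illustrative byte sequence (assuming VG's DWARF register number is 46):
// for NumBytes = 16 and NumVGScaledBytes = 8 this appends
//   DW_OP_consts 16, DW_OP_plus,
//   DW_OP_consts 8, DW_OP_bregx 46 0, DW_OP_mul, DW_OP_plus
// and the comment stream receives " + 16 + 8 * VG".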
5678
5679// Creates an MCCFIInstruction:
5680// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5682 unsigned Reg,
5683 const StackOffset &Offset) {
5684 int64_t NumBytes, NumVGScaledBytes;
5686 NumVGScaledBytes);
5687 std::string CommentBuffer;
5688 llvm::raw_string_ostream Comment(CommentBuffer);
5689
5690 if (Reg == AArch64::SP)
5691 Comment << "sp";
5692 else if (Reg == AArch64::FP)
5693 Comment << "fp";
5694 else
5695 Comment << printReg(Reg, &TRI);
5696
5697 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5698 SmallString<64> Expr;
5699 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5700 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5701 Expr.push_back(0);
5702 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5703 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5704
5705 // Wrap this into DW_CFA_def_cfa.
5706 SmallString<64> DefCfaExpr;
5707 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5708 uint8_t buffer[16];
5709 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5710 DefCfaExpr.append(Expr.str());
5711 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5712 Comment.str());
5713}
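// Sketch of the resulting escape blob (illustrative): for SP + 16 + 8 * VG the
// payload is DW_CFA_def_cfa_expression, a ULEB128 length, DW_OP_breg31 0, and
// the VG-scaled expression appended above; the "sp + 16 + 8 * VG" string is
// only used as an assembly-printing comment.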
5714
5716 unsigned FrameReg, unsigned Reg,
5717 const StackOffset &Offset,
5718 bool LastAdjustmentWasScalable) {
5719 if (Offset.getScalable())
5720 return createDefCFAExpression(TRI, Reg, Offset);
5721
5722 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5723 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5724
5725 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5726 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5727}
5728
5730 unsigned Reg,
5731 const StackOffset &OffsetFromDefCFA) {
5732 int64_t NumBytes, NumVGScaledBytes;
5734 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5735
5736 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5737
5738 // Non-scalable offsets can use DW_CFA_offset directly.
5739 if (!NumVGScaledBytes)
5740 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5741
5742 std::string CommentBuffer;
5743 llvm::raw_string_ostream Comment(CommentBuffer);
5744 Comment << printReg(Reg, &TRI) << " @ cfa";
5745
5746 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5747 SmallString<64> OffsetExpr;
5748 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5749 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5750
5751 // Wrap this into DW_CFA_expression
5752 SmallString<64> CfaExpr;
5753 CfaExpr.push_back(dwarf::DW_CFA_expression);
5754 uint8_t buffer[16];
5755 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5756 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5757 CfaExpr.append(OffsetExpr.str());
5758
5759 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5760 Comment.str());
5761}
5762
5763// Helper function to emit a frame offset adjustment from a given
5764// pointer (SrcReg), stored into DestReg. This function is explicit
5765// in that it requires the opcode.
5768 const DebugLoc &DL, unsigned DestReg,
5769 unsigned SrcReg, int64_t Offset, unsigned Opc,
5770 const TargetInstrInfo *TII,
5771 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5772 bool *HasWinCFI, bool EmitCFAOffset,
5773 StackOffset CFAOffset, unsigned FrameReg) {
5774 int Sign = 1;
5775 unsigned MaxEncoding, ShiftSize;
5776 switch (Opc) {
5777 case AArch64::ADDXri:
5778 case AArch64::ADDSXri:
5779 case AArch64::SUBXri:
5780 case AArch64::SUBSXri:
5781 MaxEncoding = 0xfff;
5782 ShiftSize = 12;
5783 break;
5784 case AArch64::ADDVL_XXI:
5785 case AArch64::ADDPL_XXI:
5786 case AArch64::ADDSVL_XXI:
5787 case AArch64::ADDSPL_XXI:
5788 MaxEncoding = 31;
5789 ShiftSize = 0;
5790 if (Offset < 0) {
5791 MaxEncoding = 32;
5792 Sign = -1;
5793 Offset = -Offset;
5794 }
5795 break;
5796 default:
5797 llvm_unreachable("Unsupported opcode");
5798 }
5799
5800 // `Offset` can be in bytes or in "scalable bytes".
5801 int VScale = 1;
5802 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5803 VScale = 16;
5804 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5805 VScale = 2;
5806
5807 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5808 // scratch register. If DestReg is a virtual register, use it as the
5809 // scratch register; otherwise, create a new virtual register (to be
5810 // replaced by the scavenger at the end of PEI). That case can be optimized
5811 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5812 // register can be loaded with offset%8 and the add/sub can use an extending
5813 // instruction with LSL#3.
5814 // Currently the function handles any offsets but generates a poor sequence
5815 // of code.
5816 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5817
5818 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5819 Register TmpReg = DestReg;
5820 if (TmpReg == AArch64::XZR)
5822 &AArch64::GPR64RegClass);
5823 do {
5824 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5825 unsigned LocalShiftSize = 0;
5826 if (ThisVal > MaxEncoding) {
5827 ThisVal = ThisVal >> ShiftSize;
5828 LocalShiftSize = ShiftSize;
5829 }
5830 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5831 "Encoding cannot handle value that big");
5832
5833 Offset -= ThisVal << LocalShiftSize;
5834 if (Offset == 0)
5835 TmpReg = DestReg;
5836 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5837 .addReg(SrcReg)
5838 .addImm(Sign * (int)ThisVal);
5839 if (ShiftSize)
5840 MBI = MBI.addImm(
5842 MBI = MBI.setMIFlag(Flag);
5843
5844 auto Change =
5845 VScale == 1
5846 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5847 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5848 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5849 CFAOffset += Change;
5850 else
5851 CFAOffset -= Change;
5852 if (EmitCFAOffset && DestReg == TmpReg) {
5853 MachineFunction &MF = *MBB.getParent();
5854 const TargetSubtargetInfo &STI = MF.getSubtarget();
5855 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5856
5857 unsigned CFIIndex = MF.addFrameInst(
5858 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5859 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5860 .addCFIIndex(CFIIndex)
5861 .setMIFlags(Flag);
5862 }
5863
5864 if (NeedsWinCFI) {
5865 assert(Sign == 1 && "SEH directives should always have a positive sign");
5866 int Imm = (int)(ThisVal << LocalShiftSize);
5867 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5868 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5869 if (HasWinCFI)
5870 *HasWinCFI = true;
5871 if (Imm == 0)
5872 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5873 else
5874 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5875 .addImm(Imm)
5876 .setMIFlag(Flag);
5877 assert(Offset == 0 && "Expected remaining offset to be zero to "
5878 "emit a single SEH directive");
5879 } else if (DestReg == AArch64::SP) {
5880 if (HasWinCFI)
5881 *HasWinCFI = true;
5882 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5883 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5884 .addImm(Imm)
5885 .setMIFlag(Flag);
5886 }
5887 }
5888
5889 SrcReg = TmpReg;
5890 } while (Offset);
5891}
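// Worked example (illustrative): a SUBXri adjustment of 5000 bytes from SP
// does not fit a single 12-bit immediate, so the loop above emits
//   sub sp, sp, #1, lsl #12   ; 4096 bytes
//   sub sp, sp, #904
// with CFA/SEH directives interleaved when requested.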
5892
5895 unsigned DestReg, unsigned SrcReg,
5897 MachineInstr::MIFlag Flag, bool SetNZCV,
5898 bool NeedsWinCFI, bool *HasWinCFI,
5899 bool EmitCFAOffset, StackOffset CFAOffset,
5900 unsigned FrameReg) {
5901 // If a function is marked as arm_locally_streaming, then the runtime value of
5902 // vscale in the prologue/epilogue is different from the runtime value of vscale
5903 // in the function's body. To avoid having to consider multiple vscales,
5904 // we can use `addsvl` to allocate any scalable stack-slots, which under
5905 // most circumstances will be only locals, not callee-save slots.
5906 const Function &F = MBB.getParent()->getFunction();
5907 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5908
5909 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5911 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5912
5913 // First emit non-scalable frame offsets, or a simple 'mov'.
5914 if (Bytes || (!Offset && SrcReg != DestReg)) {
5915 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5916 "SP increment/decrement not 8-byte aligned");
5917 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5918 if (Bytes < 0) {
5919 Bytes = -Bytes;
5920 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5921 }
5922 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5923 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5924 FrameReg);
5925 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5926 ? StackOffset::getFixed(-Bytes)
5927 : StackOffset::getFixed(Bytes);
5928 SrcReg = DestReg;
5929 FrameReg = DestReg;
5930 }
5931
5932 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5933 "SetNZCV not supported with SVE vectors");
5934 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5935 "WinCFI not supported with SVE vectors");
5936
5937 if (NumDataVectors) {
5938 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5939 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5940 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5941 CFAOffset, FrameReg);
5942 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5943 SrcReg = DestReg;
5944 }
5945
5946 if (NumPredicateVectors) {
5947 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5948 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5949 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5950 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5951 CFAOffset, FrameReg);
5952 }
5953}
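// Illustrative example: a StackOffset of {fixed: 16, scalable: 32} applied to
// SP decomposes into Bytes = 16 and NumDataVectors = 2 (16 predicate-vector
// units fold evenly into two data vectors), so the emitted sequence is roughly
//   add sp, sp, #16
//   addvl sp, sp, #2
// plus the corresponding CFA updates when EmitCFAOffset is set.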
5954
5957 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5958 LiveIntervals *LIS, VirtRegMap *VRM) const {
5959 // This is a bit of a hack. Consider this instruction:
5960 //
5961 // %0 = COPY %sp; GPR64all:%0
5962 //
5963 // We explicitly chose GPR64all for the virtual register so such a copy might
5964 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5965 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5966 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5967 //
5968 // To prevent that, we are going to constrain the %0 register class here.
5969 if (MI.isFullCopy()) {
5970 Register DstReg = MI.getOperand(0).getReg();
5971 Register SrcReg = MI.getOperand(1).getReg();
5972 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5973 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5974 return nullptr;
5975 }
5976 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5977 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5978 return nullptr;
5979 }
5980 // Nothing can be folded with a copy from/to NZCV.
5981 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5982 return nullptr;
5983 }
5984
5985 // Handle the case where a copy is being spilled or filled but the source
5986 // and destination register class don't match. For example:
5987 //
5988 // %0 = COPY %xzr; GPR64common:%0
5989 //
5990 // In this case we can still safely fold away the COPY and generate the
5991 // following spill code:
5992 //
5993 // STRXui %xzr, %stack.0
5994 //
5995 // This also eliminates spilled cross register class COPYs (e.g. between x and
5996 // d regs) of the same size. For example:
5997 //
5998 // %0 = COPY %1; GPR64:%0, FPR64:%1
5999 //
6000 // will be filled as
6001 //
6002 // LDRDui %0, fi<#0>
6003 //
6004 // instead of
6005 //
6006 // LDRXui %Temp, fi<#0>
6007 // %0 = FMOV %Temp
6008 //
6009 if (MI.isCopy() && Ops.size() == 1 &&
6010 // Make sure we're only folding the explicit COPY defs/uses.
6011 (Ops[0] == 0 || Ops[0] == 1)) {
6012 bool IsSpill = Ops[0] == 0;
6013 bool IsFill = !IsSpill;
6015 const MachineRegisterInfo &MRI = MF.getRegInfo();
6016 MachineBasicBlock &MBB = *MI.getParent();
6017 const MachineOperand &DstMO = MI.getOperand(0);
6018 const MachineOperand &SrcMO = MI.getOperand(1);
6019 Register DstReg = DstMO.getReg();
6020 Register SrcReg = SrcMO.getReg();
6021 // This is slightly expensive to compute for physical regs since
6022 // getMinimalPhysRegClass is slow.
6023 auto getRegClass = [&](unsigned Reg) {
6024 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6025 : TRI.getMinimalPhysRegClass(Reg);
6026 };
6027
6028 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6029 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6030 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6031 "Mismatched register size in non subreg COPY");
6032 if (IsSpill)
6033 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6034 getRegClass(SrcReg), &TRI, Register());
6035 else
6036 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6037 getRegClass(DstReg), &TRI, Register());
6038 return &*--InsertPt;
6039 }
6040
6041 // Handle cases like spilling def of:
6042 //
6043 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6044 //
6045 // where the physical register source can be widened and stored to the full
6046 // virtual reg destination stack slot, in this case producing:
6047 //
6048 // STRXui %xzr, %stack.0
6049 //
6050 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6051 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6052 assert(SrcMO.getSubReg() == 0 &&
6053 "Unexpected subreg on physical register");
6054 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6055 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6056 Register());
6057 return &*--InsertPt;
6058 }
6059
6060 // Handle cases like filling use of:
6061 //
6062 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6063 //
6064 // where we can load the full virtual reg source stack slot, into the subreg
6065 // destination, in this case producing:
6066 //
6067 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6068 //
6069 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6070 const TargetRegisterClass *FillRC;
6071 switch (DstMO.getSubReg()) {
6072 default:
6073 FillRC = nullptr;
6074 break;
6075 case AArch64::sub_32:
6076 FillRC = &AArch64::GPR32RegClass;
6077 break;
6078 case AArch64::ssub:
6079 FillRC = &AArch64::FPR32RegClass;
6080 break;
6081 case AArch64::dsub:
6082 FillRC = &AArch64::FPR64RegClass;
6083 break;
6084 }
6085
6086 if (FillRC) {
6087 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6088 TRI.getRegSizeInBits(*FillRC) &&
6089 "Mismatched regclass size on folded subreg COPY");
6090 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6091 Register());
6092 MachineInstr &LoadMI = *--InsertPt;
6093 MachineOperand &LoadDst = LoadMI.getOperand(0);
6094 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6095 LoadDst.setSubReg(DstMO.getSubReg());
6096 LoadDst.setIsUndef();
6097 return &LoadMI;
6098 }
6099 }
6100 }
6101
6102 // Cannot fold.
6103 return nullptr;
6104}
6105
6107 StackOffset &SOffset,
6108 bool *OutUseUnscaledOp,
6109 unsigned *OutUnscaledOp,
6110 int64_t *EmittableOffset) {
6111 // Set output values in case of early exit.
6112 if (EmittableOffset)
6113 *EmittableOffset = 0;
6114 if (OutUseUnscaledOp)
6115 *OutUseUnscaledOp = false;
6116 if (OutUnscaledOp)
6117 *OutUnscaledOp = 0;
6118
6119 // Exit early for structured vector spills/fills as they can't take an
6120 // immediate offset.
6121 switch (MI.getOpcode()) {
6122 default:
6123 break;
6124 case AArch64::LD1Rv1d:
6125 case AArch64::LD1Rv2s:
6126 case AArch64::LD1Rv2d:
6127 case AArch64::LD1Rv4h:
6128 case AArch64::LD1Rv4s:
6129 case AArch64::LD1Rv8b:
6130 case AArch64::LD1Rv8h:
6131 case AArch64::LD1Rv16b:
6132 case AArch64::LD1Twov2d:
6133 case AArch64::LD1Threev2d:
6134 case AArch64::LD1Fourv2d:
6135 case AArch64::LD1Twov1d:
6136 case AArch64::LD1Threev1d:
6137 case AArch64::LD1Fourv1d:
6138 case AArch64::ST1Twov2d:
6139 case AArch64::ST1Threev2d:
6140 case AArch64::ST1Fourv2d:
6141 case AArch64::ST1Twov1d:
6142 case AArch64::ST1Threev1d:
6143 case AArch64::ST1Fourv1d:
6144 case AArch64::ST1i8:
6145 case AArch64::ST1i16:
6146 case AArch64::ST1i32:
6147 case AArch64::ST1i64:
6148 case AArch64::IRG:
6149 case AArch64::IRGstack:
6150 case AArch64::STGloop:
6151 case AArch64::STZGloop:
6153 }
6154
6155 // Get the min/max offset and the scale.
6156 TypeSize ScaleValue(0U, false), Width(0U, false);
6157 int64_t MinOff, MaxOff;
6158 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6159 MaxOff))
6160 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6161
6162 // Construct the complete offset.
6163 bool IsMulVL = ScaleValue.isScalable();
6164 unsigned Scale = ScaleValue.getKnownMinValue();
6165 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6166
6167 const MachineOperand &ImmOpnd =
6168 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6169 Offset += ImmOpnd.getImm() * Scale;
6170
6171 // If the offset doesn't match the scale, we rewrite the instruction to
6172 // use the unscaled instruction instead; likewise if we have a negative
6173 // offset and there is an unscaled op to use.
6174 std::optional<unsigned> UnscaledOp =
6176 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6177 if (useUnscaledOp &&
6178 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6179 MaxOff))
6180 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6181
6182 Scale = ScaleValue.getKnownMinValue();
6183 assert(IsMulVL == ScaleValue.isScalable() &&
6184 "Unscaled opcode has different value for scalable");
6185
6186 int64_t Remainder = Offset % Scale;
6187 assert(!(Remainder && useUnscaledOp) &&
6188 "Cannot have remainder when using unscaled op");
6189
6190 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6191 int64_t NewOffset = Offset / Scale;
6192 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6193 Offset = Remainder;
6194 else {
6195 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6196 Offset = Offset - (NewOffset * Scale);
6197 }
6198
6199 if (EmittableOffset)
6200 *EmittableOffset = NewOffset;
6201 if (OutUseUnscaledOp)
6202 *OutUseUnscaledOp = useUnscaledOp;
6203 if (OutUnscaledOp && UnscaledOp)
6204 *OutUnscaledOp = *UnscaledOp;
6205
6206 if (IsMulVL)
6207 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6208 else
6209 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6211 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6212}
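// Worked example (illustrative): for a scaled LDRXui (scale 8) whose combined
// byte offset is 20, the offset is not a multiple of the scale, so the
// unscaled form (LDURXi, scale 1, signed 9-bit range) is selected; NewOffset
// becomes 20, the remaining SOffset becomes zero, and the offset is legal.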
6213
6215 unsigned FrameReg, StackOffset &Offset,
6216 const AArch64InstrInfo *TII) {
6217 unsigned Opcode = MI.getOpcode();
6218 unsigned ImmIdx = FrameRegIdx + 1;
6219
6220 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6221 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6222 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6223 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6224 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6225 MI.eraseFromParent();
6226 Offset = StackOffset();
6227 return true;
6228 }
6229
6230 int64_t NewOffset;
6231 unsigned UnscaledOp;
6232 bool UseUnscaledOp;
6233 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6234 &UnscaledOp, &NewOffset);
6237 // Replace the FrameIndex with FrameReg.
6238 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6239 if (UseUnscaledOp)
6240 MI.setDesc(TII->get(UnscaledOp));
6241
6242 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6243 return !Offset;
6244 }
6245
6246 return false;
6247}
6248
6251 DebugLoc DL;
6252 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
6253}
6254
6256 return MCInstBuilder(AArch64::HINT).addImm(0);
6257}
6258
6259// AArch64 supports MachineCombiner.
6260bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6261
6262// True when Opc sets flag
6263static bool isCombineInstrSettingFlag(unsigned Opc) {
6264 switch (Opc) {
6265 case AArch64::ADDSWrr:
6266 case AArch64::ADDSWri:
6267 case AArch64::ADDSXrr:
6268 case AArch64::ADDSXri:
6269 case AArch64::SUBSWrr:
6270 case AArch64::SUBSXrr:
6271 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6272 case AArch64::SUBSWri:
6273 case AArch64::SUBSXri:
6274 return true;
6275 default:
6276 break;
6277 }
6278 return false;
6279}
6280
6281// 32b Opcodes that can be combined with a MUL
6282static bool isCombineInstrCandidate32(unsigned Opc) {
6283 switch (Opc) {
6284 case AArch64::ADDWrr:
6285 case AArch64::ADDWri:
6286 case AArch64::SUBWrr:
6287 case AArch64::ADDSWrr:
6288 case AArch64::ADDSWri:
6289 case AArch64::SUBSWrr:
6290 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6291 case AArch64::SUBWri:
6292 case AArch64::SUBSWri:
6293 return true;
6294 default:
6295 break;
6296 }
6297 return false;
6298}
6299
6300// 64b Opcodes that can be combined with a MUL
6301static bool isCombineInstrCandidate64(unsigned Opc) {
6302 switch (Opc) {
6303 case AArch64::ADDXrr:
6304 case AArch64::ADDXri:
6305 case AArch64::SUBXrr:
6306 case AArch64::ADDSXrr:
6307 case AArch64::ADDSXri:
6308 case AArch64::SUBSXrr:
6309 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6310 case AArch64::SUBXri:
6311 case AArch64::SUBSXri:
6312 case AArch64::ADDv8i8:
6313 case AArch64::ADDv16i8:
6314 case AArch64::ADDv4i16:
6315 case AArch64::ADDv8i16:
6316 case AArch64::ADDv2i32:
6317 case AArch64::ADDv4i32:
6318 case AArch64::SUBv8i8:
6319 case AArch64::SUBv16i8:
6320 case AArch64::SUBv4i16:
6321 case AArch64::SUBv8i16:
6322 case AArch64::SUBv2i32:
6323 case AArch64::SUBv4i32:
6324 return true;
6325 default:
6326 break;
6327 }
6328 return false;
6329}
6330
6331// FP Opcodes that can be combined with a FMUL.
6332static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6333 switch (Inst.getOpcode()) {
6334 default:
6335 break;
6336 case AArch64::FADDHrr:
6337 case AArch64::FADDSrr:
6338 case AArch64::FADDDrr:
6339 case AArch64::FADDv4f16:
6340 case AArch64::FADDv8f16:
6341 case AArch64::FADDv2f32:
6342 case AArch64::FADDv2f64:
6343 case AArch64::FADDv4f32:
6344 case AArch64::FSUBHrr:
6345 case AArch64::FSUBSrr:
6346 case AArch64::FSUBDrr:
6347 case AArch64::FSUBv4f16:
6348 case AArch64::FSUBv8f16:
6349 case AArch64::FSUBv2f32:
6350 case AArch64::FSUBv2f64:
6351 case AArch64::FSUBv4f32:
6353 // We can fuse FADD/FSUB with FMUL if fusion is either allowed globally by
6354 // the target options or FADD/FSUB has the contract fast-math flag.
6355 return Options.UnsafeFPMath ||
6356 Options.AllowFPOpFusion == FPOpFusion::Fast ||
6358 return true;
6359 }
6360 return false;
6361}
6362
6363// Opcodes that can be combined with a MUL
6364static bool isCombineInstrCandidate(unsigned Opc) {
6366}
6367
6368//
6369// Utility routine that checks if \param MO is defined by an
6370// \param CombineOpc instruction in the basic block \param MBB
6372 unsigned CombineOpc, unsigned ZeroReg = 0,
6373 bool CheckZeroReg = false) {
6375 MachineInstr *MI = nullptr;
6376
6377 if (MO.isReg() && MO.getReg().isVirtual())
6378 MI = MRI.getUniqueVRegDef(MO.getReg());
6379 // And it needs to be in the trace (otherwise, it won't have a depth).
6380 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
6381 return false;
6382 // Its only use must be the instruction we are combining with.
6383 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6384 return false;
6385
6386 if (CheckZeroReg) {
6387 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6388 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6389 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6390 // The third input reg must be zero.
6391 if (MI->getOperand(3).getReg() != ZeroReg)
6392 return false;
6393 }
6394
6395 if (isCombineInstrSettingFlag(CombineOpc) &&
6396 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6397 return false;
6398
6399 return true;
6400}
6401
6402//
6403// Is \param MO defined by an integer multiply and can be combined?
6405 unsigned MulOpc, unsigned ZeroReg) {
6406 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6407}
6408
6409//
6410// Is \param MO defined by a floating-point multiply and can be combined?
6412 unsigned MulOpc) {
6413 return canCombine(MBB, MO, MulOpc);
6414}
6415
6416// TODO: There are many more machine instruction opcodes to match:
6417// 1. Other data types (integer, vectors)
6418// 2. Other math / logic operations (xor, or)
6419// 3. Other forms of the same operation (intrinsics and other variants)
6421 bool Invert) const {
6422 if (Invert)
6423 return false;
6424 switch (Inst.getOpcode()) {
6425 // == Floating-point types ==
6426 // -- Floating-point instructions --
6427 case AArch64::FADDHrr:
6428 case AArch64::FADDSrr:
6429 case AArch64::FADDDrr:
6430 case AArch64::FMULHrr:
6431 case AArch64::FMULSrr:
6432 case AArch64::FMULDrr:
6433 case AArch64::FMULX16:
6434 case AArch64::FMULX32:
6435 case AArch64::FMULX64:
6436 // -- Advanced SIMD instructions --
6437 case AArch64::FADDv4f16:
6438 case AArch64::FADDv8f16:
6439 case AArch64::FADDv2f32:
6440 case AArch64::FADDv4f32:
6441 case AArch64::FADDv2f64:
6442 case AArch64::FMULv4f16:
6443 case AArch64::FMULv8f16:
6444 case AArch64::FMULv2f32:
6445 case AArch64::FMULv4f32:
6446 case AArch64::FMULv2f64:
6447 case AArch64::FMULXv4f16:
6448 case AArch64::FMULXv8f16:
6449 case AArch64::FMULXv2f32:
6450 case AArch64::FMULXv4f32:
6451 case AArch64::FMULXv2f64:
6452 // -- SVE instructions --
6453 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6454 // in the SVE instruction set (though there are predicated ones).
6455 case AArch64::FADD_ZZZ_H:
6456 case AArch64::FADD_ZZZ_S:
6457 case AArch64::FADD_ZZZ_D:
6458 case AArch64::FMUL_ZZZ_H:
6459 case AArch64::FMUL_ZZZ_S:
6460 case AArch64::FMUL_ZZZ_D:
6461 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6464
6465 // == Integer types ==
6466 // -- Base instructions --
6467 // Opcodes MULWrr and MULXrr don't exist because
6468 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6469 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6470 // The machine combiner does not support three-source-operand machine
6471 // instructions, so we cannot reassociate MULs.
6472 case AArch64::ADDWrr:
6473 case AArch64::ADDXrr:
6474 case AArch64::ANDWrr:
6475 case AArch64::ANDXrr:
6476 case AArch64::ORRWrr:
6477 case AArch64::ORRXrr:
6478 case AArch64::EORWrr:
6479 case AArch64::EORXrr:
6480 case AArch64::EONWrr:
6481 case AArch64::EONXrr:
6482 // -- Advanced SIMD instructions --
6483 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6484 // in the Advanced SIMD instruction set.
6485 case AArch64::ADDv8i8:
6486 case AArch64::ADDv16i8:
6487 case AArch64::ADDv4i16:
6488 case AArch64::ADDv8i16:
6489 case AArch64::ADDv2i32:
6490 case AArch64::ADDv4i32:
6491 case AArch64::ADDv1i64:
6492 case AArch64::ADDv2i64:
6493 case AArch64::MULv8i8:
6494 case AArch64::MULv16i8:
6495 case AArch64::MULv4i16:
6496 case AArch64::MULv8i16:
6497 case AArch64::MULv2i32:
6498 case AArch64::MULv4i32:
6499 case AArch64::ANDv8i8:
6500 case AArch64::ANDv16i8:
6501 case AArch64::ORRv8i8:
6502 case AArch64::ORRv16i8:
6503 case AArch64::EORv8i8:
6504 case AArch64::EORv16i8:
6505 // -- SVE instructions --
6506 case AArch64::ADD_ZZZ_B:
6507 case AArch64::ADD_ZZZ_H:
6508 case AArch64::ADD_ZZZ_S:
6509 case AArch64::ADD_ZZZ_D:
6510 case AArch64::MUL_ZZZ_B:
6511 case AArch64::MUL_ZZZ_H:
6512 case AArch64::MUL_ZZZ_S:
6513 case AArch64::MUL_ZZZ_D:
6514 case AArch64::AND_ZZZ:
6515 case AArch64::ORR_ZZZ:
6516 case AArch64::EOR_ZZZ:
6517 return true;
6518
6519 default:
6520 return false;
6521 }
6522}
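// Reassociation sketch (illustrative): reporting an opcode as associative and
// commutative lets the machine combiner rebalance a chain such as
//   ((a + b) + c) + d  ==>  (a + b) + (c + d)
// shortening the critical path from three dependent additions to two.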
6523
6524/// Find instructions that can be turned into madd.
6526 SmallVectorImpl<unsigned> &Patterns) {
6527 unsigned Opc = Root.getOpcode();
6528 MachineBasicBlock &MBB = *Root.getParent();
6529 bool Found = false;
6530
6531 if (!isCombineInstrCandidate(Opc))
6532 return false;
6533 if (isCombineInstrSettingFlag(Opc)) {
6534 int Cmp_NZCV =
6535 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6536 // When NZCV is live, bail out.
6537 if (Cmp_NZCV == -1)
6538 return false;
6539 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6540 // When the opcode can't change, bail out.
6541 // CHECKME: do we miss any cases for opcode conversion?
6542 if (NewOpc == Opc)
6543 return false;
6544 Opc = NewOpc;
6545 }
6546
6547 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6548 unsigned Pattern) {
6549 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6550 Patterns.push_back(Pattern);
6551 Found = true;
6552 }
6553 };
6554
6555 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6556 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6557 Patterns.push_back(Pattern);
6558 Found = true;
6559 }
6560 };
6561
6563
6564 switch (Opc) {
6565 default:
6566 break;
6567 case AArch64::ADDWrr:
6568 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6569 "ADDWrr does not have register operands");
6570 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6571 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6572 break;
6573 case AArch64::ADDXrr:
6574 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6575 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6576 break;
6577 case AArch64::SUBWrr:
6578 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6579 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6580 break;
6581 case AArch64::SUBXrr:
6582 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6583 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6584 break;
6585 case AArch64::ADDWri:
6586 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6587 break;
6588 case AArch64::ADDXri:
6589 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6590 break;
6591 case AArch64::SUBWri:
6592 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6593 break;
6594 case AArch64::SUBXri:
6595 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6596 break;
6597 case AArch64::ADDv8i8:
6598 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6599 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6600 break;
6601 case AArch64::ADDv16i8:
6602 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6603 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6604 break;
6605 case AArch64::ADDv4i16:
6606 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6607 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6608 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6609 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6610 break;
6611 case AArch64::ADDv8i16:
6612 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6613 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6614 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6615 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6616 break;
6617 case AArch64::ADDv2i32:
6618 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6619 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6620 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6621 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6622 break;
6623 case AArch64::ADDv4i32:
6624 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6625 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6626 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6627 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6628 break;
6629 case AArch64::SUBv8i8:
6630 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6631 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6632 break;
6633 case AArch64::SUBv16i8:
6634 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6635 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6636 break;
6637 case AArch64::SUBv4i16:
6638 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6639 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6640 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6641 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6642 break;
6643 case AArch64::SUBv8i16:
6644 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6645 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6646 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6647 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6648 break;
6649 case AArch64::SUBv2i32:
6650 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6651 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6652 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6653 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6654 break;
6655 case AArch64::SUBv4i32:
6656 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6657 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6658 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6659 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6660 break;
6661 }
6662 return Found;
6663}
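// Illustrative example (virtual register names are placeholders): given
//   %3:gpr32 = MADDWrrr %1, %2, $wzr   ; the MUL alias
//   %4:gpr32 = ADDWrr %3, %0
// the MULADDW_OP1 pattern is recorded, and the combiner can later rewrite the
// pair as a single %4 = MADDWrrr %1, %2, %0.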
6664/// Floating-Point Support
6665
6666/// Find instructions that can be turned into an fmadd.
6668 SmallVectorImpl<unsigned> &Patterns) {
6669
6670 if (!isCombineInstrCandidateFP(Root))
6671 return false;
6672
6673 MachineBasicBlock &MBB = *Root.getParent();
6674 bool Found = false;
6675
6676 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6677 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6678 Patterns.push_back(Pattern);
6679 return true;
6680 }
6681 return false;
6682 };
6683
6685
6686 switch (Root.getOpcode()) {
6687 default:
6688 assert(false && "Unsupported FP instruction in combiner\n");
6689 break;
6690 case AArch64::FADDHrr:
6691 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6692 "FADDHrr does not have register operands");
6693
6694 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6695 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6696 break;
6697 case AArch64::FADDSrr:
6698 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6699 "FADDSrr does not have register operands");
6700
6701 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6702 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6703
6704 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6705 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6706 break;
6707 case AArch64::FADDDrr:
6708 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6709 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6710
6711 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6712 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6713 break;
6714 case AArch64::FADDv4f16:
6715 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6716 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6717
6718 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6719 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6720 break;
6721 case AArch64::FADDv8f16:
6722 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6723 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6724
6725 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6726 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6727 break;
6728 case AArch64::FADDv2f32:
6729 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6730 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6731
6732 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6733 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6734 break;
6735 case AArch64::FADDv2f64:
6736 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6737 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6738
6739 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6740 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6741 break;
6742 case AArch64::FADDv4f32:
6743 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6744 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6745
6746 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6747 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6748 break;
6749 case AArch64::FSUBHrr:
6750 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6751 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6752 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6753 break;
6754 case AArch64::FSUBSrr:
6755 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6756
6757 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6758 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6759
6760 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6761 break;
6762 case AArch64::FSUBDrr:
6763 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6764
6765 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6766 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6767
6768 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6769 break;
6770 case AArch64::FSUBv4f16:
6771 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6772 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6773
6774 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6775 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6776 break;
6777 case AArch64::FSUBv8f16:
6778 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6779 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6780
6781 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6782 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6783 break;
6784 case AArch64::FSUBv2f32:
6785 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6786 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6787
6788 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6789 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6790 break;
6791 case AArch64::FSUBv2f64:
6792 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6793 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6794
6795 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6796 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6797 break;
6798 case AArch64::FSUBv4f32:
6799 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6800 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6801
6802 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6803 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6804 break;
6805 }
6806 return Found;
6807}
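// Note on the pattern naming (illustrative): the OP1/OP2 suffix records which
// operand of the FADD/FSUB is produced by the FMUL. For example
//   %3:fpr64 = FMULv2f32 %1, %2
//   %4:fpr64 = FADDv2f32 %3, %0
// matches FMLAv2f32_OP1 and can later be turned into a single FMLAv2f32 that
// accumulates %1 * %2 into %0.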
6808
6810 SmallVectorImpl<unsigned> &Patterns) {
6811 MachineBasicBlock &MBB = *Root.getParent();
6812 bool Found = false;
6813
6814 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6816 MachineOperand &MO = Root.getOperand(Operand);
6817 MachineInstr *MI = nullptr;
6818 if (MO.isReg() && MO.getReg().isVirtual())
6819 MI = MRI.getUniqueVRegDef(MO.getReg());
6820 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6821 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6822 MI->getOperand(1).getReg().isVirtual())
6823 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6824 if (MI && MI->getOpcode() == Opcode) {
6825 Patterns.push_back(Pattern);
6826 return true;
6827 }
6828 return false;
6829 };
6830
6832
6833 switch (Root.getOpcode()) {
6834 default:
6835 return false;
6836 case AArch64::FMULv2f32:
6837 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6838 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6839 break;
6840 case AArch64::FMULv2f64:
6841 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6842 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6843 break;
6844 case AArch64::FMULv4f16:
6845 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6846 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6847 break;
6848 case AArch64::FMULv4f32:
6849 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6850 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6851 break;
6852 case AArch64::FMULv8f16:
6853 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6854 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6855 break;
6856 }
6857
6858 return Found;
6859}
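// Illustrative example (register names are placeholders): for
//   %2:fpr64 = DUPv2i32lane %1, 0
//   %3:fpr64 = FMULv2f32 %0, %2
// FMULv2i32_indexed_OP2 is recorded, and genIndexedMultiply later folds the
// DUP away, producing %3 = FMULv2i32_indexed %0, %1, 0 so the lane is read
// directly by the indexed multiply.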
6860
6862 SmallVectorImpl<unsigned> &Patterns) {
6863 unsigned Opc = Root.getOpcode();
6864 MachineBasicBlock &MBB = *Root.getParent();
6866
6867 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6868 MachineOperand &MO = Root.getOperand(1);
6869 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6870 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6871 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6875 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6876 Patterns.push_back(Pattern);
6877 return true;
6878 }
6879 return false;
6880 };
6881
6882 switch (Opc) {
6883 default:
6884 break;
6885 case AArch64::FNEGDr:
6886 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6887 case AArch64::FNEGSr:
6888 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6889 }
6890
6891 return false;
6892}
6893
6894/// Return true when a code sequence can improve throughput. It
6895/// should be called only for instructions in loops.
6896/// \param Pattern - combiner pattern
6898 switch (Pattern) {
6899 default:
6900 break;
7006 return true;
7007 } // end switch (Pattern)
7008 return false;
7009}
7010
7011/// Find other MI combine patterns.
7013 SmallVectorImpl<unsigned> &Patterns) {
7014 // A - (B + C) ==> (A - B) - C or (A - C) - B
7015 unsigned Opc = Root.getOpcode();
7016 MachineBasicBlock &MBB = *Root.getParent();
7017
7018 switch (Opc) {
7019 case AArch64::SUBWrr:
7020 case AArch64::SUBSWrr:
7021 case AArch64::SUBXrr:
7022 case AArch64::SUBSXrr:
7023 // Found candidate root.
7024 break;
7025 default:
7026 return false;
7027 }
7028
7029 if (isCombineInstrSettingFlag(Opc) &&
7030 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7031 -1)
7032 return false;
7033
7034 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7035 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7036 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7037 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7040 return true;
7041 }
7042
7043 return false;
7044}
7045
7048 switch (Pattern) {
7052 default:
7054 }
7055}
7056
7057/// Return true when there is potentially a faster code sequence for an
7058/// instruction chain ending in \p Root. All potential patterns are listed in
7059/// the \p Pattern vector. Pattern should be sorted in priority order since the
7060/// pattern evaluator stops checking as soon as it finds a faster sequence.
7061
7063 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7064 bool DoRegPressureReduce) const {
7065 // Integer patterns
7066 if (getMaddPatterns(Root, Patterns))
7067 return true;
7068 // Floating point patterns
7069 if (getFMULPatterns(Root, Patterns))
7070 return true;
7071 if (getFMAPatterns(Root, Patterns))
7072 return true;
7073 if (getFNEGPatterns(Root, Patterns))
7074 return true;
7075
7076 // Other patterns
7077 if (getMiscPatterns(Root, Patterns))
7078 return true;
7079
7080 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7081 DoRegPressureReduce);
7082}
7083
7085/// genFusedMultiply - Generate fused multiply instructions.
7086/// This function supports both integer and floating point instructions.
7087/// A typical example:
7088/// F|MUL I=A,B,0
7089/// F|ADD R,I,C
7090/// ==> F|MADD R,A,B,C
7091/// \param MF Containing MachineFunction
7092/// \param MRI Register information
7093/// \param TII Target information
7094/// \param Root is the F|ADD instruction
7095/// \param [out] InsInstrs is a vector of machine instructions and will
7096/// contain the generated madd instruction
7097/// \param IdxMulOpd is index of operand in Root that is the result of
7098/// the F|MUL. In the example above IdxMulOpd is 1.
7099/// \param MaddOpc the opcode of the f|madd instruction
7100/// \param RC Register class of operands
7101/// \param kind Kind of fma instruction (addressing mode) to be generated
7102/// \param ReplacedAddend is the result register from the instruction
7103/// replacing the non-combined operand, if any.
7104static MachineInstr *
7106 const TargetInstrInfo *TII, MachineInstr &Root,
7107 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7108 unsigned MaddOpc, const TargetRegisterClass *RC,
7109 FMAInstKind kind = FMAInstKind::Default,
7110 const Register *ReplacedAddend = nullptr) {
7111 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7112
7113 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7114 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7115 Register ResultReg = Root.getOperand(0).getReg();
7116 Register SrcReg0 = MUL->getOperand(1).getReg();
7117 bool Src0IsKill = MUL->getOperand(1).isKill();
7118 Register SrcReg1 = MUL->getOperand(2).getReg();
7119 bool Src1IsKill = MUL->getOperand(2).isKill();
7120
7121 Register SrcReg2;
7122 bool Src2IsKill;
7123 if (ReplacedAddend) {
7124 // If we just generated a new addend, we must be its only use.
7125 SrcReg2 = *ReplacedAddend;
7126 Src2IsKill = true;
7127 } else {
7128 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7129 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7130 }
7131
7132 if (ResultReg.isVirtual())
7133 MRI.constrainRegClass(ResultReg, RC);
7134 if (SrcReg0.isVirtual())
7135 MRI.constrainRegClass(SrcReg0, RC);
7136 if (SrcReg1.isVirtual())
7137 MRI.constrainRegClass(SrcReg1, RC);
7138 if (SrcReg2.isVirtual())
7139 MRI.constrainRegClass(SrcReg2, RC);
7140
7142 if (kind == FMAInstKind::Default)
7143 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7144 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7145 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7146 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7147 else if (kind == FMAInstKind::Indexed)
7148 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7149 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7150 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7151 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7152 .addImm(MUL->getOperand(3).getImm());
7153 else if (kind == FMAInstKind::Accumulator)
7154 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7155 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7156 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7157 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7158 else
7159 assert(false && "Invalid FMA instruction kind \n");
7160 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7161 InsInstrs.push_back(MIB);
7162 return MUL;
7163}
7164
7165static MachineInstr *
7167 const TargetInstrInfo *TII, MachineInstr &Root,
7169 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7170
7171 unsigned Opc = 0;
7172 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7173 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7174 Opc = AArch64::FNMADDSrrr;
7175 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7176 Opc = AArch64::FNMADDDrrr;
7177 else
7178 return nullptr;
7179
7180 Register ResultReg = Root.getOperand(0).getReg();
7181 Register SrcReg0 = MAD->getOperand(1).getReg();
7182 Register SrcReg1 = MAD->getOperand(2).getReg();
7183 Register SrcReg2 = MAD->getOperand(3).getReg();
7184 bool Src0IsKill = MAD->getOperand(1).isKill();
7185 bool Src1IsKill = MAD->getOperand(2).isKill();
7186 bool Src2IsKill = MAD->getOperand(3).isKill();
7187 if (ResultReg.isVirtual())
7188 MRI.constrainRegClass(ResultReg, RC);
7189 if (SrcReg0.isVirtual())
7190 MRI.constrainRegClass(SrcReg0, RC);
7191 if (SrcReg1.isVirtual())
7192 MRI.constrainRegClass(SrcReg1, RC);
7193 if (SrcReg2.isVirtual())
7194 MRI.constrainRegClass(SrcReg2, RC);
7195
7197 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7198 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7199 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7200 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7201 InsInstrs.push_back(MIB);
7202
7203 return MAD;
7204}
7205
7206/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7207static MachineInstr *
7210 unsigned IdxDupOp, unsigned MulOpc,
7212 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
7213 "Invalid index of FMUL operand");
7214
7215 MachineFunction &MF = *Root.getMF();
7217
7218 MachineInstr *Dup =
7219 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
7220
7221 if (Dup->getOpcode() == TargetOpcode::COPY)
7222 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
7223
7224 Register DupSrcReg = Dup->getOperand(1).getReg();
7225 MRI.clearKillFlags(DupSrcReg);
7226 MRI.constrainRegClass(DupSrcReg, RC);
7227
7228 unsigned DupSrcLane = Dup->getOperand(2).getImm();
7229
7230 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
7231 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
7232
7233 Register ResultReg = Root.getOperand(0).getReg();
7234
7236 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
7237 .add(MulOp)
7238 .addReg(DupSrcReg)
7239 .addImm(DupSrcLane);
7240
7241 InsInstrs.push_back(MIB);
7242 return &Root;
7243}
7244
7245/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
7246/// instructions.
7247///
7248/// \see genFusedMultiply
7252 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7253 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7254 FMAInstKind::Accumulator);
7255}
7256
7257/// genNeg - Helper to generate an intermediate negation of the second operand
7258/// of Root
7260 const TargetInstrInfo *TII, MachineInstr &Root,
7262 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
7263 unsigned MnegOpc, const TargetRegisterClass *RC) {
7264 Register NewVR = MRI.createVirtualRegister(RC);
7266 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
7267 .add(Root.getOperand(2));
7268 InsInstrs.push_back(MIB);
7269
7270 assert(InstrIdxForVirtReg.empty());
7271 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7272
7273 return NewVR;
7274}
7275
7276/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7277/// instructions with an additional negation of the accumulator
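/// For illustration, a sketch of the v2i32 MLA case (register names are
/// assumptions):
/// \code
///   %neg = NEGv2i32 %c            ; genNeg on Root's accumulator operand
///   %r   = MLAv2i32 %neg, %a, %b  ; accumulate into the negated value,
///                                 ; i.e. %r = (%a * %b) - %c
/// \endcode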
7281 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7282 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7283 assert(IdxMulOpd == 1);
7284
7285 Register NewVR =
7286 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7287 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7288 FMAInstKind::Accumulator, &NewVR);
7289}
7290
7291/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
7292/// instructions.
7293///
7294/// \see genFusedMultiply
7298 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7299 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7300 FMAInstKind::Indexed);
7301}
7302
7303/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
7304/// instructions with an additional negation of the accumulator
7308 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7309 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7310 assert(IdxMulOpd == 1);
7311
7312 Register NewVR =
7313 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7314
7315 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7316 FMAInstKind::Indexed, &NewVR);
7317}
7318
7319/// genMaddR - Generate madd instruction and combine mul and add using
7320/// an extra virtual register
7321/// Example - an ADD intermediate needs to be stored in a register:
7322/// MUL I=A,B,0
7323/// ADD R,I,Imm
7324/// ==> ORR V, ZR, Imm
7325/// ==> MADD R,A,B,V
7326/// \param MF Containing MachineFunction
7327/// \param MRI Register information
7328/// \param TII Target information
7329/// \param Root is the ADD instruction
7330/// \param [out] InsInstrs is a vector of machine instructions and will
7331/// contain the generated madd instruction
7332/// \param IdxMulOpd is the index of the operand in Root that is the result of
7333/// the MUL. In the example above IdxMulOpd is 1.
7334/// \param MaddOpc the opcode of the madd instruction
7335/// \param VR is a virtual register that holds the value of an ADD operand
7336/// (V in the example above).
7337/// \param RC Register class of operands
7339 const TargetInstrInfo *TII, MachineInstr &Root,
7341 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
7342 const TargetRegisterClass *RC) {
7343 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7344
7345 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7346 Register ResultReg = Root.getOperand(0).getReg();
7347 Register SrcReg0 = MUL->getOperand(1).getReg();
7348 bool Src0IsKill = MUL->getOperand(1).isKill();
7349 Register SrcReg1 = MUL->getOperand(2).getReg();
7350 bool Src1IsKill = MUL->getOperand(2).isKill();
7351
7352 if (ResultReg.isVirtual())
7353 MRI.constrainRegClass(ResultReg, RC);
7354 if (SrcReg0.isVirtual())
7355 MRI.constrainRegClass(SrcReg0, RC);
7356 if (SrcReg1.isVirtual())
7357 MRI.constrainRegClass(SrcReg1, RC);
7359 MRI.constrainRegClass(VR, RC);
7360
7362 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7363 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7364 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7365 .addReg(VR);
7366 // Insert the MADD
7367 InsInstrs.push_back(MIB);
7368 return MUL;
7369}
7370
7371/// Do the following transformation
7372/// A - (B + C) ==> (A - B) - C
7373/// A - (B + C) ==> (A - C) - B
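/// For illustration, a sketch of the IdxOpd1 == 1 form on 64-bit registers
/// (virtual register names are assumptions):
/// \code
///   %t   = ADDXrr %b, %c
///   %r   = SUBXrr %a, %t
///   ==>
///   %new = SUBXrr %a, %b
///   %r   = SUBXrr %new, %c
/// \endcode
/// The nsw/nuw flags are cleared on the rewritten instructions, since the
/// reassociated intermediate value may wrap even when the original did not.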
7374static void
7376 const TargetInstrInfo *TII, MachineInstr &Root,
7379 unsigned IdxOpd1,
7380 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
7381 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
7382 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
7383 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
7384
7385 Register ResultReg = Root.getOperand(0).getReg();
7386 Register RegA = Root.getOperand(1).getReg();
7387 bool RegAIsKill = Root.getOperand(1).isKill();
7388 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
7389 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
7390 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
7391 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
7392 Register NewVR =
7393 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
7394
7395 unsigned Opcode = Root.getOpcode();
7396 if (Opcode == AArch64::SUBSWrr)
7397 Opcode = AArch64::SUBWrr;
7398 else if (Opcode == AArch64::SUBSXrr)
7399 Opcode = AArch64::SUBXrr;
7400 else
7401 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
7402 "Unexpected instruction opcode.");
7403
7404 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
7405 Flags &= ~MachineInstr::NoSWrap;
7406 Flags &= ~MachineInstr::NoUWrap;
7407
7408 MachineInstrBuilder MIB1 =
7409 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
7410 .addReg(RegA, getKillRegState(RegAIsKill))
7411 .addReg(RegB, getKillRegState(RegBIsKill))
7412 .setMIFlags(Flags);
7413 MachineInstrBuilder MIB2 =
7414 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
7415 .addReg(NewVR, getKillRegState(true))
7416 .addReg(RegC, getKillRegState(RegCIsKill))
7417 .setMIFlags(Flags);
7418
7419 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7420 InsInstrs.push_back(MIB1);
7421 InsInstrs.push_back(MIB2);
7422 DelInstrs.push_back(AddMI);
7423 DelInstrs.push_back(&Root);
7424}
7425
7426/// When getMachineCombinerPatterns() finds potential patterns,
7427/// this function generates the instructions that could replace the
7428/// original code sequence.
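/// For illustration, a sketch of the 64-bit multiply-add rewrite (register
/// names are assumptions):
/// \code
///   %i = MADDXrrr %a, %b, $xzr    ; the MUL (a MADD with a zero addend)
///   %r = ADDXrr %i, %c
///   ==>
///   %r = MADDXrrr %a, %b, %c      ; pushed to InsInstrs; the original MUL
///                                 ; and ADD are recorded in DelInstrs
/// \endcode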
7430 MachineInstr &Root, unsigned Pattern,
7433 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
7434 MachineBasicBlock &MBB = *Root.getParent();
7436 MachineFunction &MF = *MBB.getParent();
7438
7439 MachineInstr *MUL = nullptr;
7440 const TargetRegisterClass *RC;
7441 unsigned Opc;
7442 switch (Pattern) {
7443 default:
7444 // Reassociate instructions.
7446 DelInstrs, InstrIdxForVirtReg);
7447 return;
7449 // A - (B + C)
7450 // ==> (A - B) - C
7451 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
7452 InstrIdxForVirtReg);
7453 return;
7455 // A - (B + C)
7456 // ==> (A - C) - B
7457 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
7458 InstrIdxForVirtReg);
7459 return;
7462 // MUL I=A,B,0
7463 // ADD R,I,C
7464 // ==> MADD R,A,B,C
7465 // --- Create(MADD);
7467 Opc = AArch64::MADDWrrr;
7468 RC = &AArch64::GPR32RegClass;
7469 } else {
7470 Opc = AArch64::MADDXrrr;
7471 RC = &AArch64::GPR64RegClass;
7472 }
7473 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7474 break;
7477 // MUL I=A,B,0
7478 // ADD R,C,I
7479 // ==> MADD R,A,B,C
7480 // --- Create(MADD);
7482 Opc = AArch64::MADDWrrr;
7483 RC = &AArch64::GPR32RegClass;
7484 } else {
7485 Opc = AArch64::MADDXrrr;
7486 RC = &AArch64::GPR64RegClass;
7487 }
7488 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7489 break;
7492 // MUL I=A,B,0
7493 // ADD R,I,Imm
7494 // ==> MOV V, Imm
7495 // ==> MADD R,A,B,V
7496 // --- Create(MADD);
7497 const TargetRegisterClass *OrrRC;
7498 unsigned BitSize, OrrOpc, ZeroReg;
7500 OrrOpc = AArch64::ORRWri;
7501 OrrRC = &AArch64::GPR32spRegClass;
7502 BitSize = 32;
7503 ZeroReg = AArch64::WZR;
7504 Opc = AArch64::MADDWrrr;
7505 RC = &AArch64::GPR32RegClass;
7506 } else {
7507 OrrOpc = AArch64::ORRXri;
7508 OrrRC = &AArch64::GPR64spRegClass;
7509 BitSize = 64;
7510 ZeroReg = AArch64::XZR;
7511 Opc = AArch64::MADDXrrr;
7512 RC = &AArch64::GPR64RegClass;
7513 }
7514 Register NewVR = MRI.createVirtualRegister(OrrRC);
7515 uint64_t Imm = Root.getOperand(2).getImm();
7516
7517 if (Root.getOperand(3).isImm()) {
7518 unsigned Val = Root.getOperand(3).getImm();
7519 Imm = Imm << Val;
7520 }
7521 uint64_t UImm = SignExtend64(Imm, BitSize);
7522    // Bail out unless the immediate can be composed via a single instruction.
7524 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7525 if (Insn.size() != 1)
7526 return;
7527 auto MovI = Insn.begin();
7529 // MOV is an alias for one of three instructions: movz, movn, and orr.
7530 if (MovI->Opcode == OrrOpc)
7531 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7532 .addReg(ZeroReg)
7533 .addImm(MovI->Op2);
7534 else {
7535 if (BitSize == 32)
7536 assert((MovI->Opcode == AArch64::MOVNWi ||
7537 MovI->Opcode == AArch64::MOVZWi) &&
7538 "Expected opcode");
7539 else
7540 assert((MovI->Opcode == AArch64::MOVNXi ||
7541 MovI->Opcode == AArch64::MOVZXi) &&
7542 "Expected opcode");
7543 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7544 .addImm(MovI->Op1)
7545 .addImm(MovI->Op2);
7546 }
7547 InsInstrs.push_back(MIB1);
7548 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7549 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7550 break;
7551 }
7554 // MUL I=A,B,0
7555 // SUB R,I, C
7556 // ==> SUB V, 0, C
7557 // ==> MADD R,A,B,V // = -C + A*B
7558 // --- Create(MADD);
7559 const TargetRegisterClass *SubRC;
7560 unsigned SubOpc, ZeroReg;
7562 SubOpc = AArch64::SUBWrr;
7563 SubRC = &AArch64::GPR32spRegClass;
7564 ZeroReg = AArch64::WZR;
7565 Opc = AArch64::MADDWrrr;
7566 RC = &AArch64::GPR32RegClass;
7567 } else {
7568 SubOpc = AArch64::SUBXrr;
7569 SubRC = &AArch64::GPR64spRegClass;
7570 ZeroReg = AArch64::XZR;
7571 Opc = AArch64::MADDXrrr;
7572 RC = &AArch64::GPR64RegClass;
7573 }
7574 Register NewVR = MRI.createVirtualRegister(SubRC);
7575 // SUB NewVR, 0, C
7576 MachineInstrBuilder MIB1 =
7577 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7578 .addReg(ZeroReg)
7579 .add(Root.getOperand(2));
7580 InsInstrs.push_back(MIB1);
7581 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7582 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7583 break;
7584 }
7587 // MUL I=A,B,0
7588 // SUB R,C,I
7589 // ==> MSUB R,A,B,C (computes C - A*B)
7590 // --- Create(MSUB);
7592 Opc = AArch64::MSUBWrrr;
7593 RC = &AArch64::GPR32RegClass;
7594 } else {
7595 Opc = AArch64::MSUBXrrr;
7596 RC = &AArch64::GPR64RegClass;
7597 }
7598 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7599 break;
7602 // MUL I=A,B,0
7603 // SUB R,I, Imm
7604 // ==> MOV V, -Imm
7605 // ==> MADD R,A,B,V // = -Imm + A*B
7606 // --- Create(MADD);
7607 const TargetRegisterClass *OrrRC;
7608 unsigned BitSize, OrrOpc, ZeroReg;
7610 OrrOpc = AArch64::ORRWri;
7611 OrrRC = &AArch64::GPR32spRegClass;
7612 BitSize = 32;
7613 ZeroReg = AArch64::WZR;
7614 Opc = AArch64::MADDWrrr;
7615 RC = &AArch64::GPR32RegClass;
7616 } else {
7617 OrrOpc = AArch64::ORRXri;
7618 OrrRC = &AArch64::GPR64spRegClass;
7619 BitSize = 64;
7620 ZeroReg = AArch64::XZR;
7621 Opc = AArch64::MADDXrrr;
7622 RC = &AArch64::GPR64RegClass;
7623 }
7624 Register NewVR = MRI.createVirtualRegister(OrrRC);
7625 uint64_t Imm = Root.getOperand(2).getImm();
7626 if (Root.getOperand(3).isImm()) {
7627 unsigned Val = Root.getOperand(3).getImm();
7628 Imm = Imm << Val;
7629 }
7630 uint64_t UImm = SignExtend64(-Imm, BitSize);
7631    // Bail out unless the immediate can be composed via a single instruction.
7633 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7634 if (Insn.size() != 1)
7635 return;
7636 auto MovI = Insn.begin();
7638 // MOV is an alias for one of three instructions: movz, movn, and orr.
7639 if (MovI->Opcode == OrrOpc)
7640 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7641 .addReg(ZeroReg)
7642 .addImm(MovI->Op2);
7643 else {
7644 if (BitSize == 32)
7645 assert((MovI->Opcode == AArch64::MOVNWi ||
7646 MovI->Opcode == AArch64::MOVZWi) &&
7647 "Expected opcode");
7648 else
7649 assert((MovI->Opcode == AArch64::MOVNXi ||
7650 MovI->Opcode == AArch64::MOVZXi) &&
7651 "Expected opcode");
7652 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7653 .addImm(MovI->Op1)
7654 .addImm(MovI->Op2);
7655 }
7656 InsInstrs.push_back(MIB1);
7657 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7658 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7659 break;
7660 }
7661
7663 Opc = AArch64::MLAv8i8;
7664 RC = &AArch64::FPR64RegClass;
7665 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7666 break;
7668 Opc = AArch64::MLAv8i8;
7669 RC = &AArch64::FPR64RegClass;
7670 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7671 break;
7673 Opc = AArch64::MLAv16i8;
7674 RC = &AArch64::FPR128RegClass;
7675 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7676 break;
7678 Opc = AArch64::MLAv16i8;
7679 RC = &AArch64::FPR128RegClass;
7680 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7681 break;
7683 Opc = AArch64::MLAv4i16;
7684 RC = &AArch64::FPR64RegClass;
7685 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7686 break;
7688 Opc = AArch64::MLAv4i16;
7689 RC = &AArch64::FPR64RegClass;
7690 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7691 break;
7693 Opc = AArch64::MLAv8i16;
7694 RC = &AArch64::FPR128RegClass;
7695 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7696 break;
7698 Opc = AArch64::MLAv8i16;
7699 RC = &AArch64::FPR128RegClass;
7700 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7701 break;
7703 Opc = AArch64::MLAv2i32;
7704 RC = &AArch64::FPR64RegClass;
7705 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7706 break;
7708 Opc = AArch64::MLAv2i32;
7709 RC = &AArch64::FPR64RegClass;
7710 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7711 break;
7713 Opc = AArch64::MLAv4i32;
7714 RC = &AArch64::FPR128RegClass;
7715 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7716 break;
7718 Opc = AArch64::MLAv4i32;
7719 RC = &AArch64::FPR128RegClass;
7720 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7721 break;
7722
7724 Opc = AArch64::MLAv8i8;
7725 RC = &AArch64::FPR64RegClass;
7726 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7727 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7728 RC);
7729 break;
7731 Opc = AArch64::MLSv8i8;
7732 RC = &AArch64::FPR64RegClass;
7733 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7734 break;
7736 Opc = AArch64::MLAv16i8;
7737 RC = &AArch64::FPR128RegClass;
7738 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7739 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7740 RC);
7741 break;
7743 Opc = AArch64::MLSv16i8;
7744 RC = &AArch64::FPR128RegClass;
7745 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7746 break;
7748 Opc = AArch64::MLAv4i16;
7749 RC = &AArch64::FPR64RegClass;
7750 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7751 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7752 RC);
7753 break;
7755 Opc = AArch64::MLSv4i16;
7756 RC = &AArch64::FPR64RegClass;
7757 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7758 break;
7760 Opc = AArch64::MLAv8i16;
7761 RC = &AArch64::FPR128RegClass;
7762 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7763 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7764 RC);
7765 break;
7767 Opc = AArch64::MLSv8i16;
7768 RC = &AArch64::FPR128RegClass;
7769 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7770 break;
7772 Opc = AArch64::MLAv2i32;
7773 RC = &AArch64::FPR64RegClass;
7774 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7775 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7776 RC);
7777 break;
7779 Opc = AArch64::MLSv2i32;
7780 RC = &AArch64::FPR64RegClass;
7781 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7782 break;
7784 Opc = AArch64::MLAv4i32;
7785 RC = &AArch64::FPR128RegClass;
7786 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7787 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7788 RC);
7789 break;
7791 Opc = AArch64::MLSv4i32;
7792 RC = &AArch64::FPR128RegClass;
7793 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7794 break;
7795
7797 Opc = AArch64::MLAv4i16_indexed;
7798 RC = &AArch64::FPR64RegClass;
7799 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7800 break;
7802 Opc = AArch64::MLAv4i16_indexed;
7803 RC = &AArch64::FPR64RegClass;
7804 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7805 break;
7807 Opc = AArch64::MLAv8i16_indexed;
7808 RC = &AArch64::FPR128RegClass;
7809 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7810 break;
7812 Opc = AArch64::MLAv8i16_indexed;
7813 RC = &AArch64::FPR128RegClass;
7814 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7815 break;
7817 Opc = AArch64::MLAv2i32_indexed;
7818 RC = &AArch64::FPR64RegClass;
7819 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7820 break;
7822 Opc = AArch64::MLAv2i32_indexed;
7823 RC = &AArch64::FPR64RegClass;
7824 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7825 break;
7827 Opc = AArch64::MLAv4i32_indexed;
7828 RC = &AArch64::FPR128RegClass;
7829 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7830 break;
7832 Opc = AArch64::MLAv4i32_indexed;
7833 RC = &AArch64::FPR128RegClass;
7834 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7835 break;
7836
7838 Opc = AArch64::MLAv4i16_indexed;
7839 RC = &AArch64::FPR64RegClass;
7840 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7841 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7842 RC);
7843 break;
7845 Opc = AArch64::MLSv4i16_indexed;
7846 RC = &AArch64::FPR64RegClass;
7847 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7848 break;
7850 Opc = AArch64::MLAv8i16_indexed;
7851 RC = &AArch64::FPR128RegClass;
7852 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7853 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7854 RC);
7855 break;
7857 Opc = AArch64::MLSv8i16_indexed;
7858 RC = &AArch64::FPR128RegClass;
7859 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7860 break;
7862 Opc = AArch64::MLAv2i32_indexed;
7863 RC = &AArch64::FPR64RegClass;
7864 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7865 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7866 RC);
7867 break;
7869 Opc = AArch64::MLSv2i32_indexed;
7870 RC = &AArch64::FPR64RegClass;
7871 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7872 break;
7874 Opc = AArch64::MLAv4i32_indexed;
7875 RC = &AArch64::FPR128RegClass;
7876 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7877 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7878 RC);
7879 break;
7881 Opc = AArch64::MLSv4i32_indexed;
7882 RC = &AArch64::FPR128RegClass;
7883 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7884 break;
7885
7886 // Floating Point Support
7888 Opc = AArch64::FMADDHrrr;
7889 RC = &AArch64::FPR16RegClass;
7890 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7891 break;
7893 Opc = AArch64::FMADDSrrr;
7894 RC = &AArch64::FPR32RegClass;
7895 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7896 break;
7898 Opc = AArch64::FMADDDrrr;
7899 RC = &AArch64::FPR64RegClass;
7900 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7901 break;
7902
7904 Opc = AArch64::FMADDHrrr;
7905 RC = &AArch64::FPR16RegClass;
7906 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7907 break;
7909 Opc = AArch64::FMADDSrrr;
7910 RC = &AArch64::FPR32RegClass;
7911 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7912 break;
7914 Opc = AArch64::FMADDDrrr;
7915 RC = &AArch64::FPR64RegClass;
7916 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7917 break;
7918
7920 Opc = AArch64::FMLAv1i32_indexed;
7921 RC = &AArch64::FPR32RegClass;
7922 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7923 FMAInstKind::Indexed);
7924 break;
7926 Opc = AArch64::FMLAv1i32_indexed;
7927 RC = &AArch64::FPR32RegClass;
7928 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7929 FMAInstKind::Indexed);
7930 break;
7931
7933 Opc = AArch64::FMLAv1i64_indexed;
7934 RC = &AArch64::FPR64RegClass;
7935 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7936 FMAInstKind::Indexed);
7937 break;
7939 Opc = AArch64::FMLAv1i64_indexed;
7940 RC = &AArch64::FPR64RegClass;
7941 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7942 FMAInstKind::Indexed);
7943 break;
7944
7946 RC = &AArch64::FPR64RegClass;
7947 Opc = AArch64::FMLAv4i16_indexed;
7948 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7949 FMAInstKind::Indexed);
7950 break;
7952 RC = &AArch64::FPR64RegClass;
7953 Opc = AArch64::FMLAv4f16;
7954 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7955 FMAInstKind::Accumulator);
7956 break;
7958 RC = &AArch64::FPR64RegClass;
7959 Opc = AArch64::FMLAv4i16_indexed;
7960 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7961 FMAInstKind::Indexed);
7962 break;
7964 RC = &AArch64::FPR64RegClass;
7965 Opc = AArch64::FMLAv4f16;
7966 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7967 FMAInstKind::Accumulator);
7968 break;
7969
7972 RC = &AArch64::FPR64RegClass;
7974 Opc = AArch64::FMLAv2i32_indexed;
7975 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7976 FMAInstKind::Indexed);
7977 } else {
7978 Opc = AArch64::FMLAv2f32;
7979 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7980 FMAInstKind::Accumulator);
7981 }
7982 break;
7985 RC = &AArch64::FPR64RegClass;
7987 Opc = AArch64::FMLAv2i32_indexed;
7988 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7989 FMAInstKind::Indexed);
7990 } else {
7991 Opc = AArch64::FMLAv2f32;
7992 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7993 FMAInstKind::Accumulator);
7994 }
7995 break;
7996
7998 RC = &AArch64::FPR128RegClass;
7999 Opc = AArch64::FMLAv8i16_indexed;
8000 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8001 FMAInstKind::Indexed);
8002 break;
8004 RC = &AArch64::FPR128RegClass;
8005 Opc = AArch64::FMLAv8f16;
8006 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8007 FMAInstKind::Accumulator);
8008 break;
8010 RC = &AArch64::FPR128RegClass;
8011 Opc = AArch64::FMLAv8i16_indexed;
8012 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8013 FMAInstKind::Indexed);
8014 break;
8016 RC = &AArch64::FPR128RegClass;
8017 Opc = AArch64::FMLAv8f16;
8018 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8019 FMAInstKind::Accumulator);
8020 break;
8021
8024 RC = &AArch64::FPR128RegClass;
8026 Opc = AArch64::FMLAv2i64_indexed;
8027 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8028 FMAInstKind::Indexed);
8029 } else {
8030 Opc = AArch64::FMLAv2f64;
8031 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8032 FMAInstKind::Accumulator);
8033 }
8034 break;
8037 RC = &AArch64::FPR128RegClass;
8039 Opc = AArch64::FMLAv2i64_indexed;
8040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8041 FMAInstKind::Indexed);
8042 } else {
8043 Opc = AArch64::FMLAv2f64;
8044 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8045 FMAInstKind::Accumulator);
8046 }
8047 break;
8048
8051 RC = &AArch64::FPR128RegClass;
8053 Opc = AArch64::FMLAv4i32_indexed;
8054 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8055 FMAInstKind::Indexed);
8056 } else {
8057 Opc = AArch64::FMLAv4f32;
8058 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8059 FMAInstKind::Accumulator);
8060 }
8061 break;
8062
8065 RC = &AArch64::FPR128RegClass;
8067 Opc = AArch64::FMLAv4i32_indexed;
8068 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8069 FMAInstKind::Indexed);
8070 } else {
8071 Opc = AArch64::FMLAv4f32;
8072 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8073 FMAInstKind::Accumulator);
8074 }
8075 break;
8076
8078 Opc = AArch64::FNMSUBHrrr;
8079 RC = &AArch64::FPR16RegClass;
8080 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8081 break;
8083 Opc = AArch64::FNMSUBSrrr;
8084 RC = &AArch64::FPR32RegClass;
8085 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8086 break;
8088 Opc = AArch64::FNMSUBDrrr;
8089 RC = &AArch64::FPR64RegClass;
8090 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8091 break;
8092
8094 Opc = AArch64::FNMADDHrrr;
8095 RC = &AArch64::FPR16RegClass;
8096 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8097 break;
8099 Opc = AArch64::FNMADDSrrr;
8100 RC = &AArch64::FPR32RegClass;
8101 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8102 break;
8104 Opc = AArch64::FNMADDDrrr;
8105 RC = &AArch64::FPR64RegClass;
8106 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8107 break;
8108
8110 Opc = AArch64::FMSUBHrrr;
8111 RC = &AArch64::FPR16RegClass;
8112 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8113 break;
8115 Opc = AArch64::FMSUBSrrr;
8116 RC = &AArch64::FPR32RegClass;
8117 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8118 break;
8120 Opc = AArch64::FMSUBDrrr;
8121 RC = &AArch64::FPR64RegClass;
8122 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8123 break;
8124
8126 Opc = AArch64::FMLSv1i32_indexed;
8127 RC = &AArch64::FPR32RegClass;
8128 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8129 FMAInstKind::Indexed);
8130 break;
8131
8133 Opc = AArch64::FMLSv1i64_indexed;
8134 RC = &AArch64::FPR64RegClass;
8135 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8136 FMAInstKind::Indexed);
8137 break;
8138
8141 RC = &AArch64::FPR64RegClass;
8142 Register NewVR = MRI.createVirtualRegister(RC);
8143 MachineInstrBuilder MIB1 =
8144 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8145 .add(Root.getOperand(2));
8146 InsInstrs.push_back(MIB1);
8147 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8149 Opc = AArch64::FMLAv4f16;
8150 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8151 FMAInstKind::Accumulator, &NewVR);
8152 } else {
8153 Opc = AArch64::FMLAv4i16_indexed;
8154 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8155 FMAInstKind::Indexed, &NewVR);
8156 }
8157 break;
8158 }
8160 RC = &AArch64::FPR64RegClass;
8161 Opc = AArch64::FMLSv4f16;
8162 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8163 FMAInstKind::Accumulator);
8164 break;
8166 RC = &AArch64::FPR64RegClass;
8167 Opc = AArch64::FMLSv4i16_indexed;
8168 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8169 FMAInstKind::Indexed);
8170 break;
8171
8174 RC = &AArch64::FPR64RegClass;
8176 Opc = AArch64::FMLSv2i32_indexed;
8177 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8178 FMAInstKind::Indexed);
8179 } else {
8180 Opc = AArch64::FMLSv2f32;
8181 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8182 FMAInstKind::Accumulator);
8183 }
8184 break;
8185
8188 RC = &AArch64::FPR128RegClass;
8189 Register NewVR = MRI.createVirtualRegister(RC);
8190 MachineInstrBuilder MIB1 =
8191 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8192 .add(Root.getOperand(2));
8193 InsInstrs.push_back(MIB1);
8194 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8196 Opc = AArch64::FMLAv8f16;
8197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8198 FMAInstKind::Accumulator, &NewVR);
8199 } else {
8200 Opc = AArch64::FMLAv8i16_indexed;
8201 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8202 FMAInstKind::Indexed, &NewVR);
8203 }
8204 break;
8205 }
8207 RC = &AArch64::FPR128RegClass;
8208 Opc = AArch64::FMLSv8f16;
8209 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8210 FMAInstKind::Accumulator);
8211 break;
8213 RC = &AArch64::FPR128RegClass;
8214 Opc = AArch64::FMLSv8i16_indexed;
8215 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8216 FMAInstKind::Indexed);
8217 break;
8218
8221 RC = &AArch64::FPR128RegClass;
8223 Opc = AArch64::FMLSv2i64_indexed;
8224 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8225 FMAInstKind::Indexed);
8226 } else {
8227 Opc = AArch64::FMLSv2f64;
8228 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8229 FMAInstKind::Accumulator);
8230 }
8231 break;
8232
8235 RC = &AArch64::FPR128RegClass;
8237 Opc = AArch64::FMLSv4i32_indexed;
8238 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8239 FMAInstKind::Indexed);
8240 } else {
8241 Opc = AArch64::FMLSv4f32;
8242 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8243 FMAInstKind::Accumulator);
8244 }
8245 break;
8248 RC = &AArch64::FPR64RegClass;
8249 Register NewVR = MRI.createVirtualRegister(RC);
8250 MachineInstrBuilder MIB1 =
8251 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
8252 .add(Root.getOperand(2));
8253 InsInstrs.push_back(MIB1);
8254 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8256 Opc = AArch64::FMLAv2i32_indexed;
8257 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8258 FMAInstKind::Indexed, &NewVR);
8259 } else {
8260 Opc = AArch64::FMLAv2f32;
8261 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8262 FMAInstKind::Accumulator, &NewVR);
8263 }
8264 break;
8265 }
8268 RC = &AArch64::FPR128RegClass;
8269 Register NewVR = MRI.createVirtualRegister(RC);
8270 MachineInstrBuilder MIB1 =
8271 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
8272 .add(Root.getOperand(2));
8273 InsInstrs.push_back(MIB1);
8274 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8276 Opc = AArch64::FMLAv4i32_indexed;
8277 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8278 FMAInstKind::Indexed, &NewVR);
8279 } else {
8280 Opc = AArch64::FMLAv4f32;
8281 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8282 FMAInstKind::Accumulator, &NewVR);
8283 }
8284 break;
8285 }
8288 RC = &AArch64::FPR128RegClass;
8289 Register NewVR = MRI.createVirtualRegister(RC);
8290 MachineInstrBuilder MIB1 =
8291 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
8292 .add(Root.getOperand(2));
8293 InsInstrs.push_back(MIB1);
8294 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8296 Opc = AArch64::FMLAv2i64_indexed;
8297 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8298 FMAInstKind::Indexed, &NewVR);
8299 } else {
8300 Opc = AArch64::FMLAv2f64;
8301 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8302 FMAInstKind::Accumulator, &NewVR);
8303 }
8304 break;
8305 }
8308 unsigned IdxDupOp =
8310 : 2;
8311 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
8312 &AArch64::FPR128RegClass, MRI);
8313 break;
8314 }
8317 unsigned IdxDupOp =
8319 : 2;
8320 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
8321 &AArch64::FPR128RegClass, MRI);
8322 break;
8323 }
8326 unsigned IdxDupOp =
8328 : 2;
8329 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
8330 &AArch64::FPR128_loRegClass, MRI);
8331 break;
8332 }
8335 unsigned IdxDupOp =
8337 : 2;
8338 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
8339 &AArch64::FPR128RegClass, MRI);
8340 break;
8341 }
8344 unsigned IdxDupOp =
8346 : 2;
8347 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
8348 &AArch64::FPR128_loRegClass, MRI);
8349 break;
8350 }
8352 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8353 break;
8354 }
8355
8356 } // end switch (Pattern)
8357 // Record MUL and ADD/SUB for deletion
8358 if (MUL)
8359 DelInstrs.push_back(MUL);
8360 DelInstrs.push_back(&Root);
8361
8362 // Set the flags on the inserted instructions to be the merged flags of the
8363 // instructions that we have combined.
8364 uint32_t Flags = Root.getFlags();
8365 if (MUL)
8366 Flags = Root.mergeFlagsWith(*MUL);
8367 for (auto *MI : InsInstrs)
8368 MI->setFlags(Flags);
8369}
8370
8371/// Replace csinc-branch sequence by simple conditional branch
8372///
8373/// Examples:
8374/// 1. \code
8375/// csinc w9, wzr, wzr, <condition code>
8376/// tbnz w9, #0, 0x44
8377/// \endcode
8378/// to
8379/// \code
8380/// b.<inverted condition code>
8381/// \endcode
8382///
8383/// 2. \code
8384/// csinc w9, wzr, wzr, <condition code>
8385/// tbz w9, #0, 0x44
8386/// \endcode
8387/// to
8388/// \code
8389/// b.<condition code>
8390/// \endcode
8391///
8392/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
8393/// compare's constant operand is a power of 2.
8394///
8395/// Examples:
8396/// \code
8397/// and w8, w8, #0x400
8398/// cbnz w8, L1
8399/// \endcode
8400/// to
8401/// \code
8402/// tbnz w8, #10, L1
8403/// \endcode
8404///
8405/// \param MI Conditional Branch
8406/// \return True when the simple conditional branch is generated
8407///
8409 bool IsNegativeBranch = false;
8410 bool IsTestAndBranch = false;
8411 unsigned TargetBBInMI = 0;
8412 switch (MI.getOpcode()) {
8413 default:
8414 llvm_unreachable("Unknown branch instruction?");
8415 case AArch64::Bcc:
8416 return false;
8417 case AArch64::CBZW:
8418 case AArch64::CBZX:
8419 TargetBBInMI = 1;
8420 break;
8421 case AArch64::CBNZW:
8422 case AArch64::CBNZX:
8423 TargetBBInMI = 1;
8424 IsNegativeBranch = true;
8425 break;
8426 case AArch64::TBZW:
8427 case AArch64::TBZX:
8428 TargetBBInMI = 2;
8429 IsTestAndBranch = true;
8430 break;
8431 case AArch64::TBNZW:
8432 case AArch64::TBNZX:
8433 TargetBBInMI = 2;
8434 IsNegativeBranch = true;
8435 IsTestAndBranch = true;
8436 break;
8437 }
8438 // So we increment a zero register and test for bits other
8439 // than bit 0? Conservatively bail out in case the verifier
8440 // missed this case.
8441 if (IsTestAndBranch && MI.getOperand(1).getImm())
8442 return false;
8443
8444 // Find Definition.
8445  assert(MI.getParent() && "Incomplete machine instruction\n");
8446 MachineBasicBlock *MBB = MI.getParent();
8447 MachineFunction *MF = MBB->getParent();
8449 Register VReg = MI.getOperand(0).getReg();
8450 if (!VReg.isVirtual())
8451 return false;
8452
8453 MachineInstr *DefMI = MRI->getVRegDef(VReg);
8454
8455 // Look through COPY instructions to find definition.
8456 while (DefMI->isCopy()) {
8457 Register CopyVReg = DefMI->getOperand(1).getReg();
8458 if (!MRI->hasOneNonDBGUse(CopyVReg))
8459 return false;
8460 if (!MRI->hasOneDef(CopyVReg))
8461 return false;
8462 DefMI = MRI->getVRegDef(CopyVReg);
8463 }
8464
8465 switch (DefMI->getOpcode()) {
8466 default:
8467 return false;
8468 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8469 case AArch64::ANDWri:
8470 case AArch64::ANDXri: {
8471 if (IsTestAndBranch)
8472 return false;
8473 if (DefMI->getParent() != MBB)
8474 return false;
8475 if (!MRI->hasOneNonDBGUse(VReg))
8476 return false;
8477
8478 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8480 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8481 if (!isPowerOf2_64(Mask))
8482 return false;
8483
8485 Register NewReg = MO.getReg();
8486 if (!NewReg.isVirtual())
8487 return false;
8488
8489 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8490
8491 MachineBasicBlock &RefToMBB = *MBB;
8492 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8493 DebugLoc DL = MI.getDebugLoc();
8494 unsigned Imm = Log2_64(Mask);
8495 unsigned Opc = (Imm < 32)
8496 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8497 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8498 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8499 .addReg(NewReg)
8500 .addImm(Imm)
8501 .addMBB(TBB);
8502    // Register lives on to the TB(N)Z now.
8503 MO.setIsKill(false);
8504
8505    // For immediates smaller than 32, we need to use the 32-bit
8506    // variant (W) in all cases, because the 64-bit variant cannot
8507    // encode them.
8508    // Therefore, if the input register is 64-bit, we need to take its
8509    // 32-bit sub-register.
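    // For example (a sketch): "and x8, x8, #0x400; cbnz x8, L1" becomes
    // "tbnz w8, #10, L1"; bit 10 fits the W form, so the 32-bit sub-register
    // of x8 is used.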
8510 if (!Is32Bit && Imm < 32)
8511 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8512 MI.eraseFromParent();
8513 return true;
8514 }
8515 // Look for CSINC
8516 case AArch64::CSINCWr:
8517 case AArch64::CSINCXr: {
8518 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8519 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8520 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8521 DefMI->getOperand(2).getReg() == AArch64::XZR))
8522 return false;
8523
8524 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8525 true) != -1)
8526 return false;
8527
8529 // Convert only when the condition code is not modified between
8530 // the CSINC and the branch. The CC may be used by other
8531 // instructions in between.
8533 return false;
8534 MachineBasicBlock &RefToMBB = *MBB;
8535 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8536 DebugLoc DL = MI.getDebugLoc();
8537 if (IsNegativeBranch)
8539 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8540 MI.eraseFromParent();
8541 return true;
8542 }
8543 }
8544}
8545
8546std::pair<unsigned, unsigned>
8548 const unsigned Mask = AArch64II::MO_FRAGMENT;
8549 return std::make_pair(TF & Mask, TF & ~Mask);
8550}
8551
8554 using namespace AArch64II;
8555
8556 static const std::pair<unsigned, const char *> TargetFlags[] = {
8557 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8558 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8559 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8560 {MO_HI12, "aarch64-hi12"}};
8561 return ArrayRef(TargetFlags);
8562}
8563
8566 using namespace AArch64II;
8567
8568 static const std::pair<unsigned, const char *> TargetFlags[] = {
8569 {MO_COFFSTUB, "aarch64-coffstub"},
8570 {MO_GOT, "aarch64-got"},
8571 {MO_NC, "aarch64-nc"},
8572 {MO_S, "aarch64-s"},
8573 {MO_TLS, "aarch64-tls"},
8574 {MO_DLLIMPORT, "aarch64-dllimport"},
8575 {MO_PREL, "aarch64-prel"},
8576 {MO_TAGGED, "aarch64-tagged"},
8577 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8578 };
8579 return ArrayRef(TargetFlags);
8580}
8581
8584 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8585 {{MOSuppressPair, "aarch64-suppress-pair"},
8586 {MOStridedAccess, "aarch64-strided-access"}};
8587 return ArrayRef(TargetFlags);
8588}
8589
8590/// Constants defining how certain sequences should be outlined.
8591/// This encompasses how an outlined function should be called, and what kind of
8592/// frame should be emitted for that outlined function.
8593///
8594/// \p MachineOutlinerDefault implies that the function should be called with
8595/// a save and restore of LR to the stack.
8596///
8597/// That is,
8598///
8599/// I1 Save LR OUTLINED_FUNCTION:
8600/// I2 --> BL OUTLINED_FUNCTION I1
8601/// I3 Restore LR I2
8602/// I3
8603/// RET
8604///
8605/// * Call construction overhead: 3 (save + BL + restore)
8606/// * Frame construction overhead: 1 (ret)
8607/// * Requires stack fixups? Yes
8608///
8609/// \p MachineOutlinerTailCall implies that the function is being created from
8610/// a sequence of instructions ending in a return.
8611///
8612/// That is,
8613///
8614/// I1 OUTLINED_FUNCTION:
8615/// I2 --> B OUTLINED_FUNCTION I1
8616/// RET I2
8617/// RET
8618///
8619/// * Call construction overhead: 1 (B)
8620/// * Frame construction overhead: 0 (Return included in sequence)
8621/// * Requires stack fixups? No
8622///
8623/// \p MachineOutlinerNoLRSave implies that the function should be called using
8624/// a BL instruction, but doesn't require LR to be saved and restored. This
8625/// happens when LR is known to be dead.
8626///
8627/// That is,
8628///
8629/// I1 OUTLINED_FUNCTION:
8630/// I2 --> BL OUTLINED_FUNCTION I1
8631/// I3 I2
8632/// I3
8633/// RET
8634///
8635/// * Call construction overhead: 1 (BL)
8636/// * Frame construction overhead: 1 (RET)
8637/// * Requires stack fixups? No
8638///
8639/// \p MachineOutlinerThunk implies that the function is being created from
8640/// a sequence of instructions ending in a call. The outlined function is
8641/// called with a BL instruction, and the outlined function tail-calls the
8642/// original call destination.
8643///
8644/// That is,
8645///
8646/// I1 OUTLINED_FUNCTION:
8647/// I2 --> BL OUTLINED_FUNCTION I1
8648/// BL f I2
8649/// B f
8650/// * Call construction overhead: 1 (BL)
8651/// * Frame construction overhead: 0
8652/// * Requires stack fixups? No
8653///
8654/// \p MachineOutlinerRegSave implies that the function should be called with a
8655/// save and restore of LR to an available register. This allows us to avoid
8656/// stack fixups. Note that this outlining variant is compatible with the
8657/// NoLRSave case.
8658///
8659/// That is,
8660///
8661/// I1 Save LR OUTLINED_FUNCTION:
8662/// I2 --> BL OUTLINED_FUNCTION I1
8663/// I3 Restore LR I2
8664/// I3
8665/// RET
8666///
8667/// * Call construction overhead: 3 (save + BL + restore)
8668/// * Frame construction overhead: 1 (ret)
8669/// * Requires stack fixups? No
8671 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8672 MachineOutlinerTailCall, /// Only emit a branch.
8673 MachineOutlinerNoLRSave, /// Emit a call and return.
8674 MachineOutlinerThunk, /// Emit a call and tail-call.
8675 MachineOutlinerRegSave /// Same as default, but save to a register.
8677
8681 UnsafeRegsDead = 0x8
8683
8685AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8686 MachineFunction *MF = C.getMF();
8688 const AArch64RegisterInfo *ARI =
8689 static_cast<const AArch64RegisterInfo *>(&TRI);
8690 // Check if there is an available register across the sequence that we can
8691 // use.
8692 for (unsigned Reg : AArch64::GPR64RegClass) {
8693 if (!ARI->isReservedReg(*MF, Reg) &&
8694 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8695 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8696 Reg != AArch64::X17 && // Ditto for X17.
8697 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8698 C.isAvailableInsideSeq(Reg, TRI))
8699 return Reg;
8700 }
8701 return Register();
8702}
8703
8704static bool
8706 const outliner::Candidate &b) {
8707 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8708 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8709
8710 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8711 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8712}
8713
8714static bool
8716 const outliner::Candidate &b) {
8717 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8718 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8719
8720 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8721}
8722
8724 const outliner::Candidate &b) {
8725 const AArch64Subtarget &SubtargetA =
8727 const AArch64Subtarget &SubtargetB =
8728 b.getMF()->getSubtarget<AArch64Subtarget>();
8729 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8730}
8731
8732std::optional<std::unique_ptr<outliner::OutlinedFunction>>
8734 const MachineModuleInfo &MMI,
8735 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
8736 unsigned MinRepeats) const {
8737 unsigned SequenceSize = 0;
8738 for (auto &MI : RepeatedSequenceLocs[0])
8739 SequenceSize += getInstSizeInBytes(MI);
8740
8741 unsigned NumBytesToCreateFrame = 0;
8742
8743 // We only allow outlining for functions having exactly matching return
8744 // address signing attributes, i.e., all share the same value for the
8745 // attribute "sign-return-address" and all share the same type of key they
8746 // are signed with.
8747  // Additionally we require that either all functions simultaneously support
8748  // v8.3a features or none do. Otherwise an outlined function could get signed
8749  // using dedicated v8.3 instructions, and a call from a function that doesn't
8750  // support v8.3 instructions would therefore be invalid.
8751 if (std::adjacent_find(
8752 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8753 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8754 // Return true if a and b are non-equal w.r.t. return address
8755 // signing or support of v8.3a features
8756 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8757 outliningCandidatesSigningKeyConsensus(a, b) &&
8758 outliningCandidatesV8_3OpsConsensus(a, b)) {
8759 return false;
8760 }
8761 return true;
8762 }) != RepeatedSequenceLocs.end()) {
8763 return std::nullopt;
8764 }
8765
8766  // Since at this point all candidates agree on their return address signing,
8767  // picking just one is fine. If the candidate functions potentially sign their
8768 // return addresses, the outlined function should do the same. Note that in
8769  // the case of "sign-return-address"="non-leaf" this is an assumption: it is
8770  // not certain that the outlined function will have to sign its return
8771  // address, but that decision is made later, once the decision to outline
8772  // has already been made.
8773 // The same holds for the number of additional instructions we need: On
8774 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8775 // necessary. However, at this point we don't know if the outlined function
8776 // will have a RET instruction so we assume the worst.
8777 const TargetRegisterInfo &TRI = getRegisterInfo();
8778 // Performing a tail call may require extra checks when PAuth is enabled.
8779 // If PAuth is disabled, set it to zero for uniformity.
8780 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8781 if (RepeatedSequenceLocs[0]
8782 .getMF()
8783 ->getInfo<AArch64FunctionInfo>()
8784 ->shouldSignReturnAddress(true)) {
8786    // One PAC and one AUT instruction
8786 NumBytesToCreateFrame += 8;
8787
8788 // PAuth is enabled - set extra tail call cost, if any.
8789 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
8790 *RepeatedSequenceLocs[0].getMF());
8791 NumBytesToCheckLRInTCEpilogue =
8793 // Checking the authenticated LR value may significantly impact
8794 // SequenceSize, so account for it for more precise results.
8795 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8796 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8797
8798    // We have to check whether SP-modifying instructions would get outlined.
8799    // If so, we only allow outlining if SP is unchanged overall: matching
8800    // sub and add instructions are okay to outline, but all other SP
8801    // modifications are not.
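    // For example (a sketch): a matched pair such as
    //   sub sp, sp, #16
    //   ...
    //   add sp, sp, #16
    // nets SPValue back to zero and may be outlined, whereas an unmatched
    // "sub sp, sp, #16" on its own causes the candidate to be dropped.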
8802 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8803 int SPValue = 0;
8804 for (auto &MI : C) {
8805 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8806 switch (MI.getOpcode()) {
8807 case AArch64::ADDXri:
8808 case AArch64::ADDWri:
8809 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8810 assert(MI.getOperand(2).isImm() &&
8811 "Expected operand to be immediate");
8812 assert(MI.getOperand(1).isReg() &&
8813 "Expected operand to be a register");
8814 // Check if the add just increments sp. If so, we search for
8815 // matching sub instructions that decrement sp. If not, the
8816 // modification is illegal
8817 if (MI.getOperand(1).getReg() == AArch64::SP)
8818 SPValue += MI.getOperand(2).getImm();
8819 else
8820 return true;
8821 break;
8822 case AArch64::SUBXri:
8823 case AArch64::SUBWri:
8824 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8825 assert(MI.getOperand(2).isImm() &&
8826 "Expected operand to be immediate");
8827 assert(MI.getOperand(1).isReg() &&
8828 "Expected operand to be a register");
8829 // Check if the sub just decrements sp. If so, we search for
8830 // matching add instructions that increment sp. If not, the
8831 // modification is illegal
8832 if (MI.getOperand(1).getReg() == AArch64::SP)
8833 SPValue -= MI.getOperand(2).getImm();
8834 else
8835 return true;
8836 break;
8837 default:
8838 return true;
8839 }
8840 }
8841 }
8842 if (SPValue)
8843 return true;
8844 return false;
8845 };
8846 // Remove candidates with illegal stack modifying instructions
8847 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8848
8849 // If the sequence doesn't have enough candidates left, then we're done.
8850 if (RepeatedSequenceLocs.size() < MinRepeats)
8851 return std::nullopt;
8852 }
8853
8854 // Properties about candidate MBBs that hold for all of them.
8855 unsigned FlagsSetInAll = 0xF;
8856
8857 // Compute liveness information for each candidate, and set FlagsSetInAll.
8858 for (outliner::Candidate &C : RepeatedSequenceLocs)
8859 FlagsSetInAll &= C.Flags;
8860
8861 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8862
8863 // Helper lambda which sets call information for every candidate.
8864 auto SetCandidateCallInfo =
8865 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8866 for (outliner::Candidate &C : RepeatedSequenceLocs)
8867 C.setCallInfo(CallID, NumBytesForCall);
8868 };
8869
8870 unsigned FrameID = MachineOutlinerDefault;
8871 NumBytesToCreateFrame += 4;
8872
8873 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8874 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8875 });
8876
8877 // We check to see if CFI Instructions are present, and if they are
8878 // we find the number of CFI Instructions in the candidates.
8879 unsigned CFICount = 0;
8880 for (auto &I : RepeatedSequenceLocs[0]) {
8881 if (I.isCFIInstruction())
8882 CFICount++;
8883 }
8884
8885 // We compare the number of found CFI Instructions to the number of CFI
8886 // instructions in the parent function for each candidate. We must check this
8887 // since if we outline one of the CFI instructions in a function, we have to
8888 // outline them all for correctness. If we do not, the address offsets will be
8889 // incorrect between the two sections of the program.
8890 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8891 std::vector<MCCFIInstruction> CFIInstructions =
8892 C.getMF()->getFrameInstructions();
8893
8894 if (CFICount > 0 && CFICount != CFIInstructions.size())
8895 return std::nullopt;
8896 }
8897
8898  // Returns true if an instruction is safe to fix up, false otherwise.
8899 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8900 if (MI.isCall())
8901 return true;
8902
8903 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8904 !MI.readsRegister(AArch64::SP, &TRI))
8905 return true;
8906
8907 // Any modification of SP will break our code to save/restore LR.
8908 // FIXME: We could handle some instructions which add a constant
8909 // offset to SP, with a bit more work.
8910 if (MI.modifiesRegister(AArch64::SP, &TRI))
8911 return false;
8912
8913 // At this point, we have a stack instruction that we might need to
8914 // fix up. We'll handle it if it's a load or store.
8915 if (MI.mayLoadOrStore()) {
8916 const MachineOperand *Base; // Filled with the base operand of MI.
8917 int64_t Offset; // Filled with the offset of MI.
8918 bool OffsetIsScalable;
8919
8920 // Does it allow us to offset the base operand and is the base the
8921 // register SP?
8922 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8923 !Base->isReg() || Base->getReg() != AArch64::SP)
8924 return false;
8925
8926      // Fix-up code below assumes byte offsets.
8927 if (OffsetIsScalable)
8928 return false;
8929
8930 // Find the minimum/maximum offset for this instruction and check
8931 // if fixing it up would be in range.
8932 int64_t MinOffset,
8933 MaxOffset; // Unscaled offsets for the instruction.
8934 // The scale to multiply the offsets by.
8935 TypeSize Scale(0U, false), DummyWidth(0U, false);
8936 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8937
8938 Offset += 16; // Update the offset to what it would be if we outlined.
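      // For example (a sketch): "ldr x0, [sp, #8]" in the candidate becomes
      // "ldr x0, [sp, #24]" in the outlined function, because the frame that
      // spills LR moves SP down by 16 bytes.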
8939 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8940 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8941 return false;
8942
8943 // It's in range, so we can outline it.
8944 return true;
8945 }
8946
8947 // FIXME: Add handling for instructions like "add x0, sp, #8".
8948
8949 // We can't fix it up, so don't outline it.
8950 return false;
8951 };
8952
8953 // True if it's possible to fix up each stack instruction in this sequence.
8954 // Important for frames/call variants that modify the stack.
8955 bool AllStackInstrsSafe =
8956 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
8957
8958 // If the last instruction in any candidate is a terminator, then we should
8959 // tail call all of the candidates.
8960 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8961 FrameID = MachineOutlinerTailCall;
8962 NumBytesToCreateFrame = 0;
8963 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8964 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8965 }
8966
8967 else if (LastInstrOpcode == AArch64::BL ||
8968 ((LastInstrOpcode == AArch64::BLR ||
8969 LastInstrOpcode == AArch64::BLRNoIP) &&
8970 !HasBTI)) {
8971 // FIXME: Do we need to check if the code after this uses the value of LR?
8972 FrameID = MachineOutlinerThunk;
8973 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8974 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8975 }
8976
8977 else {
8978 // We need to decide how to emit calls + frames. We can always emit the same
8979 // frame if we don't need to save to the stack. If we have to save to the
8980 // stack, then we need a different frame.
8981 unsigned NumBytesNoStackCalls = 0;
8982 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8983
8984 // Check if we have to save LR.
8985 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8986 bool LRAvailable =
8987 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8988 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8989 : true;
8990 // If we have a noreturn caller, then we're going to be conservative and
8991 // say that we have to save LR. If we don't have a ret at the end of the
8992 // block, then we can't reason about liveness accurately.
8993 //
8994 // FIXME: We can probably do better than always disabling this in
8995 // noreturn functions by fixing up the liveness info.
8996 bool IsNoReturn =
8997 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8998
8999 // Is LR available? If so, we don't need a save.
9000 if (LRAvailable && !IsNoReturn) {
9001 NumBytesNoStackCalls += 4;
9002 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9003 CandidatesWithoutStackFixups.push_back(C);
9004 }
9005
9006 // Is an unused register available? If so, we won't modify the stack, so
9007 // we can outline with the same frame type as those that don't save LR.
9008 else if (findRegisterToSaveLRTo(C)) {
9009 NumBytesNoStackCalls += 12;
9010 C.setCallInfo(MachineOutlinerRegSave, 12);
9011 CandidatesWithoutStackFixups.push_back(C);
9012 }
9013
9014 // Is SP used in the sequence at all? If not, we don't have to modify
9015 // the stack, so we are guaranteed to get the same frame.
9016 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9017 NumBytesNoStackCalls += 12;
9018 C.setCallInfo(MachineOutlinerDefault, 12);
9019 CandidatesWithoutStackFixups.push_back(C);
9020 }
9021
9022 // If we outline this, we need to modify the stack. Pretend we don't
9023 // outline this by saving all of its bytes.
9024 else {
9025 NumBytesNoStackCalls += SequenceSize;
9026 }
9027 }
9028
9029 // If there are no places where we have to save LR, then note that we
9030 // don't have to update the stack. Otherwise, give every candidate the
9031 // default call type, as long as it's safe to do so.
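    // For example (a sketch): with 3 candidates the all-default cost is
    // 3 * 12 = 36 bytes, so if the per-candidate no-stack-fixup costs sum to
    // at most 36 (or some stack instruction cannot be fixed up), the cheaper
    // no-fixup variants are used instead.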
9032 if (!AllStackInstrsSafe ||
9033 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9034 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9035 FrameID = MachineOutlinerNoLRSave;
9036 if (RepeatedSequenceLocs.size() < MinRepeats)
9037 return std::nullopt;
9038 } else {
9039 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9040
9041 // Bugzilla ID: 46767
9042 // TODO: Check if fixing up the stack more than once is safe so we can
9043 // outline these.
9044 //
9045 // An outline resulting in a caller that requires stack fixups at the
9046 // callsite to a callee that also requires stack fixups can happen when
9047 // there are no available registers at the candidate callsite for a
9048 // candidate that itself also has calls.
9049 //
9050 // In other words if function_containing_sequence in the following pseudo
9051 // assembly requires that we save LR at the point of the call, but there
9052 // are no available registers: in this case we save using SP and as a
9053    // result the SP offsets require stack fixups by multiples of 16.
9054 //
9055 // function_containing_sequence:
9056 // ...
9057 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9058 // call OUTLINED_FUNCTION_N
9059 // restore LR from SP
9060 // ...
9061 //
9062 // OUTLINED_FUNCTION_N:
9063 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9064 // ...
9065 // bl foo
9066 // restore LR from SP
9067 // ret
9068 //
9069 // Because the code to handle more than one stack fixup does not
9070 // currently have the proper checks for legality, these cases will assert
9071 // in the AArch64 MachineOutliner. This is because the code to do this
9072  // needs more hardening, testing, better checks that the generated code is
9073  // legal, etc., and because it is only verified to handle a single pass of
9074 // stack fixup.
9075 //
9076 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9077 // these cases until they are known to be handled. Bugzilla 46767 is
9078 // referenced in comments at the assert site.
9079 //
9080  // To avoid asserting (or generating illegal code in no-assert builds),
9081  // we remove all candidates which would need more than one stack fixup by
9082  // pruning the cases where the candidate has calls while also having no
9083  // available LR and no available general purpose registers to copy
9084  // LR to (i.e., one extra stack save/restore).
9085 //
9086 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9087 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9088 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9089 return (llvm::any_of(C, IsCall)) &&
9090 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9091 !findRegisterToSaveLRTo(C));
9092 });
9093 }
9094 }
9095
9096 // If we dropped all of the candidates, bail out here.
9097 if (RepeatedSequenceLocs.size() < MinRepeats)
9098 return std::nullopt;
9099 }
9100
9101 // Does every candidate's MBB contain a call? If so, then we might have a call
9102 // in the range.
9103 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9104 // Check if the range contains a call. These require a save + restore of the
9105 // link register.
9106 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9107 bool ModStackToSaveLR = false;
9108 if (any_of(drop_end(FirstCand),
9109 [](const MachineInstr &MI) { return MI.isCall(); }))
9110 ModStackToSaveLR = true;
9111
9112 // Handle the last instruction separately. If this is a tail call, then the
9113 // last instruction is a call. We don't want to save + restore in this case.
9114 // However, it could be possible that the last instruction is a call without
9115 // it being valid to tail call this sequence. We should consider this as
9116 // well.
9117 else if (FrameID != MachineOutlinerThunk &&
9118 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9119 ModStackToSaveLR = true;
9120
9121 if (ModStackToSaveLR) {
9122 // We can't fix up the stack. Bail out.
9123 if (!AllStackInstrsSafe)
9124 return std::nullopt;
9125
9126 // Save + restore LR.
9127 NumBytesToCreateFrame += 8;
9128 }
9129 }
9130
9131 // If we have CFI instructions, we can only outline if the outlined section
9132 // can be a tail call
9133 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9134 return std::nullopt;
9135
9136 return std::make_unique<outliner::OutlinedFunction>(
9137 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9138}
9139
9141 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9142  // If a bunch of candidates reach this point, they must agree on their
9143  // return address signing. It is therefore enough to just consider the
9144  // signing behaviour of one of them.
9145 const auto &CFn = Candidates.front().getMF()->getFunction();
9146
9147 if (CFn.hasFnAttribute("ptrauth-returns"))
9148 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9149 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9150 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9151 // Since all candidates belong to the same module, just copy the
9152 // function-level attributes of an arbitrary function.
9153 if (CFn.hasFnAttribute("sign-return-address"))
9154 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9155 if (CFn.hasFnAttribute("sign-return-address-key"))
9156 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9157
9158 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9159}
9160
9162 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9163 const Function &F = MF.getFunction();
9164
9165 // Can F be deduplicated by the linker? If it can, don't outline from it.
9166 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9167 return false;
9168
9169 // Don't outline from functions with section markings; the program could
9170 // expect that all the code is in the named section.
9171 // FIXME: Allow outlining from multiple functions with the same section
9172 // marking.
9173 if (F.hasSection())
9174 return false;
9175
9176 // Outlining from functions with redzones is unsafe since the outliner may
9177 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9178 // outline from it.
9179  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9180  if (!AFI || AFI->hasRedZone().value_or(true))
9181 return false;
9182
9183 // FIXME: Determine whether it is safe to outline from functions which contain
9184 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9185 // outlined together and ensure it is safe to outline with async unwind info,
9186 // required for saving & restoring VG around calls.
9187 if (AFI->hasStreamingModeChanges())
9188 return false;
9189
9190 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9191  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
9192    return false;
9193
9194 // It's safe to outline from MF.
9195 return true;
9196}
9197
9200 unsigned &Flags) const {
9202 "Must track liveness!");
9204 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9205 Ranges;
9206 // According to the AArch64 Procedure Call Standard, the following are
9207 // undefined on entry/exit from a function call:
9208 //
9209 // * Registers x16, x17, (and thus w16, w17)
9210 // * Condition codes (and thus the NZCV register)
9211 //
9212 // If any of these registers are used inside or live across an outlined
9213 // function, then they may be modified later, either by the compiler or
9214 // some other tool (like the linker).
9215 //
9216 // To avoid outlining in these situations, partition each block into ranges
9217 // where these registers are dead. We will only outline from those ranges.
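  // As an illustrative sketch (hypothetical instructions, not taken from this
  // function), a block such as
  //
  //   add  w0, w1, w2         ; x16, x17 and nzcv are all dead here
  //   subs w5, w5, #1         ; defines nzcv
  //   csel w6, w6, w7, eq     ; reads nzcv, so nzcv is live in between
  //   add  w8, w8, w9         ; all unsafe registers dead again
  //
  // cannot be outlined across the SUBS/CSEL pair; the surrounding
  // instructions end up in separate candidate ranges.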
9219 auto AreAllUnsafeRegsDead = [&LRU]() {
9220 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
9221 LRU.available(AArch64::NZCV);
9222 };
9223
9224 // We need to know if LR is live across an outlining boundary later on in
9225 // order to decide how we'll create the outlined call, frame, etc.
9226 //
9227 // It's pretty expensive to check this for *every candidate* within a block.
9228 // That's some potentially n^2 behaviour, since in the worst case, we'd need
9229 // to compute liveness from the end of the block for O(n) candidates within
9230 // the block.
9231 //
9232 // So, to improve the average case, let's keep track of liveness from the end
9233 // of the block to the beginning of *every outlinable range*. If we know that
9234 // LR is available in every range we could outline from, then we know that
9235 // we don't need to check liveness for any candidate within that range.
9236 bool LRAvailableEverywhere = true;
9237 // Compute liveness bottom-up.
9238 LRU.addLiveOuts(MBB);
9239 // Update flags that require info about the entire MBB.
9240 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
9241 if (MI.isCall() && !MI.isTerminator())
9242 Flags |= MachineOutlinerMBBFlags::HasCalls;
9243 };
9244 // Range: [RangeBegin, RangeEnd)
9245 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
9246 unsigned RangeLen;
9247 auto CreateNewRangeStartingAt =
9248 [&RangeBegin, &RangeEnd,
9249 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
9250 RangeBegin = NewBegin;
9251 RangeEnd = std::next(RangeBegin);
9252 RangeLen = 0;
9253 };
9254 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
9255 // At least one unsafe register is not dead. We do not want to outline at
9256 // this point. If it is long enough to outline from, save the range
9257 // [RangeBegin, RangeEnd).
9258 if (RangeLen > 1)
9259 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
9260 };
9261 // Find the first point where all unsafe registers are dead.
9262 // FIND: <safe instr> <-- end of first potential range
9263 // SKIP: <unsafe def>
9264 // SKIP: ... everything between ...
9265 // SKIP: <unsafe use>
9266 auto FirstPossibleEndPt = MBB.instr_rbegin();
9267 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
9268 LRU.stepBackward(*FirstPossibleEndPt);
9269 // Update flags that impact how we outline across the entire block,
9270 // regardless of safety.
9271 UpdateWholeMBBFlags(*FirstPossibleEndPt);
9272 if (AreAllUnsafeRegsDead())
9273 break;
9274 }
9275 // If we exhausted the entire block, we have no safe ranges to outline.
9276 if (FirstPossibleEndPt == MBB.instr_rend())
9277 return Ranges;
9278 // Current range.
9279 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
9280  // FirstPossibleEndPt points to the first place where all unsafe registers
9281 // are dead (if there is any such point). Begin partitioning the MBB into
9282 // ranges.
9283 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
9284 LRU.stepBackward(MI);
9285 UpdateWholeMBBFlags(MI);
9286 if (!AreAllUnsafeRegsDead()) {
9287 SaveRangeIfNonEmpty();
9288 CreateNewRangeStartingAt(MI.getIterator());
9289 continue;
9290 }
9291 LRAvailableEverywhere &= LRU.available(AArch64::LR);
9292 RangeBegin = MI.getIterator();
9293 ++RangeLen;
9294 }
9295  // The above loop misses the last (or only) range. If we are still safe, then
9296 // let's save the range.
9297 if (AreAllUnsafeRegsDead())
9298 SaveRangeIfNonEmpty();
9299 if (Ranges.empty())
9300 return Ranges;
9301 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
9302 // the order.
9303 std::reverse(Ranges.begin(), Ranges.end());
9304 // If there is at least one outlinable range where LR is unavailable
9305 // somewhere, remember that.
9306 if (!LRAvailableEverywhere)
9307 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
9308 return Ranges;
9309}
9310
9314 unsigned Flags) const {
9315 MachineInstr &MI = *MIT;
9316 MachineBasicBlock *MBB = MI.getParent();
9317 MachineFunction *MF = MBB->getParent();
9318  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
9319
9320 // Don't outline anything used for return address signing. The outlined
9321  // function will get signed later if needed.
9322 switch (MI.getOpcode()) {
9323 case AArch64::PACM:
9324 case AArch64::PACIASP:
9325 case AArch64::PACIBSP:
9326 case AArch64::PACIASPPC:
9327 case AArch64::PACIBSPPC:
9328 case AArch64::AUTIASP:
9329 case AArch64::AUTIBSP:
9330 case AArch64::AUTIASPPCi:
9331 case AArch64::AUTIASPPCr:
9332 case AArch64::AUTIBSPPCi:
9333 case AArch64::AUTIBSPPCr:
9334 case AArch64::RETAA:
9335 case AArch64::RETAB:
9336 case AArch64::RETAASPPCi:
9337 case AArch64::RETAASPPCr:
9338 case AArch64::RETABSPPCi:
9339 case AArch64::RETABSPPCr:
9340 case AArch64::EMITBKEY:
9341 case AArch64::PAUTH_PROLOGUE:
9342 case AArch64::PAUTH_EPILOGUE:
9343    return outliner::InstrType::Illegal;
9344  }
9345
9346 // Don't outline LOHs.
9347 if (FuncInfo->getLOHRelated().count(&MI))
9348    return outliner::InstrType::Illegal;
9349
9350 // We can only outline these if we will tail call the outlined function, or
9351  // fix up the CFI offsets. Currently, CFI instructions are outlined only
9352  // when the outlined sequence is a tail call.
9353 //
9354 // FIXME: If the proper fixups for the offset are implemented, this should be
9355 // possible.
9356 if (MI.isCFIInstruction())
9357    return outliner::InstrType::Legal;
9358
9359 // Is this a terminator for a basic block?
9360 if (MI.isTerminator())
9361 // TargetInstrInfo::getOutliningType has already filtered out anything
9362 // that would break this, so we can allow it here.
9363    return outliner::InstrType::Legal;
9364
9365 // Make sure none of the operands are un-outlinable.
9366 for (const MachineOperand &MOP : MI.operands()) {
9367 // A check preventing CFI indices was here before, but only CFI
9368 // instructions should have those.
9369 assert(!MOP.isCFIIndex());
9370
9371 // If it uses LR or W30 explicitly, then don't touch it.
9372 if (MOP.isReg() && !MOP.isImplicit() &&
9373 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
9374      return outliner::InstrType::Illegal;
9375  }
9376
9377 // Special cases for instructions that can always be outlined, but will fail
9378  // the later tests, e.g. ADRPs, which are PC-relative and may use LR, but can
9379  // always be outlined because they don't require a *specific* value to be in LR.
9380 if (MI.getOpcode() == AArch64::ADRP)
9381    return outliner::InstrType::Legal;
9382
9383 // If MI is a call we might be able to outline it. We don't want to outline
9384 // any calls that rely on the position of items on the stack. When we outline
9385 // something containing a call, we have to emit a save and restore of LR in
9386 // the outlined function. Currently, this always happens by saving LR to the
9387 // stack. Thus, if we outline, say, half the parameters for a function call
9388 // plus the call, then we'll break the callee's expectations for the layout
9389 // of the stack.
9390 //
9391 // FIXME: Allow calls to functions which construct a stack frame, as long
9392 // as they don't access arguments on the stack.
9393 // FIXME: Figure out some way to analyze functions defined in other modules.
9394 // We should be able to compute the memory usage based on the IR calling
9395 // convention, even if we can't see the definition.
9396 if (MI.isCall()) {
9397 // Get the function associated with the call. Look at each operand and find
9398 // the one that represents the callee and get its name.
9399 const Function *Callee = nullptr;
9400 for (const MachineOperand &MOP : MI.operands()) {
9401 if (MOP.isGlobal()) {
9402 Callee = dyn_cast<Function>(MOP.getGlobal());
9403 break;
9404 }
9405 }
9406
9407 // Never outline calls to mcount. There isn't any rule that would require
9408 // this, but the Linux kernel's "ftrace" feature depends on it.
9409 if (Callee && Callee->getName() == "\01_mcount")
9410      return outliner::InstrType::Illegal;
9411
9412 // If we don't know anything about the callee, assume it depends on the
9413 // stack layout of the caller. In that case, it's only legal to outline
9414 // as a tail-call. Explicitly list the call instructions we know about so we
9415 // don't get unexpected results with call pseudo-instructions.
9416 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
9417 if (MI.getOpcode() == AArch64::BLR ||
9418 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
9419 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
9420
9421 if (!Callee)
9422 return UnknownCallOutlineType;
9423
9424    // We have a function we have information about. Check if it's something
9425    // we can safely outline.
9426 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
9427
9428 // We don't know what's going on with the callee at all. Don't touch it.
9429 if (!CalleeMF)
9430 return UnknownCallOutlineType;
9431
9432 // Check if we know anything about the callee saves on the function. If we
9433 // don't, then don't touch it, since that implies that we haven't
9434 // computed anything about its stack frame yet.
9435 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
9436 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
9437 MFI.getNumObjects() > 0)
9438 return UnknownCallOutlineType;
9439
9440    // At this point, we can say that CalleeMF ought not to pass anything on the
9441 // stack. Therefore, we can outline it.
9442    return outliner::InstrType::Legal;
9443  }
9444
9445 // Don't touch the link register or W30.
9446 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
9447 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
9448    return outliner::InstrType::Illegal;
9449
9450 // Don't outline BTI instructions, because that will prevent the outlining
9451 // site from being indirectly callable.
9452 if (hasBTISemantics(MI))
9453    return outliner::InstrType::Illegal;
9454
9455  return outliner::InstrType::Legal;
9456}
9457
9458void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9459 for (MachineInstr &MI : MBB) {
9460 const MachineOperand *Base;
9461 TypeSize Width(0, false);
9462 int64_t Offset;
9463 bool OffsetIsScalable;
9464
9465 // Is this a load or store with an immediate offset with SP as the base?
9466 if (!MI.mayLoadOrStore() ||
9467 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
9468 &RI) ||
9469 (Base->isReg() && Base->getReg() != AArch64::SP))
9470 continue;
9471
9472 // It is, so we have to fix it up.
9473 TypeSize Scale(0U, false);
9474 int64_t Dummy1, Dummy2;
9475
9477 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9478 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
9479 assert(Scale != 0 && "Unexpected opcode!");
9480 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9481
9482 // We've pushed the return address to the stack, so add 16 to the offset.
9483 // This is safe, since we already checked if it would overflow when we
9484 // checked if this instruction was legal to outline.
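    // Illustrative example (hypothetical instruction): for a scaled 8-byte
    // store such as
    //   str x0, [sp, #8]      ; encoded immediate 1, Scale == 8
    // the extra 16-byte LR slot gives NewImm == (8 + 16) / 8 == 3, i.e. the
    // instruction becomes str x0, [sp, #24].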
9485 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9486 StackOffsetOperand.setImm(NewImm);
9487 }
9488}
9489
9491 const AArch64InstrInfo *TII,
9492 bool ShouldSignReturnAddr) {
9493 if (!ShouldSignReturnAddr)
9494 return;
9495
9496 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9499 TII->get(AArch64::PAUTH_EPILOGUE))
9501}
9502
9505 const outliner::OutlinedFunction &OF) const {
9506
9508
9510 FI->setOutliningStyle("Tail Call");
9512 // For thunk outlining, rewrite the last instruction from a call to a
9513 // tail-call.
9514 MachineInstr *Call = &*--MBB.instr_end();
9515 unsigned TailOpcode;
9516 if (Call->getOpcode() == AArch64::BL) {
9517 TailOpcode = AArch64::TCRETURNdi;
9518 } else {
9519 assert(Call->getOpcode() == AArch64::BLR ||
9520 Call->getOpcode() == AArch64::BLRNoIP);
9521 TailOpcode = AArch64::TCRETURNriALL;
9522 }
9523 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9524 .add(Call->getOperand(0))
9525 .addImm(0);
9526 MBB.insert(MBB.end(), TC);
9527 Call->eraseFromParent();
9528
9529 FI->setOutliningStyle("Thunk");
9530 }
9531
9532 bool IsLeafFunction = true;
9533
9534 // Is there a call in the outlined range?
9535 auto IsNonTailCall = [](const MachineInstr &MI) {
9536 return MI.isCall() && !MI.isReturn();
9537 };
9538
9539 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9540 // Fix up the instructions in the range, since we're going to modify the
9541 // stack.
9542
9543 // Bugzilla ID: 46767
9544 // TODO: Check if fixing up twice is safe so we can outline these.
9546 "Can only fix up stack references once");
9547 fixupPostOutline(MBB);
9548
9549 IsLeafFunction = false;
9550
9551 // LR has to be a live in so that we can save it.
9552 if (!MBB.isLiveIn(AArch64::LR))
9553 MBB.addLiveIn(AArch64::LR);
9554
9557
9560 Et = std::prev(MBB.end());
9561
9562 // Insert a save before the outlined region
9563 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9564 .addReg(AArch64::SP, RegState::Define)
9565 .addReg(AArch64::LR)
9566 .addReg(AArch64::SP)
9567 .addImm(-16);
9568 It = MBB.insert(It, STRXpre);
9569
9571 const TargetSubtargetInfo &STI = MF.getSubtarget();
9572 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9573 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9574
9575 // Add a CFI saying the stack was moved 16 B down.
9576 int64_t StackPosEntry =
9578 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9579 .addCFIIndex(StackPosEntry)
9581
9582 // Add a CFI saying that the LR that we want to find is now 16 B higher
9583 // than before.
9584 int64_t LRPosEntry = MF.addFrameInst(
9585 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9586 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9587 .addCFIIndex(LRPosEntry)
9589 }
9590
9591 // Insert a restore before the terminator for the function.
9592 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9593 .addReg(AArch64::SP, RegState::Define)
9594 .addReg(AArch64::LR, RegState::Define)
9595 .addReg(AArch64::SP)
9596 .addImm(16);
9597 Et = MBB.insert(Et, LDRXpost);
9598 }
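  // At this point, for the non-leaf case handled above, the outlined function
  // body is bracketed roughly as follows (an illustrative sketch; the CFI
  // directives correspond to the frame instructions built above and are not
  // always emitted):
  //   str x30, [sp, #-16]!        // STRXpre: save LR, move SP down 16 bytes
  //   .cfi_def_cfa_offset 16
  //   .cfi_offset w30, -16
  //   ... outlined instructions ...
  //   ldr x30, [sp], #16          // LDRXpost: restore LR, pop the slot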
9599
9600 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9601
9602 // If this is a tail call outlined function, then there's already a return.
9605 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9606 return;
9607 }
9608
9609 // It's not a tail call, so we have to insert the return ourselves.
9610
9611 // LR has to be a live in so that we can return to it.
9612 if (!MBB.isLiveIn(AArch64::LR))
9613 MBB.addLiveIn(AArch64::LR);
9614
9615 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9616 .addReg(AArch64::LR);
9617 MBB.insert(MBB.end(), ret);
9618
9619 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9620
9621 FI->setOutliningStyle("Function");
9622
9623 // Did we have to modify the stack by saving the link register?
9625 return;
9626
9627 // We modified the stack.
9628 // Walk over the basic block and fix up all the stack accesses.
9629 fixupPostOutline(MBB);
9630}
9631
9635
9636 // Are we tail calling?
9637 if (C.CallConstructionID == MachineOutlinerTailCall) {
9638 // If yes, then we can just branch to the label.
9639 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9640 .addGlobalAddress(M.getNamedValue(MF.getName()))
9641 .addImm(0));
9642 return It;
9643 }
9644
9645 // Are we saving the link register?
9646 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9647 C.CallConstructionID == MachineOutlinerThunk) {
9648 // No, so just insert the call.
9649 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9650 .addGlobalAddress(M.getNamedValue(MF.getName())));
9651 return It;
9652 }
9653
9654 // We want to return the spot where we inserted the call.
9656
9657 // Instructions for saving and restoring LR around the call instruction we're
9658 // going to insert.
9659 MachineInstr *Save;
9660 MachineInstr *Restore;
9661 // Can we save to a register?
9662 if (C.CallConstructionID == MachineOutlinerRegSave) {
9663 // FIXME: This logic should be sunk into a target-specific interface so that
9664 // we don't have to recompute the register.
9665 Register Reg = findRegisterToSaveLRTo(C);
9666 assert(Reg && "No callee-saved register available?");
9667
9668 // LR has to be a live in so that we can save it.
9669 if (!MBB.isLiveIn(AArch64::LR))
9670 MBB.addLiveIn(AArch64::LR);
9671
9672 // Save and restore LR from Reg.
9673 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9674 .addReg(AArch64::XZR)
9675 .addReg(AArch64::LR)
9676 .addImm(0);
9677 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9678 .addReg(AArch64::XZR)
9679 .addReg(Reg)
9680 .addImm(0);
9681 } else {
9682 // We have the default case. Save and restore from SP.
9683 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9684 .addReg(AArch64::SP, RegState::Define)
9685 .addReg(AArch64::LR)
9686 .addReg(AArch64::SP)
9687 .addImm(-16);
9688 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9689 .addReg(AArch64::SP, RegState::Define)
9690 .addReg(AArch64::LR, RegState::Define)
9691 .addReg(AArch64::SP)
9692 .addImm(16);
9693 }
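  // As a sketch of what the two branches above emit around the call
  // (illustrative only; x9 stands in for whatever register
  // findRegisterToSaveLRTo picked):
  //   Register-save case:           Default (SP) case:
  //     mov x9, x30                   str x30, [sp, #-16]!
  //     bl  OUTLINED_FUNCTION_N       bl  OUTLINED_FUNCTION_N
  //     mov x30, x9                   ldr x30, [sp], #16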
9694
9695 It = MBB.insert(It, Save);
9696 It++;
9697
9698 // Insert the call.
9699 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9700 .addGlobalAddress(M.getNamedValue(MF.getName())));
9701 CallPt = It;
9702 It++;
9703
9704 It = MBB.insert(It, Restore);
9705 return CallPt;
9706}
9707
9709 MachineFunction &MF) const {
9710 return MF.getFunction().hasMinSize();
9711}
9712
9715 DebugLoc &DL,
9716 bool AllowSideEffects) const {
9717 const MachineFunction &MF = *MBB.getParent();
9719 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9720
9721 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9722 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9723 } else if (STI.isSVEorStreamingSVEAvailable()) {
9724 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9725 .addImm(0)
9726 .addImm(0);
9727 } else if (STI.isNeonAvailable()) {
9728 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9729 .addImm(0);
9730 } else {
9731 // This is a streaming-compatible function without SVE. We don't have full
9732 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
9733    // Since `movi v..` would be illegal here, use `fmov d..` instead.
9734 assert(STI.hasNEON() && "Expected to have NEON.");
9735 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
9736 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
9737 }
9738}
9739
9740std::optional<DestSourcePair>
9742
9743  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
9744  // zero immediate operand are used as aliases for the mov instruction.
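  // For example (hypothetical operands), "$w0 = ORRWrs $wzr, $w1, 0" is the
  // canonical form of "mov w0, w1".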
9745 if (MI.getOpcode() == AArch64::ORRWrs &&
9746 MI.getOperand(1).getReg() == AArch64::WZR &&
9747 MI.getOperand(3).getImm() == 0x0 &&
9748 // Check that the w->w move is not a zero-extending w->x mov.
9749 (!MI.getOperand(0).getReg().isVirtual() ||
9750 MI.getOperand(0).getSubReg() == 0) &&
9751 (!MI.getOperand(0).getReg().isPhysical() ||
9752 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9753 AArch64::X0,
9754 /*TRI=*/nullptr) == -1))
9755 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9756
9757 if (MI.getOpcode() == AArch64::ORRXrs &&
9758 MI.getOperand(1).getReg() == AArch64::XZR &&
9759 MI.getOperand(3).getImm() == 0x0)
9760 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9761
9762 return std::nullopt;
9763}
9764
9765std::optional<DestSourcePair>
9767 if (MI.getOpcode() == AArch64::ORRWrs &&
9768 MI.getOperand(1).getReg() == AArch64::WZR &&
9769 MI.getOperand(3).getImm() == 0x0)
9770 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9771 return std::nullopt;
9772}
9773
9774std::optional<RegImmPair>
9776 int Sign = 1;
9777 int64_t Offset = 0;
9778
9779 // TODO: Handle cases where Reg is a super- or sub-register of the
9780 // destination register.
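  // Illustrative examples (hypothetical operands) of the pairs returned below:
  //   $x1 = ADDXri $x0, 16, 0  ->  {x0, +16}
  //   $x1 = SUBXri $x0, 1, 12  ->  {x0, -4096}   (immediate shifted left by 12)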
9781 const MachineOperand &Op0 = MI.getOperand(0);
9782 if (!Op0.isReg() || Reg != Op0.getReg())
9783 return std::nullopt;
9784
9785 switch (MI.getOpcode()) {
9786 default:
9787 return std::nullopt;
9788 case AArch64::SUBWri:
9789 case AArch64::SUBXri:
9790 case AArch64::SUBSWri:
9791 case AArch64::SUBSXri:
9792 Sign *= -1;
9793 [[fallthrough]];
9794 case AArch64::ADDSWri:
9795 case AArch64::ADDSXri:
9796 case AArch64::ADDWri:
9797 case AArch64::ADDXri: {
9798 // TODO: Third operand can be global address (usually some string).
9799 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9800 !MI.getOperand(2).isImm())
9801 return std::nullopt;
9802 int Shift = MI.getOperand(3).getImm();
9803 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9804 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9805 }
9806 }
9807 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9808}
9809
9810/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9811/// the destination register then, if possible, describe the value in terms of
9812/// the source register.
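/// As a hedged example of the intent (hypothetical registers): for
///   $w0 = ORRWrs $wzr, $w1, 0
/// a request to describe $x0 can be answered in terms of $w1, because ORRWrs
/// implicitly zero-extends its 32-bit result into the full 64-bit register.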
9813static std::optional<ParamLoadedValue>
9815 const TargetInstrInfo *TII,
9816 const TargetRegisterInfo *TRI) {
9817 auto DestSrc = TII->isCopyLikeInstr(MI);
9818 if (!DestSrc)
9819 return std::nullopt;
9820
9821 Register DestReg = DestSrc->Destination->getReg();
9822 Register SrcReg = DestSrc->Source->getReg();
9823
9824 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9825
9826 // If the described register is the destination, just return the source.
9827 if (DestReg == DescribedReg)
9828 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9829
9830 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9831 if (MI.getOpcode() == AArch64::ORRWrs &&
9832 TRI->isSuperRegister(DestReg, DescribedReg))
9833 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9834
9835 // We may need to describe the lower part of a ORRXrs move.
9836 if (MI.getOpcode() == AArch64::ORRXrs &&
9837 TRI->isSubRegister(DestReg, DescribedReg)) {
9838 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9839 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9840 }
9841
9842 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9843 "Unhandled ORR[XW]rs copy case");
9844
9845 return std::nullopt;
9846}
9847
9849 // Functions cannot be split to different sections on AArch64 if they have
9850 // a red zone. This is because relaxing a cross-section branch may require
9851 // incrementing the stack pointer to spill a register, which would overwrite
9852 // the red zone.
9853 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9854 return false;
9855
9857}
9858
9860 const MachineBasicBlock &MBB) const {
9861 // Asm Goto blocks can contain conditional branches to goto labels, which can
9862 // get moved out of range of the branch instruction.
9863 auto isAsmGoto = [](const MachineInstr &MI) {
9864 return MI.getOpcode() == AArch64::INLINEASM_BR;
9865 };
9866 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9867 return false;
9868
9869 // Because jump tables are label-relative instead of table-relative, they all
9870 // must be in the same section or relocation fixup handling will fail.
9871
9872 // Check if MBB is a jump table target
9874 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9875 return llvm::is_contained(JTE.MBBs, &MBB);
9876 };
9877 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9878 return false;
9879
9880 // Check if MBB contains a jump table lookup
9881 for (const MachineInstr &MI : MBB) {
9882 switch (MI.getOpcode()) {
9883 case TargetOpcode::G_BRJT:
9884 case AArch64::JumpTableDest32:
9885 case AArch64::JumpTableDest16:
9886 case AArch64::JumpTableDest8:
9887 return false;
9888 default:
9889 continue;
9890 }
9891 }
9892
9893 // MBB isn't a special case, so it's safe to be split to the cold section.
9894 return true;
9895}
9896
9897std::optional<ParamLoadedValue>
9899 Register Reg) const {
9900 const MachineFunction *MF = MI.getMF();
9902 switch (MI.getOpcode()) {
9903 case AArch64::MOVZWi:
9904 case AArch64::MOVZXi: {
9905 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9906 // 64-bit parameters, so we need to consider super-registers.
9907 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9908 return std::nullopt;
9909
9910 if (!MI.getOperand(1).isImm())
9911 return std::nullopt;
9912 int64_t Immediate = MI.getOperand(1).getImm();
9913 int Shift = MI.getOperand(2).getImm();
9914 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9915 nullptr);
9916 }
9917 case AArch64::ORRWrs:
9918 case AArch64::ORRXrs:
9919 return describeORRLoadedValue(MI, Reg, this, TRI);
9920 }
9921
9923}
9924
9926 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9927 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9928 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9929 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9930
9931 // Anyexts are nops.
9932 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9933 return true;
9934
9935 Register DefReg = ExtMI.getOperand(0).getReg();
9936 if (!MRI.hasOneNonDBGUse(DefReg))
9937 return false;
9938
9939 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9940 // addressing mode.
9941 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9942 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9943}
9944
9946 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9947}
9948
9949bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9950 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9951}
9952
9953bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9954 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9955}
9956
9957unsigned int
9959 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9960}
9961
9962bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9963 unsigned Scale) const {
9964 if (Offset && Scale)
9965 return false;
9966
9967 // Check Reg + Imm
9968 if (!Scale) {
9969 // 9-bit signed offset
9970 if (isInt<9>(Offset))
9971 return true;
9972
9973 // 12-bit unsigned offset
9974 unsigned Shift = Log2_64(NumBytes);
9975 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9976 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9977 (Offset >> Shift) << Shift == Offset)
9978 return true;
9979 return false;
9980 }
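  // Worked example for the Reg + Imm checks above (8-byte access, NumBytes == 8,
  // Scale == 0), purely illustrative:
  //   Offset == -256   -> legal (9-bit signed unscaled form)
  //   Offset == 20     -> legal (9-bit signed unscaled form)
  //   Offset == 32760  -> legal (4095 * 8, 12-bit unsigned scaled form)
  //   Offset == 32764  -> not legal (not a multiple of 8)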
9981
9982 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9983 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9984}
9985
9987 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9988 return AArch64::BLRNoIP;
9989 else
9990 return AArch64::BLR;
9991}
9992
9995 Register TargetReg, bool FrameSetup) const {
9996  assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9997
9999 MachineFunction &MF = *MBB.getParent();
10000 const AArch64InstrInfo *TII =
10001 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10002 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10004
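  // Sketch of the sequence this expands to (illustrative; the exact condition
  // code and MI flags are added by the BuildMI calls below):
  //   LoopTest:
  //     sub  sp, sp, #ProbeSize
  //     cmp  sp, <TargetReg>
  //     b.<cc> LoopExit
  //   LoopBody:
  //     str  xzr, [sp]
  //     b    LoopTest
  //   LoopExit:
  //     mov  sp, <TargetReg>
  //     ldr  xzr, [sp]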
10005 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10006 MachineBasicBlock *LoopTestMBB =
10008 MF.insert(MBBInsertPoint, LoopTestMBB);
10009 MachineBasicBlock *LoopBodyMBB =
10011 MF.insert(MBBInsertPoint, LoopBodyMBB);
10013 MF.insert(MBBInsertPoint, ExitMBB);
10014 MachineInstr::MIFlag Flags =
10016
10017 // LoopTest:
10018 // SUB SP, SP, #ProbeSize
10019 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10020 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10021
10022 // CMP SP, TargetReg
10023 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10024 AArch64::XZR)
10025 .addReg(AArch64::SP)
10026 .addReg(TargetReg)
10028 .setMIFlags(Flags);
10029
10030 // B.<Cond> LoopExit
10031 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10033 .addMBB(ExitMBB)
10034 .setMIFlags(Flags);
10035
10036 // STR XZR, [SP]
10037 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10038 .addReg(AArch64::XZR)
10039 .addReg(AArch64::SP)
10040 .addImm(0)
10041 .setMIFlags(Flags);
10042
10043 // B loop
10044 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10045 .addMBB(LoopTestMBB)
10046 .setMIFlags(Flags);
10047
10048 // LoopExit:
10049 // MOV SP, TargetReg
10050 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10051 .addReg(TargetReg)
10052 .addImm(0)
10054 .setMIFlags(Flags);
10055
10056 // LDR XZR, [SP]
10057 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10058 .addReg(AArch64::XZR, RegState::Define)
10059 .addReg(AArch64::SP)
10060 .addImm(0)
10061 .setMIFlags(Flags);
10062
10063 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10065
10066 LoopTestMBB->addSuccessor(ExitMBB);
10067 LoopTestMBB->addSuccessor(LoopBodyMBB);
10068 LoopBodyMBB->addSuccessor(LoopTestMBB);
10069 MBB.addSuccessor(LoopTestMBB);
10070
10071 // Update liveins.
10072 if (MF.getRegInfo().reservedRegsFrozen())
10073 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10074
10075 return ExitMBB->begin();
10076}
10077
10078namespace {
10079class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10080 MachineFunction *MF;
10081 const TargetInstrInfo *TII;
10082 const TargetRegisterInfo *TRI;
10084
10085 /// The block of the loop
10086 MachineBasicBlock *LoopBB;
10087 /// The conditional branch of the loop
10088 MachineInstr *CondBranch;
10089 /// The compare instruction for loop control
10090 MachineInstr *Comp;
10091 /// The number of the operand of the loop counter value in Comp
10092 unsigned CompCounterOprNum;
10093 /// The instruction that updates the loop counter value
10094 MachineInstr *Update;
10095 /// The number of the operand of the loop counter value in Update
10096 unsigned UpdateCounterOprNum;
10097 /// The initial value of the loop counter
10098 Register Init;
10099 /// True iff Update is a predecessor of Comp
10100 bool IsUpdatePriorComp;
10101
10102 /// The normalized condition used by createTripCountGreaterCondition()
10104
10105public:
10106 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10107 MachineInstr *Comp, unsigned CompCounterOprNum,
10108 MachineInstr *Update, unsigned UpdateCounterOprNum,
10109 Register Init, bool IsUpdatePriorComp,
10111 : MF(Comp->getParent()->getParent()),
10112 TII(MF->getSubtarget().getInstrInfo()),
10113 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10114 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10115 CompCounterOprNum(CompCounterOprNum), Update(Update),
10116 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10117 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10118
10119 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10120 // Make the instructions for loop control be placed in stage 0.
10121 // The predecessors of Comp are considered by the caller.
10122 return MI == Comp;
10123 }
10124
10125 std::optional<bool> createTripCountGreaterCondition(
10126 int TC, MachineBasicBlock &MBB,
10127 SmallVectorImpl<MachineOperand> &CondParam) override {
10128 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10129 // Cond is normalized for such use.
10130 // The predecessors of the branch are assumed to have already been inserted.
10131 CondParam = Cond;
10132 return {};
10133 }
10134
10135 void createRemainingIterationsGreaterCondition(
10137 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10138
10139 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10140
10141 void adjustTripCount(int TripCountAdjust) override {}
10142
10143 void disposed() override {}
10144 bool isMVEExpanderSupported() override { return true; }
10145};
10146} // namespace
10147
10148/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10149/// is replaced by ReplaceReg. The output register is newly created.
10150/// The other operands are unchanged from MI.
10151static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10152 Register ReplaceReg, MachineBasicBlock &MBB,
10153 MachineBasicBlock::iterator InsertTo) {
10156 const TargetRegisterInfo *TRI =
10159 Register Result = 0;
10160 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10161 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10162 Result = MRI.createVirtualRegister(
10163 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10164 NewMI->getOperand(I).setReg(Result);
10165 } else if (I == ReplaceOprNum) {
10166 MRI.constrainRegClass(
10167 ReplaceReg,
10168 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
10169 NewMI->getOperand(I).setReg(ReplaceReg);
10170 }
10171 }
10172 MBB.insert(InsertTo, NewMI);
10173 return Result;
10174}
10175
10176void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10179 // Create and accumulate conditions for next TC iterations.
10180 // Example:
10181 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10182 // # iteration of the kernel
10183 //
10184 // # insert the following instructions
10185 // cond = CSINCXr 0, 0, C, implicit $nzcv
10186 // counter = ADDXri counter, 1 # clone from this->Update
10187 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10188 // cond = CSINCXr cond, cond, C, implicit $nzcv
10189 // ... (repeat TC times)
10190 // SUBSXri cond, 0, implicit-def $nzcv
10191
10192 assert(CondBranch->getOpcode() == AArch64::Bcc);
10193 // CondCode to exit the loop
10195 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10196 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10198
10199 // Accumulate conditions to exit the loop
10200 Register AccCond = AArch64::XZR;
10201
10202 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10203 auto AccumulateCond = [&](Register CurCond,
10205 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10206 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10207 .addReg(NewCond, RegState::Define)
10208 .addReg(CurCond)
10209 .addReg(CurCond)
10211 return NewCond;
10212 };
10213
10214 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10215    // Update and Comp for I==0 already exist in MBB
10216 // (MBB is an unrolled kernel)
10217 Register Counter;
10218 for (int I = 0; I <= TC; ++I) {
10219 Register NextCounter;
10220 if (I != 0)
10221 NextCounter =
10222 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10223
10224 AccCond = AccumulateCond(AccCond, CC);
10225
10226 if (I != TC) {
10227 if (I == 0) {
10228 if (Update != Comp && IsUpdatePriorComp) {
10229 Counter =
10230 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10231 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
10232 MBB.end());
10233 } else {
10234          // Can use the already-calculated value.
10235 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
10236 }
10237 } else if (Update != Comp) {
10238 NextCounter =
10239 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10240 }
10241 }
10242 Counter = NextCounter;
10243 }
10244 } else {
10245 Register Counter;
10246 if (LastStage0Insts.empty()) {
10247      // Use the initial counter value (testing whether the trip count is
10248      // sufficient to be executed by the pipelined code).
10249 Counter = Init;
10250 if (IsUpdatePriorComp)
10251 Counter =
10252 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10253 } else {
10254 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10255 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10256 }
10257
10258 for (int I = 0; I <= TC; ++I) {
10259 Register NextCounter;
10260 NextCounter =
10261 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10262 AccCond = AccumulateCond(AccCond, CC);
10263 if (I != TC && Update != Comp)
10264 NextCounter =
10265 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10266 Counter = NextCounter;
10267 }
10268 }
10269
10270 // If AccCond == 0, the remainder is greater than TC.
10271 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
10272 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
10273 .addReg(AccCond)
10274 .addImm(0)
10275 .addImm(0);
10276 Cond.clear();
10278}
10279
10280static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
10281 Register &RegMBB, Register &RegOther) {
10282 assert(Phi.getNumOperands() == 5);
10283 if (Phi.getOperand(2).getMBB() == MBB) {
10284 RegMBB = Phi.getOperand(1).getReg();
10285 RegOther = Phi.getOperand(3).getReg();
10286 } else {
10287 assert(Phi.getOperand(4).getMBB() == MBB);
10288 RegMBB = Phi.getOperand(3).getReg();
10289 RegOther = Phi.getOperand(1).getReg();
10290 }
10291}
10292
10293static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
10294 if (!Reg.isVirtual())
10295 return false;
10296 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
10297 return MRI.getVRegDef(Reg)->getParent() != BB;
10298}
10299
10300/// If Reg is an induction variable, return true and set some parameters
10301static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
10302 MachineInstr *&UpdateInst,
10303 unsigned &UpdateCounterOprNum, Register &InitReg,
10304 bool &IsUpdatePriorComp) {
10305 // Example:
10306 //
10307 // Preheader:
10308 // InitReg = ...
10309 // LoopBB:
10310 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
10311 // Reg = COPY Reg0 ; COPY is ignored.
10312 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
10313 // ; Reg is the value calculated in the previous
10314 // ; iteration, so IsUpdatePriorComp == false.
10315
10316 if (LoopBB->pred_size() != 2)
10317 return false;
10318 if (!Reg.isVirtual())
10319 return false;
10320 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
10321 UpdateInst = nullptr;
10322 UpdateCounterOprNum = 0;
10323 InitReg = 0;
10324 IsUpdatePriorComp = true;
10325 Register CurReg = Reg;
10326 while (true) {
10327 MachineInstr *Def = MRI.getVRegDef(CurReg);
10328 if (Def->getParent() != LoopBB)
10329 return false;
10330 if (Def->isCopy()) {
10331 // Ignore copy instructions unless they contain subregisters
10332 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
10333 return false;
10334 CurReg = Def->getOperand(1).getReg();
10335 } else if (Def->isPHI()) {
10336 if (InitReg != 0)
10337 return false;
10338 if (!UpdateInst)
10339 IsUpdatePriorComp = false;
10340 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
10341 } else {
10342 if (UpdateInst)
10343 return false;
10344 switch (Def->getOpcode()) {
10345 case AArch64::ADDSXri:
10346 case AArch64::ADDSWri:
10347 case AArch64::SUBSXri:
10348 case AArch64::SUBSWri:
10349 case AArch64::ADDXri:
10350 case AArch64::ADDWri:
10351 case AArch64::SUBXri:
10352 case AArch64::SUBWri:
10353 UpdateInst = Def;
10354 UpdateCounterOprNum = 1;
10355 break;
10356 case AArch64::ADDSXrr:
10357 case AArch64::ADDSWrr:
10358 case AArch64::SUBSXrr:
10359 case AArch64::SUBSWrr:
10360 case AArch64::ADDXrr:
10361 case AArch64::ADDWrr:
10362 case AArch64::SUBXrr:
10363 case AArch64::SUBWrr:
10364 UpdateInst = Def;
10365 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
10366 UpdateCounterOprNum = 1;
10367 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
10368 UpdateCounterOprNum = 2;
10369 else
10370 return false;
10371 break;
10372 default:
10373 return false;
10374 }
10375 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
10376 }
10377
10378 if (!CurReg.isVirtual())
10379 return false;
10380 if (Reg == CurReg)
10381 break;
10382 }
10383
10384 if (!UpdateInst)
10385 return false;
10386
10387 return true;
10388}
10389
10390std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
10392 // Accept loops that meet the following conditions
10393 // * The conditional branch is BCC
10394 // * The compare instruction is ADDS/SUBS/WHILEXX
10395 // * One operand of the compare is an induction variable and the other is a
10396 // loop invariant value
10397 // * The induction variable is incremented/decremented by a single instruction
10398 // * Does not contain CALL or instructions which have unmodeled side effects
10399
10400 for (MachineInstr &MI : *LoopBB)
10401 if (MI.isCall() || MI.hasUnmodeledSideEffects())
10402 // This instruction may use NZCV, which interferes with the instruction to
10403 // be inserted for loop control.
10404 return nullptr;
10405
10406 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
10408 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
10409 return nullptr;
10410
10411 // Infinite loops are not supported
10412 if (TBB == LoopBB && FBB == LoopBB)
10413 return nullptr;
10414
10415 // Must be conditional branch
10416 if (TBB != LoopBB && FBB == nullptr)
10417 return nullptr;
10418
10419 assert((TBB == LoopBB || FBB == LoopBB) &&
10420 "The Loop must be a single-basic-block loop");
10421
10422 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
10424
10425 if (CondBranch->getOpcode() != AArch64::Bcc)
10426 return nullptr;
10427
10428 // Normalization for createTripCountGreaterCondition()
10429 if (TBB == LoopBB)
10431
10432 MachineInstr *Comp = nullptr;
10433 unsigned CompCounterOprNum = 0;
10434 for (MachineInstr &MI : reverse(*LoopBB)) {
10435 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
10436 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
10437 // operands is a loop invariant value
10438
10439 switch (MI.getOpcode()) {
10440 case AArch64::SUBSXri:
10441 case AArch64::SUBSWri:
10442 case AArch64::ADDSXri:
10443 case AArch64::ADDSWri:
10444 Comp = &MI;
10445 CompCounterOprNum = 1;
10446 break;
10447 case AArch64::ADDSWrr:
10448 case AArch64::ADDSXrr:
10449 case AArch64::SUBSWrr:
10450 case AArch64::SUBSXrr:
10451 Comp = &MI;
10452 break;
10453 default:
10454 if (isWhileOpcode(MI.getOpcode())) {
10455 Comp = &MI;
10456 break;
10457 }
10458 return nullptr;
10459 }
10460
10461 if (CompCounterOprNum == 0) {
10462 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
10463 CompCounterOprNum = 2;
10464 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
10465 CompCounterOprNum = 1;
10466 else
10467 return nullptr;
10468 }
10469 break;
10470 }
10471 }
10472 if (!Comp)
10473 return nullptr;
10474
10475 MachineInstr *Update = nullptr;
10476 Register Init;
10477 bool IsUpdatePriorComp;
10478 unsigned UpdateCounterOprNum;
10479 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
10480 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
10481 return nullptr;
10482
10483 return std::make_unique<AArch64PipelinerLoopInfo>(
10484 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
10485 Init, IsUpdatePriorComp, Cond);
10486}
10487
10488/// verifyInstruction - Perform target specific instruction verification.
10489bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
10490 StringRef &ErrInfo) const {
10491
10492 // Verify that immediate offsets on load/store instructions are within range.
10493 // Stack objects with an FI operand are excluded as they can be fixed up
10494 // during PEI.
10495 TypeSize Scale(0U, false), Width(0U, false);
10496 int64_t MinOffset, MaxOffset;
10497 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
10498 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
10499 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
10500 int64_t Imm = MI.getOperand(ImmIdx).getImm();
10501 if (Imm < MinOffset || Imm > MaxOffset) {
10502 ErrInfo = "Unexpected immediate on load/store instruction";
10503 return false;
10504 }
10505 }
10506 }
10507 return true;
10508}
10509
10510#define GET_INSTRINFO_HELPERS
10511#define GET_INSTRMAP_INFO
10512#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Only emit a branch.
@ MachineOutlinerRegSave
Same as default, but save to a register.
@ MachineOutlinerNoLRSave
Emit a call and return.
@ MachineOutlinerThunk
Emit a call and tail-call.
@ MachineOutlinerDefault
Emit a save, restore, call, and return.
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the SmallVector class.
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
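As a rough sketch (not necessarily the exact code in this file), a branch-range check of this kind scales the byte offset down to instruction units and tests it against the per-opcode displacement width; DisplacementBits stands in for the value returned by getBranchDisplacementBits:
#include "llvm/Support/MathExtras.h"
// Branch displacements are encoded in units of 4-byte instructions, so the
// byte offset is divided by 4 before the signed range test. Typical widths:
// 14 bits for TB[N]Z, 19 for CB[N]Z and B.cc, 26 for B.
static bool branchOffsetFits(unsigned DisplacementBits, int64_t BrOffsetInBytes) {
  return llvm::isIntN(DisplacementBits, BrOffsetInBytes / 4);
}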
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instruction is a SEH instruction used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if its results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod(const MachineFunction &MF) const
Choose a method of checking LR before performing a tail call.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:707
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:704
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
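For orientation, a typical (hypothetical) use of these factory methods during frame lowering registers the CFI instruction with the function and attaches it to the instruction stream; MF, MBB, MBBI, DL, TII and TRI are assumed to be the usual in-scope objects:
// Record ".cfi_def_cfa <fp>, 16" and emit it as a CFI_INSTRUCTION pseudo.
// Illustrative pattern only, not quoted from this file.
unsigned FPDwarfReg = TRI->getDwarfRegNum(AArch64::FP, /*isEH=*/true);
unsigned CFIIndex = MF.addFrameInst(
    MCCFIInstruction::cfiDefCfa(/*Label=*/nullptr, FPDwarfReg, /*Offset=*/16));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    .addCFIIndex(CFIIndex)
    .setMIFlags(MachineInstr::FrameSetup);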
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1543
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same as getFirstTerminator, but ignores bundles and returns an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:575
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:347
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:956
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:397
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:578
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:572
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:788
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:499
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:585
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:392
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
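A small illustration (values arbitrary) of how the factory functions and accessors compose; the scalable part is measured in bytes that get multiplied by vscale at runtime:
// 32 fixed bytes plus two 16-byte scalable granules.
StackOffset Off = StackOffset::getFixed(32) + StackOffset::getScalable(2 * 16);
int64_t FixedBytes    = Off.getFixed();    // 32
int64_t ScalableBytes = Off.getScalable(); // 32, scaled by vscale at runtime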
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
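A hedged round-trip example: only bit patterns that form a rotated, replicated run of ones can be encoded, so a guard with AArch64_AM::isLogicalImmediate (assumed available alongside these helpers) comes first:
// 0x00FF00FF00FF00FF replicates an 8-of-16 run of ones, so it is encodable.
uint64_t Imm = 0x00FF00FF00FF00FFULL;
if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
  uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64); // packed N:immr:imms
  assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == Imm);
}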
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
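A short usage sketch of the shift-operand helpers (values arbitrary):
// Encode "LSL #16" as the shifter operand of an arithmetic instruction,
// then pull the pieces back out.
unsigned ShifterImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 16);
assert(AArch64_AM::getShiftType(ShifterImm) == AArch64_AM::LSL);
assert(AArch64_AM::getShiftValue(ShifterImm) == 16);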
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
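For example (a sketch that assumes the ImmInsnModel layout from AArch64ExpandImm.h), expanding a 64-bit constant produces a MOVZ/MOVK-style recipe that the caller then turns into real MachineInstrs:
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x123456789ABCDEF0ULL, /*BitSize=*/64, Insn);
for (const auto &I : Insn) {
  // Each entry carries an opcode (e.g. MOVZXi/MOVKXi) plus immediate and
  // shift operands; BuildMI is used elsewhere to materialise them.
  (void)I.Opcode;
}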
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
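A minimal, hypothetical BuildMI pattern in the style used throughout this file; MBB, MBBI, DL, TII and DestReg are assumed to be in scope:
// Materialise "add DestReg, sp, #16" (ADDXri takes Rd, Rn, imm12, shift).
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
    .addReg(AArch64::SP)
    .addImm(16)
    .addImm(0);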
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:67
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
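A tiny self-contained example of the pair (values arbitrary):
#include "llvm/Support/MathExtras.h"
// Returns the log2 shift amount for a power-of-two size, or -1 otherwise.
static int sizeToShift(uint64_t Bytes) {
  return llvm::isPowerOf2_64(Bytes) ? (int)llvm::Log2_64(Bytes) : -1;
}
// sizeToShift(4096) == 12, sizeToShift(48) == -1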
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
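An illustrative call (not quoted from this file) that drops the stack pointer by 32 bytes during prologue emission; MBB, MBBI, DL and TII are assumed to be in scope:
// Emits "sub sp, sp, #32", tagged as frame setup.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::getFixed(-32), TII, MachineInstr::FrameSetup);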
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
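A hedged snippet showing both encoders writing into an in-memory stream:
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/raw_ostream.h"
static void demoLEB128() {
  llvm::SmallString<16> Buf;
  llvm::raw_svector_ostream OS(Buf);
  unsigned ULen = llvm::encodeULEB128(624485, OS);  // bytes 0xE5 0x8E 0x26
  unsigned SLen = llvm::encodeSLEB128(-624485, OS); // signed variant
  (void)ULen; (void)SLen;
}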
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
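A short example tying SignExtend64 to the isIntN range check listed above (same MathExtras.h header, values arbitrary):
// 0xFFF read from a 12-bit field sign-extends to -1, which fits in any
// signed width, so the 9-bit check succeeds.
int64_t V = llvm::SignExtend64<12>(0xFFF); // -1
bool Fits = llvm::isIntN(9, V);            // true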
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.