AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/IR/Module.h"
43#include "llvm/MC/MCAsmInfo.h"
44#include "llvm/MC/MCInst.h"
46#include "llvm/MC/MCInstrDesc.h"
51#include "llvm/Support/LEB128.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58#include <utility>
59
60using namespace llvm;
61
62#define GET_INSTRINFO_CTOR_DTOR
63#include "AArch64GenInstrInfo.inc"
64
65static cl::opt<unsigned> TBZDisplacementBits(
66 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
67 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
68
69static cl::opt<unsigned> CBZDisplacementBits(
70 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
71 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
72
73static cl::opt<unsigned>
74 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
75 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
76
77static cl::opt<unsigned>
78 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
79 cl::desc("Restrict range of B instructions (DEBUG)"));
80
81AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
82 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
83 AArch64::CATCHRET),
84 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
85
86/// getInstSizeInBytes - Return the number of bytes of code the specified
87/// instruction may occupy. This returns the maximum number of bytes.
88unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
89 const MachineBasicBlock &MBB = *MI.getParent();
90 const MachineFunction *MF = MBB.getParent();
91 const Function &F = MF->getFunction();
92 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
93
94 {
95 auto Op = MI.getOpcode();
96 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
97 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
98 }
99
100 // Meta-instructions emit no code.
101 if (MI.isMetaInstruction())
102 return 0;
103
104 // FIXME: We currently only handle pseudoinstructions that don't get expanded
105 // before the assembly printer.
106 unsigned NumBytes = 0;
107 const MCInstrDesc &Desc = MI.getDesc();
108
109 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
110 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
111
112 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
113 if (!MFI->shouldSignReturnAddress(MF))
114 return NumBytes;
115
116 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
117 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
118 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
119 return NumBytes;
120 }
121
122 // Size should preferably be set in
123 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
124 // Specific cases below handle instructions of variable size.
125 switch (Desc.getOpcode()) {
126 default:
127 if (Desc.getSize())
128 return Desc.getSize();
129
130 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
131 // with fixed constant size but not specified in .td file) is a normal
132 // 4-byte insn.
133 NumBytes = 4;
134 break;
135 case TargetOpcode::STACKMAP:
136 // The upper bound for a stackmap intrinsic is the full length of its shadow
137 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
138 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
139 break;
140 case TargetOpcode::PATCHPOINT:
141 // The size of the patchpoint intrinsic is the number of bytes requested
142 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
143 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
144 break;
145 case TargetOpcode::STATEPOINT:
146 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
147 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
148 // No patch bytes means a normal call inst is emitted
149 if (NumBytes == 0)
150 NumBytes = 4;
151 break;
152 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
153 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
154 // instructions are expanded to the specified number of NOPs. Otherwise,
155 // they are expanded to 36-byte XRay sleds.
156 NumBytes =
157 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
158 break;
159 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
160 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
161 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
162 NumBytes = 36;
163 break;
164 case TargetOpcode::PATCHABLE_EVENT_CALL:
165 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
166 NumBytes = 24;
167 break;
168
169 case AArch64::SPACE:
170 NumBytes = MI.getOperand(1).getImm();
171 break;
172 case TargetOpcode::BUNDLE:
173 NumBytes = getInstBundleLength(MI);
174 break;
175 }
176
177 return NumBytes;
178}
179
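// Illustrative sketch (not in the upstream source): a client such as branch
// relaxation can sum the per-instruction upper bounds returned above to get a
// conservative byte offset for a block. "TII" and "MBB" are assumed to be in
// scope in the caller.
//
//   uint64_t Offset = 0;
//   for (const MachineInstr &MI : MBB)
//     Offset += TII->getInstSizeInBytes(MI);
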
180unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
181 unsigned Size = 0;
182 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
183 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
184 while (++I != E && I->isInsideBundle()) {
185 assert(!I->isBundle() && "No nested bundle!");
186 Size += getInstSizeInBytes(*I);
187 }
188 return Size;
189}
190
191static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
192 SmallVectorImpl<MachineOperand> &Cond) {
193 // Block ends with fall-through condbranch.
194 switch (LastInst->getOpcode()) {
195 default:
196 llvm_unreachable("Unknown branch instruction?");
197 case AArch64::Bcc:
198 Target = LastInst->getOperand(1).getMBB();
199 Cond.push_back(LastInst->getOperand(0));
200 break;
201 case AArch64::CBZW:
202 case AArch64::CBZX:
203 case AArch64::CBNZW:
204 case AArch64::CBNZX:
205 Target = LastInst->getOperand(1).getMBB();
206 Cond.push_back(MachineOperand::CreateImm(-1));
207 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
208 Cond.push_back(LastInst->getOperand(0));
209 break;
210 case AArch64::TBZW:
211 case AArch64::TBZX:
212 case AArch64::TBNZW:
213 case AArch64::TBNZX:
214 Target = LastInst->getOperand(2).getMBB();
215 Cond.push_back(MachineOperand::CreateImm(-1));
216 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
217 Cond.push_back(LastInst->getOperand(0));
218 Cond.push_back(LastInst->getOperand(1));
219 }
220}
221
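// For reference, the Cond vectors built above encode the branch as follows
// (sketch):
//   b.lt %bb.1          ->  Cond = { lt }
//   cbnz w8, %bb.1      ->  Cond = { -1, CBNZW, w8 }
//   tbz  x0, #3, %bb.1  ->  Cond = { -1, TBZX, x0, 3 }
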
222static unsigned getBranchDisplacementBits(unsigned Opc) {
223 switch (Opc) {
224 default:
225 llvm_unreachable("unexpected opcode!");
226 case AArch64::B:
227 return BDisplacementBits;
228 case AArch64::TBNZW:
229 case AArch64::TBZW:
230 case AArch64::TBNZX:
231 case AArch64::TBZX:
232 return TBZDisplacementBits;
233 case AArch64::CBNZW:
234 case AArch64::CBZW:
235 case AArch64::CBNZX:
236 case AArch64::CBZX:
237 return CBZDisplacementBits;
238 case AArch64::Bcc:
239 return BCCDisplacementBits;
240 }
241}
242
243bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
244 int64_t BrOffset) const {
245 unsigned Bits = getBranchDisplacementBits(BranchOp);
246 assert(Bits >= 3 && "max branch displacement must be enough to jump"
247 " over conditional branch expansion");
248 return isIntN(Bits, BrOffset / 4);
249}
250
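// Example of the check above (sketch): TB(N)Z encodes a 14-bit signed word
// offset, so with the default aarch64-tbz-offset-bits=14 a byte offset of
// 32764 is in range (32764 / 4 == 8191 fits in 14 signed bits) while 32768
// is not and the branch must be relaxed:
//   isIntN(14, 32764 / 4);  // true
//   isIntN(14, 32768 / 4);  // false
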
251MachineBasicBlock *
252AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
253 switch (MI.getOpcode()) {
254 default:
255 llvm_unreachable("unexpected opcode!");
256 case AArch64::B:
257 return MI.getOperand(0).getMBB();
258 case AArch64::TBZW:
259 case AArch64::TBNZW:
260 case AArch64::TBZX:
261 case AArch64::TBNZX:
262 return MI.getOperand(2).getMBB();
263 case AArch64::CBZW:
264 case AArch64::CBNZW:
265 case AArch64::CBZX:
266 case AArch64::CBNZX:
267 case AArch64::Bcc:
268 return MI.getOperand(1).getMBB();
269 }
270}
271
272void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
273 MachineBasicBlock &NewDestBB,
274 MachineBasicBlock &RestoreBB,
275 const DebugLoc &DL,
276 int64_t BrOffset,
277 RegScavenger *RS) const {
278 assert(RS && "RegScavenger required for long branching");
279 assert(MBB.empty() &&
280 "new block should be inserted for expanding unconditional branch");
281 assert(MBB.pred_size() == 1);
282 assert(RestoreBB.empty() &&
283 "restore block should be inserted for restoring clobbered registers");
284
285 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
286 // Offsets outside of the signed 33-bit range are not supported for ADRP +
287 // ADD.
288 if (!isInt<33>(BrOffset))
289 report_fatal_error(
290 "Branch offsets outside of the signed 33-bit range not supported");
291
292 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
293 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
294 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
295 .addReg(Reg)
296 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
297 .addImm(0);
298 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
299 };
300
301 RS->enterBasicBlockEnd(MBB);
302 // If X16 is unused, we can rely on the linker to insert a range extension
303 // thunk if NewDestBB is out of range of a single B instruction.
304 constexpr Register Reg = AArch64::X16;
305 if (!RS->isRegUsed(Reg)) {
306 insertUnconditionalBranch(MBB, &NewDestBB, DL);
307 RS->setRegUsed(Reg);
308 return;
309 }
310
311 // If there's a free register and it's worth inflating the code size,
312 // manually insert the indirect branch.
313 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
314 if (Scavenged != AArch64::NoRegister &&
315 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
316 buildIndirectBranch(Scavenged, NewDestBB);
317 RS->setRegUsed(Scavenged);
318 return;
319 }
320
321 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
322 // with red zones.
323 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
324 if (!AFI || AFI->hasRedZone().value_or(true))
325 report_fatal_error(
326 "Unable to insert indirect branch inside function that has red zone");
327
328 // Otherwise, spill X16 and defer range extension to the linker.
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
330 .addReg(AArch64::SP, RegState::Define)
331 .addReg(Reg)
332 .addReg(AArch64::SP)
333 .addImm(-16);
334
335 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
336
337 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
338 .addReg(AArch64::SP, RegState::Define)
339 .addReg(Reg, RegState::Define)
340 .addReg(AArch64::SP)
341 .addImm(16);
342}
343
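// Roughly, the expansion above emits one of the following (illustrative
// sketch, "Dest" standing for the new destination block):
//   adrp xN, Dest              ; a scavenged GPR64 xN is available
//   add  xN, xN, :lo12:Dest
//   br   xN
// or, when X16 must be spilled so the linker can later use it in a range
// extension thunk:
//   str  x16, [sp, #-16]!
//   b    RestoreBB             ; may be rewritten into an X16-based thunk
//   ...
// RestoreBB:
//   ldr  x16, [sp], #16
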
344// Branch analysis.
345bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
346 MachineBasicBlock *&TBB,
347 MachineBasicBlock *&FBB,
348 SmallVectorImpl<MachineOperand> &Cond,
349 bool AllowModify) const {
350 // If the block has no terminators, it just falls into the block after it.
351 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
352 if (I == MBB.end())
353 return false;
354
355 // Skip over SpeculationBarrierEndBB terminators
356 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
357 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
358 --I;
359 }
360
361 if (!isUnpredicatedTerminator(*I))
362 return false;
363
364 // Get the last instruction in the block.
365 MachineInstr *LastInst = &*I;
366
367 // If there is only one terminator instruction, process it.
368 unsigned LastOpc = LastInst->getOpcode();
369 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
370 if (isUncondBranchOpcode(LastOpc)) {
371 TBB = LastInst->getOperand(0).getMBB();
372 return false;
373 }
374 if (isCondBranchOpcode(LastOpc)) {
375 // Block ends with fall-through condbranch.
376 parseCondBranch(LastInst, TBB, Cond);
377 return false;
378 }
379 return true; // Can't handle indirect branch.
380 }
381
382 // Get the instruction before it if it is a terminator.
383 MachineInstr *SecondLastInst = &*I;
384 unsigned SecondLastOpc = SecondLastInst->getOpcode();
385
386 // If AllowModify is true and the block ends with two or more unconditional
387 // branches, delete all but the first unconditional branch.
388 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
389 while (isUncondBranchOpcode(SecondLastOpc)) {
390 LastInst->eraseFromParent();
391 LastInst = SecondLastInst;
392 LastOpc = LastInst->getOpcode();
393 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
394 // Return now; the only terminator is an unconditional branch.
395 TBB = LastInst->getOperand(0).getMBB();
396 return false;
397 }
398 SecondLastInst = &*I;
399 SecondLastOpc = SecondLastInst->getOpcode();
400 }
401 }
402
403 // If we're allowed to modify and the block ends in an unconditional branch
404 // which could simply fallthrough, remove the branch. (Note: This case only
405 // matters when we can't understand the whole sequence, otherwise it's also
406 // handled by BranchFolding.cpp.)
407 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
408 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
409 LastInst->eraseFromParent();
410 LastInst = SecondLastInst;
411 LastOpc = LastInst->getOpcode();
412 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
413 assert(!isUncondBranchOpcode(LastOpc) &&
414 "unreachable unconditional branches removed above");
415
416 if (isCondBranchOpcode(LastOpc)) {
417 // Block ends with fall-through condbranch.
418 parseCondBranch(LastInst, TBB, Cond);
419 return false;
420 }
421 return true; // Can't handle indirect branch.
422 }
423 SecondLastInst = &*I;
424 SecondLastOpc = SecondLastInst->getOpcode();
425 }
426
427 // If there are three terminators, we don't know what sort of block this is.
428 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
429 return true;
430
431 // If the block ends with a B and a Bcc, handle it.
432 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
433 parseCondBranch(SecondLastInst, TBB, Cond);
434 FBB = LastInst->getOperand(0).getMBB();
435 return false;
436 }
437
438 // If the block ends with two unconditional branches, handle it. The second
439 // one is not executed, so remove it.
440 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
441 TBB = SecondLastInst->getOperand(0).getMBB();
442 I = LastInst;
443 if (AllowModify)
444 I->eraseFromParent();
445 return false;
446 }
447
448 // ...likewise if it ends with an indirect branch followed by an unconditional
449 // branch.
450 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
451 I = LastInst;
452 if (AllowModify)
453 I->eraseFromParent();
454 return true;
455 }
456
457 // Otherwise, can't handle this.
458 return true;
459}
460
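// Sketch (not in the upstream source) of how callers typically consume the
// analysis above:
//
//   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
//   SmallVector<MachineOperand, 4> Cond;
//   if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
//     // Cond.empty()        : unconditional branch (or fall-through) to TBB
//     // !Cond.empty(), !FBB : conditional branch to TBB, else fall through
//     // !Cond.empty(),  FBB : conditional branch to TBB, else branch to FBB
//   }
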
461bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
462 MachineBranchPredicate &MBP,
463 bool AllowModify) const {
464 // For the moment, handle only a block which ends with a cb(n)zx followed by
465 // a fallthrough. Why this? Because it is a common form.
466 // TODO: Should we handle b.cc?
467
468 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
469 if (I == MBB.end())
470 return true;
471
472 // Skip over SpeculationBarrierEndBB terminators
473 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
474 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
475 --I;
476 }
477
478 if (!isUnpredicatedTerminator(*I))
479 return true;
480
481 // Get the last instruction in the block.
482 MachineInstr *LastInst = &*I;
483 unsigned LastOpc = LastInst->getOpcode();
484 if (!isCondBranchOpcode(LastOpc))
485 return true;
486
487 switch (LastOpc) {
488 default:
489 return true;
490 case AArch64::CBZW:
491 case AArch64::CBZX:
492 case AArch64::CBNZW:
493 case AArch64::CBNZX:
494 break;
495 };
496
497 MBP.TrueDest = LastInst->getOperand(1).getMBB();
498 assert(MBP.TrueDest && "expected!");
499 MBP.FalseDest = MBB.getNextNode();
500
501 MBP.ConditionDef = nullptr;
502 MBP.SingleUseCondition = false;
503
504 MBP.LHS = LastInst->getOperand(0);
505 MBP.RHS = MachineOperand::CreateImm(0);
506 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
507 : MachineBranchPredicate::PRED_EQ;
508 return false;
509}
510
511bool AArch64InstrInfo::reverseBranchCondition(
512 SmallVectorImpl<MachineOperand> &Cond) const {
513 if (Cond[0].getImm() != -1) {
514 // Regular Bcc
515 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
516 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
517 } else {
518 // Folded compare-and-branch
519 switch (Cond[1].getImm()) {
520 default:
521 llvm_unreachable("Unknown conditional branch!");
522 case AArch64::CBZW:
523 Cond[1].setImm(AArch64::CBNZW);
524 break;
525 case AArch64::CBNZW:
526 Cond[1].setImm(AArch64::CBZW);
527 break;
528 case AArch64::CBZX:
529 Cond[1].setImm(AArch64::CBNZX);
530 break;
531 case AArch64::CBNZX:
532 Cond[1].setImm(AArch64::CBZX);
533 break;
534 case AArch64::TBZW:
535 Cond[1].setImm(AArch64::TBNZW);
536 break;
537 case AArch64::TBNZW:
538 Cond[1].setImm(AArch64::TBZW);
539 break;
540 case AArch64::TBZX:
541 Cond[1].setImm(AArch64::TBNZX);
542 break;
543 case AArch64::TBNZX:
544 Cond[1].setImm(AArch64::TBZX);
545 break;
546 }
547 }
548
549 return false;
550}
551
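// Example of the inversion performed above:
//   Cond = { ge }            becomes  { lt }
//   Cond = { -1, CBZX, x0 }  becomes  { -1, CBNZX, x0 }
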
552unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
553 int *BytesRemoved) const {
554 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
555 if (I == MBB.end())
556 return 0;
557
558 if (!isUncondBranchOpcode(I->getOpcode()) &&
559 !isCondBranchOpcode(I->getOpcode()))
560 return 0;
561
562 // Remove the branch.
563 I->eraseFromParent();
564
565 I = MBB.end();
566
567 if (I == MBB.begin()) {
568 if (BytesRemoved)
569 *BytesRemoved = 4;
570 return 1;
571 }
572 --I;
573 if (!isCondBranchOpcode(I->getOpcode())) {
574 if (BytesRemoved)
575 *BytesRemoved = 4;
576 return 1;
577 }
578
579 // Remove the branch.
580 I->eraseFromParent();
581 if (BytesRemoved)
582 *BytesRemoved = 8;
583
584 return 2;
585}
586
587void AArch64InstrInfo::instantiateCondBranch(
588 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
589 ArrayRef<MachineOperand> Cond) const {
590 if (Cond[0].getImm() != -1) {
591 // Regular Bcc
592 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
593 } else {
594 // Folded compare-and-branch
595 // Note that we use addOperand instead of addReg to keep the flags.
596 const MachineInstrBuilder MIB =
597 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
598 if (Cond.size() > 3)
599 MIB.addImm(Cond[3].getImm());
600 MIB.addMBB(TBB);
601 }
602}
603
604unsigned AArch64InstrInfo::insertBranch(
605 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
606 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
607 // Shouldn't be a fall through.
608 assert(TBB && "insertBranch must not be told to insert a fallthrough");
609
610 if (!FBB) {
611 if (Cond.empty()) // Unconditional branch?
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
613 else
614 instantiateCondBranch(MBB, DL, TBB, Cond);
615
616 if (BytesAdded)
617 *BytesAdded = 4;
618
619 return 1;
620 }
621
622 // Two-way conditional branch.
623 instantiateCondBranch(MBB, DL, TBB, Cond);
624 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
625
626 if (BytesAdded)
627 *BytesAdded = 8;
628
629 return 2;
630}
631
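// Sketch (hypothetical caller, NewTarget assumed to be the desired
// destination) of retargeting a block's terminator with the two hooks above:
//
//   int Removed = 0, Added = 0;
//   TII->removeBranch(MBB, &Removed);
//   TII->insertBranch(MBB, NewTarget, /*FBB=*/nullptr, Cond, DL, &Added);
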
632// Find the original register that VReg is copied from.
633static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
634 while (Register::isVirtualRegister(VReg)) {
635 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
636 if (!DefMI->isFullCopy())
637 return VReg;
638 VReg = DefMI->getOperand(1).getReg();
639 }
640 return VReg;
641}
642
643// Determine if VReg is defined by an instruction that can be folded into a
644// csel instruction. If so, return the folded opcode, and the replacement
645// register.
646static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
647 unsigned *NewVReg = nullptr) {
648 VReg = removeCopies(MRI, VReg);
649 if (!Register::isVirtualRegister(VReg))
650 return 0;
651
652 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
653 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
654 unsigned Opc = 0;
655 unsigned SrcOpNum = 0;
656 switch (DefMI->getOpcode()) {
657 case AArch64::ADDSXri:
658 case AArch64::ADDSWri:
659 // if NZCV is used, do not fold.
660 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
661 true) == -1)
662 return 0;
663 // fall-through to ADDXri and ADDWri.
664 [[fallthrough]];
665 case AArch64::ADDXri:
666 case AArch64::ADDWri:
667 // add x, 1 -> csinc.
668 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
669 DefMI->getOperand(3).getImm() != 0)
670 return 0;
671 SrcOpNum = 1;
672 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
673 break;
674
675 case AArch64::ORNXrr:
676 case AArch64::ORNWrr: {
677 // not x -> csinv, represented as orn dst, xzr, src.
678 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
679 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
680 return 0;
681 SrcOpNum = 2;
682 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
683 break;
684 }
685
686 case AArch64::SUBSXrr:
687 case AArch64::SUBSWrr:
688 // if NZCV is used, do not fold.
689 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
690 true) == -1)
691 return 0;
692 // fall-through to SUBXrr and SUBWrr.
693 [[fallthrough]];
694 case AArch64::SUBXrr:
695 case AArch64::SUBWrr: {
696 // neg x -> csneg, represented as sub dst, xzr, src.
697 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
698 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
699 return 0;
700 SrcOpNum = 2;
701 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
702 break;
703 }
704 default:
705 return 0;
706 }
707 assert(Opc && SrcOpNum && "Missing parameters");
708
709 if (NewVReg)
710 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
711 return Opc;
712}
713
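// The folds recognized above correspond to (sketch):
//   add w8, w9, #1  + csel w0, wA, w8, cc  ->  csinc w0, wA, w9, cc
//   orn x8, xzr, x9 + csel x0, xA, x8, cc  ->  csinv x0, xA, x9, cc
//   sub w8, wzr, w9 + csel w0, wA, w8, cc  ->  csneg w0, wA, w9, cc
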
714bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
715 ArrayRef<MachineOperand> Cond,
716 Register DstReg, Register TrueReg,
717 Register FalseReg, int &CondCycles,
718 int &TrueCycles,
719 int &FalseCycles) const {
720 // Check register classes.
721 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
722 const TargetRegisterClass *RC =
723 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
724 if (!RC)
725 return false;
726
727 // Also need to check the dest regclass, in case we're trying to optimize
728 // something like:
729 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
730 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
731 return false;
732
733 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
734 unsigned ExtraCondLat = Cond.size() != 1;
735
736 // GPRs are handled by csel.
737 // FIXME: Fold in x+1, -x, and ~x when applicable.
738 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
739 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
740 // Single-cycle csel, csinc, csinv, and csneg.
741 CondCycles = 1 + ExtraCondLat;
742 TrueCycles = FalseCycles = 1;
743 if (canFoldIntoCSel(MRI, TrueReg))
744 TrueCycles = 0;
745 else if (canFoldIntoCSel(MRI, FalseReg))
746 FalseCycles = 0;
747 return true;
748 }
749
750 // Scalar floating point is handled by fcsel.
751 // FIXME: Form fabs, fmin, and fmax when applicable.
752 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
753 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
754 CondCycles = 5 + ExtraCondLat;
755 TrueCycles = FalseCycles = 2;
756 return true;
757 }
758
759 // Can't do vectors.
760 return false;
761}
762
763void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
764 MachineBasicBlock::iterator I,
765 const DebugLoc &DL, Register DstReg,
766 ArrayRef<MachineOperand> Cond,
767 Register TrueReg, Register FalseReg) const {
768 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
769
770 // Parse the condition code, see parseCondBranch() above.
771 AArch64CC::CondCode CC;
772 switch (Cond.size()) {
773 default:
774 llvm_unreachable("Unknown condition opcode in Cond");
775 case 1: // b.cc
776 CC = AArch64CC::CondCode(Cond[0].getImm());
777 break;
778 case 3: { // cbz/cbnz
779 // We must insert a compare against 0.
780 bool Is64Bit;
781 switch (Cond[1].getImm()) {
782 default:
783 llvm_unreachable("Unknown branch opcode in Cond");
784 case AArch64::CBZW:
785 Is64Bit = false;
786 CC = AArch64CC::EQ;
787 break;
788 case AArch64::CBZX:
789 Is64Bit = true;
790 CC = AArch64CC::EQ;
791 break;
792 case AArch64::CBNZW:
793 Is64Bit = false;
794 CC = AArch64CC::NE;
795 break;
796 case AArch64::CBNZX:
797 Is64Bit = true;
798 CC = AArch64CC::NE;
799 break;
800 }
801 Register SrcReg = Cond[2].getReg();
802 if (Is64Bit) {
803 // cmp reg, #0 is actually subs xzr, reg, #0.
804 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
805 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
806 .addReg(SrcReg)
807 .addImm(0)
808 .addImm(0);
809 } else {
810 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
811 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
812 .addReg(SrcReg)
813 .addImm(0)
814 .addImm(0);
815 }
816 break;
817 }
818 case 4: { // tbz/tbnz
819 // We must insert a tst instruction.
820 switch (Cond[1].getImm()) {
821 default:
822 llvm_unreachable("Unknown branch opcode in Cond");
823 case AArch64::TBZW:
824 case AArch64::TBZX:
825 CC = AArch64CC::EQ;
826 break;
827 case AArch64::TBNZW:
828 case AArch64::TBNZX:
829 CC = AArch64CC::NE;
830 break;
831 }
832 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
833 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
834 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
835 .addReg(Cond[2].getReg())
836 .addImm(
837 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
838 else
839 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
840 .addReg(Cond[2].getReg())
841 .addImm(
842 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
843 break;
844 }
845 }
846
847 unsigned Opc = 0;
848 const TargetRegisterClass *RC = nullptr;
849 bool TryFold = false;
850 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
851 RC = &AArch64::GPR64RegClass;
852 Opc = AArch64::CSELXr;
853 TryFold = true;
854 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
855 RC = &AArch64::GPR32RegClass;
856 Opc = AArch64::CSELWr;
857 TryFold = true;
858 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
859 RC = &AArch64::FPR64RegClass;
860 Opc = AArch64::FCSELDrrr;
861 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
862 RC = &AArch64::FPR32RegClass;
863 Opc = AArch64::FCSELSrrr;
864 }
865 assert(RC && "Unsupported regclass");
866
867 // Try folding simple instructions into the csel.
868 if (TryFold) {
869 unsigned NewVReg = 0;
870 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
871 if (FoldedOpc) {
872 // The folded opcodes csinc, csinv and csneg apply the operation to
873 // FalseReg, so we need to invert the condition.
874 CC = AArch64CC::getInvertedCondCode(CC);
875 TrueReg = FalseReg;
876 } else
877 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
878
879 // Fold the operation. Leave any dead instructions for DCE to clean up.
880 if (FoldedOpc) {
881 FalseReg = NewVReg;
882 Opc = FoldedOpc;
883 // This extends the live range of NewVReg.
884 MRI.clearKillFlags(NewVReg);
885 }
886 }
887
888 // Pull all virtual registers into the appropriate class.
889 MRI.constrainRegClass(TrueReg, RC);
890 MRI.constrainRegClass(FalseReg, RC);
891
892 // Insert the csel.
893 BuildMI(MBB, I, DL, get(Opc), DstReg)
894 .addReg(TrueReg)
895 .addReg(FalseReg)
896 .addImm(CC);
897}
898
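// Example (sketch): for a select keyed on Cond = { -1, CBZW, w8 } the code
// above emits
//   subs wzr, w8, #0
//   csel w0, wTrue, wFalse, eq
// where wTrue/wFalse stand for the constrained TrueReg/FalseReg.
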
899// Return true if Imm can be loaded into a register by a "cheap" sequence of
900// instructions. For now, "cheap" means at most two instructions.
901static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
902 if (BitSize == 32)
903 return true;
904
905 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
906 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
907 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
908 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
909
910 return Is.size() <= 2;
911}
912
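// Example (sketch): 0x000000010000ffff expands to movz+movk (two
// instructions) and is treated as cheap, while 0x0001000200030004 needs a
// movz plus three movk instructions and is not.
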
913// FIXME: this implementation should be micro-architecture dependent, so a
914// micro-architecture target hook should be introduced here in future.
915bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
916 if (Subtarget.hasExynosCheapAsMoveHandling()) {
917 if (isExynosCheapAsMove(MI))
918 return true;
919 return MI.isAsCheapAsAMove();
920 }
921
922 switch (MI.getOpcode()) {
923 default:
924 return MI.isAsCheapAsAMove();
925
926 case AArch64::ADDWrs:
927 case AArch64::ADDXrs:
928 case AArch64::SUBWrs:
929 case AArch64::SUBXrs:
930 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
931
932 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
933 // ORRXri, it is as cheap as MOV.
934 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
935 case AArch64::MOVi32imm:
936 return isCheapImmediate(MI, 32);
937 case AArch64::MOVi64imm:
938 return isCheapImmediate(MI, 64);
939 }
940}
941
942bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
943 switch (MI.getOpcode()) {
944 default:
945 return false;
946
947 case AArch64::ADDWrs:
948 case AArch64::ADDXrs:
949 case AArch64::ADDSWrs:
950 case AArch64::ADDSXrs: {
951 unsigned Imm = MI.getOperand(3).getImm();
952 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
953 if (ShiftVal == 0)
954 return true;
955 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
956 }
957
958 case AArch64::ADDWrx:
959 case AArch64::ADDXrx:
960 case AArch64::ADDXrx64:
961 case AArch64::ADDSWrx:
962 case AArch64::ADDSXrx:
963 case AArch64::ADDSXrx64: {
964 unsigned Imm = MI.getOperand(3).getImm();
965 switch (AArch64_AM::getArithExtendType(Imm)) {
966 default:
967 return false;
968 case AArch64_AM::UXTB:
969 case AArch64_AM::UXTH:
970 case AArch64_AM::UXTW:
971 case AArch64_AM::UXTX:
972 return AArch64_AM::getArithShiftValue(Imm) <= 4;
973 }
974 }
975
976 case AArch64::SUBWrs:
977 case AArch64::SUBSWrs: {
978 unsigned Imm = MI.getOperand(3).getImm();
979 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
980 return ShiftVal == 0 ||
981 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
982 }
983
984 case AArch64::SUBXrs:
985 case AArch64::SUBSXrs: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
988 return ShiftVal == 0 ||
989 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
990 }
991
992 case AArch64::SUBWrx:
993 case AArch64::SUBXrx:
994 case AArch64::SUBXrx64:
995 case AArch64::SUBSWrx:
996 case AArch64::SUBSXrx:
997 case AArch64::SUBSXrx64: {
998 unsigned Imm = MI.getOperand(3).getImm();
999 switch (AArch64_AM::getArithExtendType(Imm)) {
1000 default:
1001 return false;
1002 case AArch64_AM::UXTB:
1003 case AArch64_AM::UXTH:
1004 case AArch64_AM::UXTW:
1005 case AArch64_AM::UXTX:
1006 return AArch64_AM::getArithShiftValue(Imm) == 0;
1007 }
1008 }
1009
1010 case AArch64::LDRBBroW:
1011 case AArch64::LDRBBroX:
1012 case AArch64::LDRBroW:
1013 case AArch64::LDRBroX:
1014 case AArch64::LDRDroW:
1015 case AArch64::LDRDroX:
1016 case AArch64::LDRHHroW:
1017 case AArch64::LDRHHroX:
1018 case AArch64::LDRHroW:
1019 case AArch64::LDRHroX:
1020 case AArch64::LDRQroW:
1021 case AArch64::LDRQroX:
1022 case AArch64::LDRSBWroW:
1023 case AArch64::LDRSBWroX:
1024 case AArch64::LDRSBXroW:
1025 case AArch64::LDRSBXroX:
1026 case AArch64::LDRSHWroW:
1027 case AArch64::LDRSHWroX:
1028 case AArch64::LDRSHXroW:
1029 case AArch64::LDRSHXroX:
1030 case AArch64::LDRSWroW:
1031 case AArch64::LDRSWroX:
1032 case AArch64::LDRSroW:
1033 case AArch64::LDRSroX:
1034 case AArch64::LDRWroW:
1035 case AArch64::LDRWroX:
1036 case AArch64::LDRXroW:
1037 case AArch64::LDRXroX:
1038 case AArch64::PRFMroW:
1039 case AArch64::PRFMroX:
1040 case AArch64::STRBBroW:
1041 case AArch64::STRBBroX:
1042 case AArch64::STRBroW:
1043 case AArch64::STRBroX:
1044 case AArch64::STRDroW:
1045 case AArch64::STRDroX:
1046 case AArch64::STRHHroW:
1047 case AArch64::STRHHroX:
1048 case AArch64::STRHroW:
1049 case AArch64::STRHroX:
1050 case AArch64::STRQroW:
1051 case AArch64::STRQroX:
1052 case AArch64::STRSroW:
1053 case AArch64::STRSroX:
1054 case AArch64::STRWroW:
1055 case AArch64::STRWroX:
1056 case AArch64::STRXroW:
1057 case AArch64::STRXroX: {
1058 unsigned IsSigned = MI.getOperand(3).getImm();
1059 return !IsSigned;
1060 }
1061 }
1062}
1063
1064bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1065 unsigned Opc = MI.getOpcode();
1066 switch (Opc) {
1067 default:
1068 return false;
1069 case AArch64::SEH_StackAlloc:
1070 case AArch64::SEH_SaveFPLR:
1071 case AArch64::SEH_SaveFPLR_X:
1072 case AArch64::SEH_SaveReg:
1073 case AArch64::SEH_SaveReg_X:
1074 case AArch64::SEH_SaveRegP:
1075 case AArch64::SEH_SaveRegP_X:
1076 case AArch64::SEH_SaveFReg:
1077 case AArch64::SEH_SaveFReg_X:
1078 case AArch64::SEH_SaveFRegP:
1079 case AArch64::SEH_SaveFRegP_X:
1080 case AArch64::SEH_SetFP:
1081 case AArch64::SEH_AddFP:
1082 case AArch64::SEH_Nop:
1083 case AArch64::SEH_PrologEnd:
1084 case AArch64::SEH_EpilogStart:
1085 case AArch64::SEH_EpilogEnd:
1086 case AArch64::SEH_PACSignLR:
1087 case AArch64::SEH_SaveAnyRegQP:
1088 case AArch64::SEH_SaveAnyRegQPX:
1089 return true;
1090 }
1091}
1092
1093bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1094 Register &SrcReg, Register &DstReg,
1095 unsigned &SubIdx) const {
1096 switch (MI.getOpcode()) {
1097 default:
1098 return false;
1099 case AArch64::SBFMXri: // aka sxtw
1100 case AArch64::UBFMXri: // aka uxtw
1101 // Check for the 32 -> 64 bit extension case, these instructions can do
1102 // much more.
1103 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1104 return false;
1105 // This is a signed or unsigned 32 -> 64 bit extension.
1106 SrcReg = MI.getOperand(1).getReg();
1107 DstReg = MI.getOperand(0).getReg();
1108 SubIdx = AArch64::sub_32;
1109 return true;
1110 }
1111}
1112
1113bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1114 const MachineInstr &MIa, const MachineInstr &MIb) const {
1115 const TargetRegisterInfo *TRI = &getRegisterInfo();
1116 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1117 int64_t OffsetA = 0, OffsetB = 0;
1118 TypeSize WidthA(0, false), WidthB(0, false);
1119 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1120
1121 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1122 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1123
1124 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1125 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1126 return false;
1127
1128 // Retrieve the base, offset from the base and width. Width
1129 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1130 // base are identical, and the offset of a lower memory access +
1131 // the width doesn't overlap the offset of a higher memory access,
1132 // then the memory accesses are different.
1133 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1134 // are assumed to have the same scale (vscale).
1135 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1136 WidthA, TRI) &&
1137 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1138 WidthB, TRI)) {
1139 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1140 OffsetAIsScalable == OffsetBIsScalable) {
1141 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1142 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1143 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1144 if (LowWidth.isScalable() == OffsetAIsScalable &&
1145 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1146 return true;
1147 }
1148 }
1149 return false;
1150}
1151
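// Example (sketch): with the same base register and known widths,
//   str w0, [x1, #4]   ; offset 4, width 4
//   ldr w2, [x1, #8]   ; offset 8, width 4
// 4 + 4 <= 8, so the two accesses are reported as trivially disjoint.
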
1152bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1153 const MachineBasicBlock *MBB,
1154 const MachineFunction &MF) const {
1155 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1156 return true;
1157
1158 // Do not move an instruction that can be recognized as a branch target.
1159 if (hasBTISemantics(MI))
1160 return true;
1161
1162 switch (MI.getOpcode()) {
1163 case AArch64::HINT:
1164 // CSDB hints are scheduling barriers.
1165 if (MI.getOperand(0).getImm() == 0x14)
1166 return true;
1167 break;
1168 case AArch64::DSB:
1169 case AArch64::ISB:
1170 // DSB and ISB also are scheduling barriers.
1171 return true;
1172 case AArch64::MSRpstatesvcrImm1:
1173 // SMSTART and SMSTOP are also scheduling barriers.
1174 return true;
1175 default:;
1176 }
1177 if (isSEHInstruction(MI))
1178 return true;
1179 auto Next = std::next(MI.getIterator());
1180 return Next != MBB->end() && Next->isCFIInstruction();
1181}
1182
1183/// analyzeCompare - For a comparison instruction, return the source registers
1184/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1185/// Return true if the comparison instruction can be analyzed.
1186bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1187 Register &SrcReg2, int64_t &CmpMask,
1188 int64_t &CmpValue) const {
1189 // The first operand can be a frame index where we'd normally expect a
1190 // register.
1191 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1192 if (!MI.getOperand(1).isReg())
1193 return false;
1194
1195 switch (MI.getOpcode()) {
1196 default:
1197 break;
1198 case AArch64::PTEST_PP:
1199 case AArch64::PTEST_PP_ANY:
1200 SrcReg = MI.getOperand(0).getReg();
1201 SrcReg2 = MI.getOperand(1).getReg();
1202 // Not sure about the mask and value for now...
1203 CmpMask = ~0;
1204 CmpValue = 0;
1205 return true;
1206 case AArch64::SUBSWrr:
1207 case AArch64::SUBSWrs:
1208 case AArch64::SUBSWrx:
1209 case AArch64::SUBSXrr:
1210 case AArch64::SUBSXrs:
1211 case AArch64::SUBSXrx:
1212 case AArch64::ADDSWrr:
1213 case AArch64::ADDSWrs:
1214 case AArch64::ADDSWrx:
1215 case AArch64::ADDSXrr:
1216 case AArch64::ADDSXrs:
1217 case AArch64::ADDSXrx:
1218 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1219 SrcReg = MI.getOperand(1).getReg();
1220 SrcReg2 = MI.getOperand(2).getReg();
1221 CmpMask = ~0;
1222 CmpValue = 0;
1223 return true;
1224 case AArch64::SUBSWri:
1225 case AArch64::ADDSWri:
1226 case AArch64::SUBSXri:
1227 case AArch64::ADDSXri:
1228 SrcReg = MI.getOperand(1).getReg();
1229 SrcReg2 = 0;
1230 CmpMask = ~0;
1231 CmpValue = MI.getOperand(2).getImm();
1232 return true;
1233 case AArch64::ANDSWri:
1234 case AArch64::ANDSXri:
1235 // ANDS does not use the same encoding scheme as the other xxxS
1236 // instructions.
1237 SrcReg = MI.getOperand(1).getReg();
1238 SrcReg2 = 0;
1239 CmpMask = ~0;
1240 CmpValue = AArch64_AM::decodeLogicalImmediate(
1241 MI.getOperand(2).getImm(),
1242 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1243 return true;
1244 }
1245
1246 return false;
1247}
1248
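// Example (sketch): for "%1:gpr32 = SUBSWri %0:gpr32, 42, 0" the analysis
// above yields SrcReg = %0, SrcReg2 = 0, CmpMask = ~0, CmpValue = 42.
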
1249static bool UpdateOperandRegClass(MachineInstr &Instr) {
1250 MachineBasicBlock *MBB = Instr.getParent();
1251 assert(MBB && "Can't get MachineBasicBlock here");
1252 MachineFunction *MF = MBB->getParent();
1253 assert(MF && "Can't get MachineFunction here");
1254 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1255 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1256 MachineRegisterInfo *MRI = &MF->getRegInfo();
1257
1258 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1259 ++OpIdx) {
1260 MachineOperand &MO = Instr.getOperand(OpIdx);
1261 const TargetRegisterClass *OpRegCstraints =
1262 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1263
1264 // If there's no constraint, there's nothing to do.
1265 if (!OpRegCstraints)
1266 continue;
1267 // If the operand is a frame index, there's nothing to do here.
1268 // A frame index operand will resolve correctly during PEI.
1269 if (MO.isFI())
1270 continue;
1271
1272 assert(MO.isReg() &&
1273 "Operand has register constraints without being a register!");
1274
1275 Register Reg = MO.getReg();
1276 if (Reg.isPhysical()) {
1277 if (!OpRegCstraints->contains(Reg))
1278 return false;
1279 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1280 !MRI->constrainRegClass(Reg, OpRegCstraints))
1281 return false;
1282 }
1283
1284 return true;
1285}
1286
1287/// Return the opcode that does not set flags when possible - otherwise
1288/// return the original opcode. The caller is responsible to do the actual
1289/// substitution and legality checking.
1290unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1291 // Don't convert all compare instructions, because for some the zero register
1292 // encoding becomes the sp register.
1293 bool MIDefinesZeroReg = false;
1294 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1295 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1296 MIDefinesZeroReg = true;
1297
1298 switch (MI.getOpcode()) {
1299 default:
1300 return MI.getOpcode();
1301 case AArch64::ADDSWrr:
1302 return AArch64::ADDWrr;
1303 case AArch64::ADDSWri:
1304 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1305 case AArch64::ADDSWrs:
1306 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1307 case AArch64::ADDSWrx:
1308 return AArch64::ADDWrx;
1309 case AArch64::ADDSXrr:
1310 return AArch64::ADDXrr;
1311 case AArch64::ADDSXri:
1312 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1313 case AArch64::ADDSXrs:
1314 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1315 case AArch64::ADDSXrx:
1316 return AArch64::ADDXrx;
1317 case AArch64::SUBSWrr:
1318 return AArch64::SUBWrr;
1319 case AArch64::SUBSWri:
1320 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1321 case AArch64::SUBSWrs:
1322 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1323 case AArch64::SUBSWrx:
1324 return AArch64::SUBWrx;
1325 case AArch64::SUBSXrr:
1326 return AArch64::SUBXrr;
1327 case AArch64::SUBSXri:
1328 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1329 case AArch64::SUBSXrs:
1330 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1331 case AArch64::SUBSXrx:
1332 return AArch64::SUBXrx;
1333 }
1334}
1335
1336enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1337
1338/// True when condition flags are accessed (either by writing or reading)
1339/// on the instruction trace starting at From and ending at To.
1340///
1341/// Note: If From and To are from different blocks it's assumed CC are accessed
1342/// on the path.
1345 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1346 // Early exit if To is at the beginning of the BB.
1347 if (To == To->getParent()->begin())
1348 return true;
1349
1350 // Check whether the instructions are in the same basic block
1351 // If not, assume the condition flags might get modified somewhere.
1352 if (To->getParent() != From->getParent())
1353 return true;
1354
1355 // From must be above To.
1356 assert(std::any_of(
1357 ++To.getReverse(), To->getParent()->rend(),
1358 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1359
1360 // We iterate backward starting at \p To until we hit \p From.
1361 for (const MachineInstr &Instr :
1362 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1363 if (((AccessToCheck & AK_Write) &&
1364 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1365 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1366 return true;
1367 }
1368 return false;
1369}
1370
1371std::optional<unsigned>
1372AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1373 MachineInstr *Pred,
1374 const MachineRegisterInfo *MRI) const {
1375 unsigned MaskOpcode = Mask->getOpcode();
1376 unsigned PredOpcode = Pred->getOpcode();
1377 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1378 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1379
1380 if (PredIsWhileLike) {
1381 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1382 // instruction and the condition is "any" since WHILEcc does an implicit
1383 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1384 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1385 return PredOpcode;
1386
1387 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1388 // redundant since WHILE performs an implicit PTEST with an all active
1389 // mask.
1390 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1391 getElementSizeForOpcode(MaskOpcode) ==
1392 getElementSizeForOpcode(PredOpcode))
1393 return PredOpcode;
1394
1395 return {};
1396 }
1397
1398 if (PredIsPTestLike) {
1399 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1400 // instruction that sets the flags as PTEST would and the condition is
1401 // "any" since PG is always a subset of the governing predicate of the
1402 // ptest-like instruction.
1403 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1404 return PredOpcode;
1405
1406 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1407 // element size matches and either the PTEST_LIKE instruction uses
1408 // the same all active mask or the condition is "any".
1409 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1410 getElementSizeForOpcode(MaskOpcode) ==
1411 getElementSizeForOpcode(PredOpcode)) {
1412 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1413 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1414 return PredOpcode;
1415 }
1416
1417 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1418 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1419 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1420 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1421 // performed by the compare could consider fewer lanes for these element
1422 // sizes.
1423 //
1424 // For example, consider
1425 //
1426 // ptrue p0.b ; P0=1111-1111-1111-1111
1427 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1428 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1429 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1430 // ; ^ last active
1431 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1432 // ; ^ last active
1433 //
1434 // where the compare generates a canonical all active 32-bit predicate
1435 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1436 // active flag, whereas the PTEST instruction with the same mask doesn't.
1437 // For PTEST_ANY this doesn't apply as the flags in this case would be
1438 // identical regardless of element size.
1439 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1440 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1441 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1442 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1443 return PredOpcode;
1444
1445 return {};
1446 }
1447
1448 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1449 // opcode so the PTEST becomes redundant.
1450 switch (PredOpcode) {
1451 case AArch64::AND_PPzPP:
1452 case AArch64::BIC_PPzPP:
1453 case AArch64::EOR_PPzPP:
1454 case AArch64::NAND_PPzPP:
1455 case AArch64::NOR_PPzPP:
1456 case AArch64::ORN_PPzPP:
1457 case AArch64::ORR_PPzPP:
1458 case AArch64::BRKA_PPzP:
1459 case AArch64::BRKPA_PPzPP:
1460 case AArch64::BRKB_PPzP:
1461 case AArch64::BRKPB_PPzPP:
1462 case AArch64::RDFFR_PPz: {
1463 // Check to see if our mask is the same. If not the resulting flag bits
1464 // may be different and we can't remove the ptest.
1465 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1466 if (Mask != PredMask)
1467 return {};
1468 break;
1469 }
1470 case AArch64::BRKN_PPzP: {
1471 // BRKN uses an all active implicit mask to set flags unlike the other
1472 // flag-setting instructions.
1473 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1474 if ((MaskOpcode != AArch64::PTRUE_B) ||
1475 (Mask->getOperand(1).getImm() != 31))
1476 return {};
1477 break;
1478 }
1479 case AArch64::PTRUE_B:
1480 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1481 break;
1482 default:
1483 // Bail out if we don't recognize the input
1484 return {};
1485 }
1486
1487 return convertToFlagSettingOpc(PredOpcode);
1488}
1489
1490/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1491/// operation which could set the flags in an identical manner
1492bool AArch64InstrInfo::optimizePTestInstr(
1493 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1494 const MachineRegisterInfo *MRI) const {
1495 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1496 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1497 unsigned PredOpcode = Pred->getOpcode();
1498 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1499 if (!NewOp)
1500 return false;
1501
1502 const TargetRegisterInfo *TRI = &getRegisterInfo();
1503
1504 // If another instruction between Pred and PTest accesses flags, don't remove
1505 // the ptest or update the earlier instruction to modify them.
1506 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1507 return false;
1508
1509 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1510 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1511 // operand to be replaced with an equivalent instruction that also sets the
1512 // flags.
1513 PTest->eraseFromParent();
1514 if (*NewOp != PredOpcode) {
1515 Pred->setDesc(get(*NewOp));
1516 bool succeeded = UpdateOperandRegClass(*Pred);
1517 (void)succeeded;
1518 assert(succeeded && "Operands have incompatible register classes!");
1519 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1520 }
1521
1522 // Ensure that the flags def is live.
1523 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1524 unsigned i = 0, e = Pred->getNumOperands();
1525 for (; i != e; ++i) {
1526 MachineOperand &MO = Pred->getOperand(i);
1527 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1528 MO.setIsDead(false);
1529 break;
1530 }
1531 }
1532 }
1533 return true;
1534}
1535
1536/// Try to optimize a compare instruction. A compare instruction is an
1537 /// instruction which produces AArch64::NZCV. It is a true compare
1538 /// instruction when there are no uses of its destination register.
1540///
1541/// The following steps are tried in order:
1542/// 1. Convert CmpInstr into an unconditional version.
1543 /// 2. Remove CmpInstr if there is an instruction above it that produces a
1544 /// needed condition code, or an instruction which can be converted into
1545 /// such an instruction.
1546/// Only comparison with zero is supported.
1547bool AArch64InstrInfo::optimizeCompareInstr(
1548 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1549 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1550 assert(CmpInstr.getParent());
1551 assert(MRI);
1552
1553 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1554 int DeadNZCVIdx =
1555 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1556 if (DeadNZCVIdx != -1) {
1557 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1558 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1559 CmpInstr.eraseFromParent();
1560 return true;
1561 }
1562 unsigned Opc = CmpInstr.getOpcode();
1563 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1564 if (NewOpc == Opc)
1565 return false;
1566 const MCInstrDesc &MCID = get(NewOpc);
1567 CmpInstr.setDesc(MCID);
1568 CmpInstr.removeOperand(DeadNZCVIdx);
1569 bool succeeded = UpdateOperandRegClass(CmpInstr);
1570 (void)succeeded;
1571 assert(succeeded && "Some operands reg class are incompatible!");
1572 return true;
1573 }
1574
1575 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1576 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1577 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1578
1579 if (SrcReg2 != 0)
1580 return false;
1581
1582 // CmpInstr is a Compare instruction if destination register is not used.
1583 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1584 return false;
1585
1586 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1587 return true;
1588 return (CmpValue == 0 || CmpValue == 1) &&
1589 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1590}
1591
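// Example (sketch) of the first rewrite above, applied when NZCV is unused:
//   %2:gpr32 = SUBSWrr %0, %1, implicit-def dead $nzcv
// becomes
//   %2:gpr32 = SUBWrr %0, %1
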
1592/// Get opcode of S version of Instr.
1593/// If Instr is S version its opcode is returned.
1594/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1595/// or we are not interested in it.
1596static unsigned sForm(MachineInstr &Instr) {
1597 switch (Instr.getOpcode()) {
1598 default:
1599 return AArch64::INSTRUCTION_LIST_END;
1600
1601 case AArch64::ADDSWrr:
1602 case AArch64::ADDSWri:
1603 case AArch64::ADDSXrr:
1604 case AArch64::ADDSXri:
1605 case AArch64::SUBSWrr:
1606 case AArch64::SUBSWri:
1607 case AArch64::SUBSXrr:
1608 case AArch64::SUBSXri:
1609 return Instr.getOpcode();
1610
1611 case AArch64::ADDWrr:
1612 return AArch64::ADDSWrr;
1613 case AArch64::ADDWri:
1614 return AArch64::ADDSWri;
1615 case AArch64::ADDXrr:
1616 return AArch64::ADDSXrr;
1617 case AArch64::ADDXri:
1618 return AArch64::ADDSXri;
1619 case AArch64::ADCWr:
1620 return AArch64::ADCSWr;
1621 case AArch64::ADCXr:
1622 return AArch64::ADCSXr;
1623 case AArch64::SUBWrr:
1624 return AArch64::SUBSWrr;
1625 case AArch64::SUBWri:
1626 return AArch64::SUBSWri;
1627 case AArch64::SUBXrr:
1628 return AArch64::SUBSXrr;
1629 case AArch64::SUBXri:
1630 return AArch64::SUBSXri;
1631 case AArch64::SBCWr:
1632 return AArch64::SBCSWr;
1633 case AArch64::SBCXr:
1634 return AArch64::SBCSXr;
1635 case AArch64::ANDWri:
1636 return AArch64::ANDSWri;
1637 case AArch64::ANDXri:
1638 return AArch64::ANDSXri;
1639 }
1640}
1641
1642/// Check if AArch64::NZCV should be alive in successors of MBB.
1643static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1644 for (auto *BB : MBB->successors())
1645 if (BB->isLiveIn(AArch64::NZCV))
1646 return true;
1647 return false;
1648}
1649
1650/// \returns The condition code operand index for \p Instr if it is a branch
1651/// or select and -1 otherwise.
1652static int
1653findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1654 switch (Instr.getOpcode()) {
1655 default:
1656 return -1;
1657
1658 case AArch64::Bcc: {
1659 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1660 assert(Idx >= 2);
1661 return Idx - 2;
1662 }
1663
1664 case AArch64::CSINVWr:
1665 case AArch64::CSINVXr:
1666 case AArch64::CSINCWr:
1667 case AArch64::CSINCXr:
1668 case AArch64::CSELWr:
1669 case AArch64::CSELXr:
1670 case AArch64::CSNEGWr:
1671 case AArch64::CSNEGXr:
1672 case AArch64::FCSELSrrr:
1673 case AArch64::FCSELDrrr: {
1674 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1675 assert(Idx >= 1);
1676 return Idx - 1;
1677 }
1678 }
1679}
1680
1681/// Find a condition code used by the instruction.
1682/// Returns AArch64CC::Invalid if either the instruction does not use condition
1683/// codes or we don't optimize CmpInstr in the presence of such instructions.
1686 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1687 Instr.getOperand(CCIdx).getImm())
1689}
1690
1693 UsedNZCV UsedFlags;
1694 switch (CC) {
1695 default:
1696 break;
1697
1698 case AArch64CC::EQ: // Z set
1699 case AArch64CC::NE: // Z clear
1700 UsedFlags.Z = true;
1701 break;
1702
1703 case AArch64CC::HI: // Z clear and C set
1704 case AArch64CC::LS: // Z set or C clear
1705 UsedFlags.Z = true;
1706 [[fallthrough]];
1707 case AArch64CC::HS: // C set
1708 case AArch64CC::LO: // C clear
1709 UsedFlags.C = true;
1710 break;
1711
1712 case AArch64CC::MI: // N set
1713 case AArch64CC::PL: // N clear
1714 UsedFlags.N = true;
1715 break;
1716
1717 case AArch64CC::VS: // V set
1718 case AArch64CC::VC: // V clear
1719 UsedFlags.V = true;
1720 break;
1721
1722 case AArch64CC::GT: // Z clear, N and V the same
1723 case AArch64CC::LE: // Z set, N and V differ
1724 UsedFlags.Z = true;
1725 [[fallthrough]];
1726 case AArch64CC::GE: // N and V the same
1727 case AArch64CC::LT: // N and V differ
1728 UsedFlags.N = true;
1729 UsedFlags.V = true;
1730 break;
1731 }
1732 return UsedFlags;
1733}
1734
1735 /// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1736/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1737/// \returns std::nullopt otherwise.
1738///
1739/// Collect instructions using those flags in \p CCUseInstrs if provided.
1740std::optional<UsedNZCV>
1741llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1742 const TargetRegisterInfo &TRI,
1743 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1744 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1745 if (MI.getParent() != CmpParent)
1746 return std::nullopt;
1747
1748 if (areCFlagsAliveInSuccessors(CmpParent))
1749 return std::nullopt;
1750
1751 UsedNZCV NZCVUsedAfterCmp;
1753 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1754 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1756 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1757 return std::nullopt;
1758 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1759 if (CCUseInstrs)
1760 CCUseInstrs->push_back(&Instr);
1761 }
1762 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1763 break;
1764 }
1765 return NZCVUsedAfterCmp;
1766}
1767
1768static bool isADDSRegImm(unsigned Opcode) {
1769 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1770}
1771
1772static bool isSUBSRegImm(unsigned Opcode) {
1773 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1774}
1775
1776/// Check if CmpInstr can be substituted by MI.
1777///
1778/// CmpInstr can be substituted:
1779/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1780/// - and, MI and CmpInstr are from the same MachineBB
1781/// - and, condition flags are not alive in successors of the CmpInstr parent
1782/// - and, if MI opcode is the S form there must be no defs of flags between
1783/// MI and CmpInstr
1784/// or if MI opcode is not the S form there must be neither defs of flags
1785/// nor uses of flags between MI and CmpInstr.
1786/// - and, if C/V flags are not used after CmpInstr
1787/// or if N flag is used but MI produces poison value if signed overflow
1788/// occurs.
1789static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1790 const TargetRegisterInfo &TRI) {
1791 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1792 // that may or may not set flags.
1793 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1794
1795 const unsigned CmpOpcode = CmpInstr.getOpcode();
1796 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1797 return false;
1798
1799 assert((CmpInstr.getOperand(2).isImm() &&
1800 CmpInstr.getOperand(2).getImm() == 0) &&
1801 "Caller guarantees that CmpInstr compares with constant 0");
1802
1803 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1804 if (!NZVCUsed || NZVCUsed->C)
1805 return false;
1806
1807 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1808 // '%vreg = add ...' or '%vreg = sub ...'.
1809 // Condition flag V is used to indicate signed overflow.
1810 // 1) MI and CmpInstr set N and V to the same value.
1811 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1812 // signed overflow occurs, so CmpInstr could still be simplified away.
1813 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1814 return false;
1815
1816 AccessKind AccessToCheck = AK_Write;
1817 if (sForm(MI) != MI.getOpcode())
1818 AccessToCheck = AK_All;
1819 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1820}
1821
1822/// Substitute an instruction comparing to zero with another instruction
1823/// which produces needed condition flags.
1824///
1825/// Return true on success.
1826bool AArch64InstrInfo::substituteCmpToZero(
1827 MachineInstr &CmpInstr, unsigned SrcReg,
1828 const MachineRegisterInfo &MRI) const {
1829 // Get the unique definition of SrcReg.
1830 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1831 if (!MI)
1832 return false;
1833
1834 const TargetRegisterInfo &TRI = getRegisterInfo();
1835
1836 unsigned NewOpc = sForm(*MI);
1837 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1838 return false;
1839
1840 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1841 return false;
1842
1843 // Update the instruction to set NZCV.
1844 MI->setDesc(get(NewOpc));
1845 CmpInstr.eraseFromParent();
1846 bool succeeded = UpdateOperandRegClass(*MI);
1847 (void)succeeded;
1848 assert(succeeded && "Some operands reg class are incompatible!");
1849 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1850 return true;
1851}
1852
1853/// \returns True if \p CmpInstr can be removed.
1854///
1855/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1856/// codes used in \p CCUseInstrs must be inverted.
1857static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1858 int CmpValue, const TargetRegisterInfo &TRI,
1859 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1860 bool &IsInvertCC) {
1861 assert((CmpValue == 0 || CmpValue == 1) &&
1862 "Only comparisons to 0 or 1 considered for removal!");
1863
1864 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1865 unsigned MIOpc = MI.getOpcode();
1866 if (MIOpc == AArch64::CSINCWr) {
1867 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1868 MI.getOperand(2).getReg() != AArch64::WZR)
1869 return false;
1870 } else if (MIOpc == AArch64::CSINCXr) {
1871 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1872 MI.getOperand(2).getReg() != AArch64::XZR)
1873 return false;
1874 } else {
1875 return false;
1876 }
1877 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1878 if (MICC == AArch64CC::Invalid)
1879 return false;
1880
1881 // NZCV needs to be defined
1882 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1883 return false;
1884
1885 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1886 const unsigned CmpOpcode = CmpInstr.getOpcode();
1887 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1888 if (CmpValue && !IsSubsRegImm)
1889 return false;
1890 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1891 return false;
1892
1893 // MI conditions allowed: eq, ne, mi, pl
1894 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1895 if (MIUsedNZCV.C || MIUsedNZCV.V)
1896 return false;
1897
1898 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1899 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1900 // Condition flags are not used in CmpInstr basic block successors, and only
1901 // the Z or N flag is allowed to be used after CmpInstr within its basic block
1902 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1903 return false;
1904 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1905 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1906 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1907 return false;
1908 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
1909 if (MIUsedNZCV.N && !CmpValue)
1910 return false;
1911
1912 // There must be no defs of flags between MI and CmpInstr
1913 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1914 return false;
1915
1916 // Condition code is inverted in the following cases:
1917 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1918 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1919 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1920 (!CmpValue && MICC == AArch64CC::NE);
1921 return true;
1922}
1923
1924/// Remove comparison in csinc-cmp sequence
1925///
1926/// Examples:
1927/// 1. \code
1928/// csinc w9, wzr, wzr, ne
1929/// cmp w9, #0
1930/// b.eq
1931/// \endcode
1932/// to
1933/// \code
1934/// csinc w9, wzr, wzr, ne
1935/// b.ne
1936/// \endcode
1937///
1938/// 2. \code
1939/// csinc x2, xzr, xzr, mi
1940/// cmp x2, #1
1941/// b.pl
1942/// \endcode
1943/// to
1944/// \code
1945/// csinc x2, xzr, xzr, mi
1946/// b.pl
1947/// \endcode
1948///
1949/// \param CmpInstr comparison instruction
1950/// \return True when comparison removed
1951bool AArch64InstrInfo::removeCmpToZeroOrOne(
1952 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1953 const MachineRegisterInfo &MRI) const {
1954 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1955 if (!MI)
1956 return false;
1957 const TargetRegisterInfo &TRI = getRegisterInfo();
1958 SmallVector<MachineInstr *, 4> CCUseInstrs;
1959 bool IsInvertCC = false;
1960 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1961 IsInvertCC))
1962 return false;
1963 // Make transformation
1964 CmpInstr.eraseFromParent();
1965 if (IsInvertCC) {
1966 // Invert condition codes in CmpInstr CC users
1967 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1968 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1969 assert(Idx >= 0 && "Unexpected instruction using CC.");
1970 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1971 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1972 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1973 CCOperand.setImm(CCUse);
1974 }
1975 }
1976 return true;
1977}
1978
1979bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1980 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1981 MI.getOpcode() != AArch64::CATCHRET)
1982 return false;
1983
1984 MachineBasicBlock &MBB = *MI.getParent();
1985 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1986 auto TRI = Subtarget.getRegisterInfo();
1987 DebugLoc DL = MI.getDebugLoc();
1988
1989 if (MI.getOpcode() == AArch64::CATCHRET) {
1990 // Skip to the first instruction before the epilog.
1991 const TargetInstrInfo *TII =
1992 MBB.getParent()->getSubtarget().getInstrInfo();
1993 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1994 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
1995 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1996 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1997 FirstEpilogSEH != MBB.begin())
1998 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1999 if (FirstEpilogSEH != MBB.begin())
2000 FirstEpilogSEH = std::next(FirstEpilogSEH);
2001 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2002 .addReg(AArch64::X0, RegState::Define)
2003 .addMBB(TargetMBB);
2004 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2005 .addReg(AArch64::X0, RegState::Define)
2006 .addReg(AArch64::X0)
2007 .addMBB(TargetMBB)
2008 .addImm(0);
2009 TargetMBB->setMachineBlockAddressTaken();
2010 return true;
2011 }
2012
2013 Register Reg = MI.getOperand(0).getReg();
2014 const Module &M = *MBB.getParent()->getFunction().getParent();
2015 if (M.getStackProtectorGuard() == "sysreg") {
2016 const AArch64SysReg::SysReg *SrcReg =
2017 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2018 if (!SrcReg)
2019 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2020
2021 // mrs xN, sysreg
2022 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2023 .addDef(Reg)
2024 .addImm(SrcReg->Encoding);
2025 int Offset = M.getStackProtectorGuardOffset();
2026 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2027 // ldr xN, [xN, #offset]
2028 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset / 8);
2032 } else if (Offset >= -256 && Offset <= 255) {
2033 // ldur xN, [xN, #offset]
2034 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2035 .addDef(Reg)
2036 .addUse(Reg, RegState::Kill)
2037 .addImm(Offset);
2038 } else if (Offset >= -4095 && Offset <= 4095) {
2039 if (Offset > 0) {
2040 // add xN, xN, #offset
2041 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2042 .addDef(Reg)
2043 .addUse(Reg, RegState::Kill)
2044 .addImm(Offset)
2045 .addImm(0);
2046 } else {
2047 // sub xN, xN, #offset
2048 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2049 .addDef(Reg)
2050 .addUse(Reg, RegState::Kill)
2051 .addImm(-Offset)
2052 .addImm(0);
2053 }
2054 // ldr xN, [xN]
2055 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2056 .addDef(Reg)
2057 .addUse(Reg, RegState::Kill)
2058 .addImm(0);
2059 } else {
2060 // Cases that are larger than +/- 4095 and either not a multiple of 8 or
2061 // larger than 32760.
2062 // It might be nice to use AArch64::MOVi32imm here, which would get
2063 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2064 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2065 // AArch64FrameLowering might help us find such a scratch register
2066 // though. If we failed to find a scratch register, we could emit a
2067 // stream of add instructions to build up the immediate. Or, we could try
2068 // to insert an AArch64::MOVi32imm before register allocation so that we
2069 // didn't need to scavenge for a scratch register.
2070 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2071 }
2072 MBB.erase(MI);
2073 return true;
2074 }
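// For illustration (hypothetical guard offsets, not from a real
// configuration): with Offset = 40 the sysreg path above emits
//   mrs  x0, <guard sysreg>
//   ldr  x0, [x0, #40]
// while an offset such as -16 takes the unscaled LDUR path instead:
//   mrs  x0, <guard sysreg>
//   ldur x0, [x0, #-16]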
2075
2076 const GlobalValue *GV =
2077 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2078 const TargetMachine &TM = MBB.getParent()->getTarget();
2079 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2080 const unsigned char MO_NC = AArch64II::MO_NC;
2081
2082 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2083 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2084 .addGlobalAddress(GV, 0, OpFlags);
2085 if (Subtarget.isTargetILP32()) {
2086 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2087 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2088 .addDef(Reg32, RegState::Dead)
2089 .addUse(Reg, RegState::Kill)
2090 .addImm(0)
2091 .addMemOperand(*MI.memoperands_begin())
2092 .addDef(Reg, RegState::Implicit);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2095 .addReg(Reg, RegState::Kill)
2096 .addImm(0)
2097 .addMemOperand(*MI.memoperands_begin());
2098 }
2099 } else if (TM.getCodeModel() == CodeModel::Large) {
2100 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2101 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2102 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2103 .addImm(0);
2104 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2105 .addReg(Reg, RegState::Kill)
2106 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2107 .addImm(16);
2108 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2109 .addReg(Reg, RegState::Kill)
2110 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2111 .addImm(32);
2112 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2113 .addReg(Reg, RegState::Kill)
2114 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2115 .addImm(48);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2117 .addReg(Reg, RegState::Kill)
2118 .addImm(0)
2119 .addMemOperand(*MI.memoperands_begin());
2120 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2121 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2122 .addGlobalAddress(GV, 0, OpFlags);
2123 } else {
2124 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2125 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2126 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2127 if (Subtarget.isTargetILP32()) {
2128 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2129 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2130 .addDef(Reg32, RegState::Dead)
2131 .addUse(Reg, RegState::Kill)
2132 .addGlobalAddress(GV, 0, LoFlags)
2133 .addMemOperand(*MI.memoperands_begin())
2134 .addDef(Reg, RegState::Implicit);
2135 } else {
2136 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2137 .addReg(Reg, RegState::Kill)
2138 .addGlobalAddress(GV, 0, LoFlags)
2139 .addMemOperand(*MI.memoperands_begin());
2140 }
2141 }
2142
2143 MBB.erase(MI);
2144
2145 return true;
2146}
2147
2148// Return true if this instruction simply sets its single destination register
2149// to zero. This is equivalent to a register rename of the zero-register.
2150bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2151 switch (MI.getOpcode()) {
2152 default:
2153 break;
2154 case AArch64::MOVZWi:
2155 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2156 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2157 assert(MI.getDesc().getNumOperands() == 3 &&
2158 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2159 return true;
2160 }
2161 break;
2162 case AArch64::ANDWri: // and Rd, Rzr, #imm
2163 return MI.getOperand(1).getReg() == AArch64::WZR;
2164 case AArch64::ANDXri:
2165 return MI.getOperand(1).getReg() == AArch64::XZR;
2166 case TargetOpcode::COPY:
2167 return MI.getOperand(1).getReg() == AArch64::WZR;
2168 }
2169 return false;
2170}
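// For illustration, each of the following is recognized above as a
// materialization of zero (operands simplified):
//   movz w0, #0             ; MOVZWi with immediate 0, shift 0
//   and  x0, xzr, #0xff     ; ANDXri whose source is the zero register
//   mov  w0, wzr            ; COPY from WZR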
2171
2172// Return true if this instruction simply renames a general register without
2173// modifying bits.
2174bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2175 switch (MI.getOpcode()) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 // GPR32 copies will be lowered to ORRXrs
2180 Register DstReg = MI.getOperand(0).getReg();
2181 return (AArch64::GPR32RegClass.contains(DstReg) ||
2182 AArch64::GPR64RegClass.contains(DstReg));
2183 }
2184 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2185 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2186 assert(MI.getDesc().getNumOperands() == 4 &&
2187 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2188 return true;
2189 }
2190 break;
2191 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2192 if (MI.getOperand(2).getImm() == 0) {
2193 assert(MI.getDesc().getNumOperands() == 4 &&
2194 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2195 return true;
2196 }
2197 break;
2198 }
2199 return false;
2200}
2201
2202// Return true if this instruction simply renames an FP register without
2203// modifying bits.
2204bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2205 switch (MI.getOpcode()) {
2206 default:
2207 break;
2208 case TargetOpcode::COPY: {
2209 Register DstReg = MI.getOperand(0).getReg();
2210 return AArch64::FPR128RegClass.contains(DstReg);
2211 }
2212 case AArch64::ORRv16i8:
2213 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2214 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2215 "invalid ORRv16i8 operands");
2216 return true;
2217 }
2218 break;
2219 }
2220 return false;
2221}
2222
2223Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2224 int &FrameIndex) const {
2225 switch (MI.getOpcode()) {
2226 default:
2227 break;
2228 case AArch64::LDRWui:
2229 case AArch64::LDRXui:
2230 case AArch64::LDRBui:
2231 case AArch64::LDRHui:
2232 case AArch64::LDRSui:
2233 case AArch64::LDRDui:
2234 case AArch64::LDRQui:
2235 case AArch64::LDR_PXI:
2236 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2237 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2238 FrameIndex = MI.getOperand(1).getIndex();
2239 return MI.getOperand(0).getReg();
2240 }
2241 break;
2242 }
2243
2244 return 0;
2245}
2246
2247Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2248 int &FrameIndex) const {
2249 switch (MI.getOpcode()) {
2250 default:
2251 break;
2252 case AArch64::STRWui:
2253 case AArch64::STRXui:
2254 case AArch64::STRBui:
2255 case AArch64::STRHui:
2256 case AArch64::STRSui:
2257 case AArch64::STRDui:
2258 case AArch64::STRQui:
2259 case AArch64::STR_PXI:
2260 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2261 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2262 FrameIndex = MI.getOperand(1).getIndex();
2263 return MI.getOperand(0).getReg();
2264 }
2265 break;
2266 }
2267 return 0;
2268}
2269
2270/// Check all MachineMemOperands for a hint to suppress pairing.
2271bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2272 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2273 return MMO->getFlags() & MOSuppressPair;
2274 });
2275}
2276
2277/// Set a flag on the first MachineMemOperand to suppress pairing.
2278void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2279 if (MI.memoperands_empty())
2280 return;
2281 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2282}
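// A minimal usage sketch (shouldNotPair is a hypothetical predicate, not part
// of this file): a pass that wants to keep a particular access unpaired can do
//   if (AArch64InstrInfo::isPairableLdStInst(MI) && shouldNotPair(MI))
//     AArch64InstrInfo::suppressLdStPair(MI);
// after which isLdStPairSuppressed(MI) returns true for that instruction.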
2283
2284/// Check all MachineMemOperands for a hint that the load/store is strided.
2285bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2286 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2287 return MMO->getFlags() & MOStridedAccess;
2288 });
2289}
2290
2291bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2292 switch (Opc) {
2293 default:
2294 return false;
2295 case AArch64::STURSi:
2296 case AArch64::STRSpre:
2297 case AArch64::STURDi:
2298 case AArch64::STRDpre:
2299 case AArch64::STURQi:
2300 case AArch64::STRQpre:
2301 case AArch64::STURBBi:
2302 case AArch64::STURHHi:
2303 case AArch64::STURWi:
2304 case AArch64::STRWpre:
2305 case AArch64::STURXi:
2306 case AArch64::STRXpre:
2307 case AArch64::LDURSi:
2308 case AArch64::LDRSpre:
2309 case AArch64::LDURDi:
2310 case AArch64::LDRDpre:
2311 case AArch64::LDURQi:
2312 case AArch64::LDRQpre:
2313 case AArch64::LDURWi:
2314 case AArch64::LDRWpre:
2315 case AArch64::LDURXi:
2316 case AArch64::LDRXpre:
2317 case AArch64::LDRSWpre:
2318 case AArch64::LDURSWi:
2319 case AArch64::LDURHHi:
2320 case AArch64::LDURBBi:
2321 case AArch64::LDURSBWi:
2322 case AArch64::LDURSHWi:
2323 return true;
2324 }
2325}
2326
2327std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2328 switch (Opc) {
2329 default: return {};
2330 case AArch64::PRFMui: return AArch64::PRFUMi;
2331 case AArch64::LDRXui: return AArch64::LDURXi;
2332 case AArch64::LDRWui: return AArch64::LDURWi;
2333 case AArch64::LDRBui: return AArch64::LDURBi;
2334 case AArch64::LDRHui: return AArch64::LDURHi;
2335 case AArch64::LDRSui: return AArch64::LDURSi;
2336 case AArch64::LDRDui: return AArch64::LDURDi;
2337 case AArch64::LDRQui: return AArch64::LDURQi;
2338 case AArch64::LDRBBui: return AArch64::LDURBBi;
2339 case AArch64::LDRHHui: return AArch64::LDURHHi;
2340 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2341 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2342 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2343 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2344 case AArch64::LDRSWui: return AArch64::LDURSWi;
2345 case AArch64::STRXui: return AArch64::STURXi;
2346 case AArch64::STRWui: return AArch64::STURWi;
2347 case AArch64::STRBui: return AArch64::STURBi;
2348 case AArch64::STRHui: return AArch64::STURHi;
2349 case AArch64::STRSui: return AArch64::STURSi;
2350 case AArch64::STRDui: return AArch64::STURDi;
2351 case AArch64::STRQui: return AArch64::STURQi;
2352 case AArch64::STRBBui: return AArch64::STURBBi;
2353 case AArch64::STRHHui: return AArch64::STURHHi;
2354 }
2355}
2356
2357unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2358 switch (Opc) {
2359 default:
2360 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2361 case AArch64::ADDG:
2362 case AArch64::LDAPURBi:
2363 case AArch64::LDAPURHi:
2364 case AArch64::LDAPURi:
2365 case AArch64::LDAPURSBWi:
2366 case AArch64::LDAPURSBXi:
2367 case AArch64::LDAPURSHWi:
2368 case AArch64::LDAPURSHXi:
2369 case AArch64::LDAPURSWi:
2370 case AArch64::LDAPURXi:
2371 case AArch64::LDR_PPXI:
2372 case AArch64::LDR_PXI:
2373 case AArch64::LDR_ZXI:
2374 case AArch64::LDR_ZZXI:
2375 case AArch64::LDR_ZZZXI:
2376 case AArch64::LDR_ZZZZXI:
2377 case AArch64::LDRBBui:
2378 case AArch64::LDRBui:
2379 case AArch64::LDRDui:
2380 case AArch64::LDRHHui:
2381 case AArch64::LDRHui:
2382 case AArch64::LDRQui:
2383 case AArch64::LDRSBWui:
2384 case AArch64::LDRSBXui:
2385 case AArch64::LDRSHWui:
2386 case AArch64::LDRSHXui:
2387 case AArch64::LDRSui:
2388 case AArch64::LDRSWui:
2389 case AArch64::LDRWui:
2390 case AArch64::LDRXui:
2391 case AArch64::LDURBBi:
2392 case AArch64::LDURBi:
2393 case AArch64::LDURDi:
2394 case AArch64::LDURHHi:
2395 case AArch64::LDURHi:
2396 case AArch64::LDURQi:
2397 case AArch64::LDURSBWi:
2398 case AArch64::LDURSBXi:
2399 case AArch64::LDURSHWi:
2400 case AArch64::LDURSHXi:
2401 case AArch64::LDURSi:
2402 case AArch64::LDURSWi:
2403 case AArch64::LDURWi:
2404 case AArch64::LDURXi:
2405 case AArch64::PRFMui:
2406 case AArch64::PRFUMi:
2407 case AArch64::ST2Gi:
2408 case AArch64::STGi:
2409 case AArch64::STLURBi:
2410 case AArch64::STLURHi:
2411 case AArch64::STLURWi:
2412 case AArch64::STLURXi:
2413 case AArch64::StoreSwiftAsyncContext:
2414 case AArch64::STR_PPXI:
2415 case AArch64::STR_PXI:
2416 case AArch64::STR_ZXI:
2417 case AArch64::STR_ZZXI:
2418 case AArch64::STR_ZZZXI:
2419 case AArch64::STR_ZZZZXI:
2420 case AArch64::STRBBui:
2421 case AArch64::STRBui:
2422 case AArch64::STRDui:
2423 case AArch64::STRHHui:
2424 case AArch64::STRHui:
2425 case AArch64::STRQui:
2426 case AArch64::STRSui:
2427 case AArch64::STRWui:
2428 case AArch64::STRXui:
2429 case AArch64::STURBBi:
2430 case AArch64::STURBi:
2431 case AArch64::STURDi:
2432 case AArch64::STURHHi:
2433 case AArch64::STURHi:
2434 case AArch64::STURQi:
2435 case AArch64::STURSi:
2436 case AArch64::STURWi:
2437 case AArch64::STURXi:
2438 case AArch64::STZ2Gi:
2439 case AArch64::STZGi:
2440 case AArch64::TAGPstack:
2441 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
2442 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
2443 return 2;
2444 case AArch64::LD1B_D_IMM:
2445 case AArch64::LD1B_H_IMM:
2446 case AArch64::LD1B_IMM:
2447 case AArch64::LD1B_S_IMM:
2448 case AArch64::LD1D_IMM:
2449 case AArch64::LD1H_D_IMM:
2450 case AArch64::LD1H_IMM:
2451 case AArch64::LD1H_S_IMM:
2452 case AArch64::LD1RB_D_IMM:
2453 case AArch64::LD1RB_H_IMM:
2454 case AArch64::LD1RB_IMM:
2455 case AArch64::LD1RB_S_IMM:
2456 case AArch64::LD1RD_IMM:
2457 case AArch64::LD1RH_D_IMM:
2458 case AArch64::LD1RH_IMM:
2459 case AArch64::LD1RH_S_IMM:
2460 case AArch64::LD1RSB_D_IMM:
2461 case AArch64::LD1RSB_H_IMM:
2462 case AArch64::LD1RSB_S_IMM:
2463 case AArch64::LD1RSH_D_IMM:
2464 case AArch64::LD1RSH_S_IMM:
2465 case AArch64::LD1RSW_IMM:
2466 case AArch64::LD1RW_D_IMM:
2467 case AArch64::LD1RW_IMM:
2468 case AArch64::LD1SB_D_IMM:
2469 case AArch64::LD1SB_H_IMM:
2470 case AArch64::LD1SB_S_IMM:
2471 case AArch64::LD1SH_D_IMM:
2472 case AArch64::LD1SH_S_IMM:
2473 case AArch64::LD1SW_D_IMM:
2474 case AArch64::LD1W_D_IMM:
2475 case AArch64::LD1W_IMM:
2476 case AArch64::LD2B_IMM:
2477 case AArch64::LD2D_IMM:
2478 case AArch64::LD2H_IMM:
2479 case AArch64::LD2W_IMM:
2480 case AArch64::LD3B_IMM:
2481 case AArch64::LD3D_IMM:
2482 case AArch64::LD3H_IMM:
2483 case AArch64::LD3W_IMM:
2484 case AArch64::LD4B_IMM:
2485 case AArch64::LD4D_IMM:
2486 case AArch64::LD4H_IMM:
2487 case AArch64::LD4W_IMM:
2488 case AArch64::LDG:
2489 case AArch64::LDNF1B_D_IMM:
2490 case AArch64::LDNF1B_H_IMM:
2491 case AArch64::LDNF1B_IMM:
2492 case AArch64::LDNF1B_S_IMM:
2493 case AArch64::LDNF1D_IMM:
2494 case AArch64::LDNF1H_D_IMM:
2495 case AArch64::LDNF1H_IMM:
2496 case AArch64::LDNF1H_S_IMM:
2497 case AArch64::LDNF1SB_D_IMM:
2498 case AArch64::LDNF1SB_H_IMM:
2499 case AArch64::LDNF1SB_S_IMM:
2500 case AArch64::LDNF1SH_D_IMM:
2501 case AArch64::LDNF1SH_S_IMM:
2502 case AArch64::LDNF1SW_D_IMM:
2503 case AArch64::LDNF1W_D_IMM:
2504 case AArch64::LDNF1W_IMM:
2505 case AArch64::LDNPDi:
2506 case AArch64::LDNPQi:
2507 case AArch64::LDNPSi:
2508 case AArch64::LDNPWi:
2509 case AArch64::LDNPXi:
2510 case AArch64::LDNT1B_ZRI:
2511 case AArch64::LDNT1D_ZRI:
2512 case AArch64::LDNT1H_ZRI:
2513 case AArch64::LDNT1W_ZRI:
2514 case AArch64::LDPDi:
2515 case AArch64::LDPQi:
2516 case AArch64::LDPSi:
2517 case AArch64::LDPWi:
2518 case AArch64::LDPXi:
2519 case AArch64::LDRBBpost:
2520 case AArch64::LDRBBpre:
2521 case AArch64::LDRBpost:
2522 case AArch64::LDRBpre:
2523 case AArch64::LDRDpost:
2524 case AArch64::LDRDpre:
2525 case AArch64::LDRHHpost:
2526 case AArch64::LDRHHpre:
2527 case AArch64::LDRHpost:
2528 case AArch64::LDRHpre:
2529 case AArch64::LDRQpost:
2530 case AArch64::LDRQpre:
2531 case AArch64::LDRSpost:
2532 case AArch64::LDRSpre:
2533 case AArch64::LDRWpost:
2534 case AArch64::LDRWpre:
2535 case AArch64::LDRXpost:
2536 case AArch64::LDRXpre:
2537 case AArch64::ST1B_D_IMM:
2538 case AArch64::ST1B_H_IMM:
2539 case AArch64::ST1B_IMM:
2540 case AArch64::ST1B_S_IMM:
2541 case AArch64::ST1D_IMM:
2542 case AArch64::ST1H_D_IMM:
2543 case AArch64::ST1H_IMM:
2544 case AArch64::ST1H_S_IMM:
2545 case AArch64::ST1W_D_IMM:
2546 case AArch64::ST1W_IMM:
2547 case AArch64::ST2B_IMM:
2548 case AArch64::ST2D_IMM:
2549 case AArch64::ST2H_IMM:
2550 case AArch64::ST2W_IMM:
2551 case AArch64::ST3B_IMM:
2552 case AArch64::ST3D_IMM:
2553 case AArch64::ST3H_IMM:
2554 case AArch64::ST3W_IMM:
2555 case AArch64::ST4B_IMM:
2556 case AArch64::ST4D_IMM:
2557 case AArch64::ST4H_IMM:
2558 case AArch64::ST4W_IMM:
2559 case AArch64::STGPi:
2560 case AArch64::STGPreIndex:
2561 case AArch64::STZGPreIndex:
2562 case AArch64::ST2GPreIndex:
2563 case AArch64::STZ2GPreIndex:
2564 case AArch64::STGPostIndex:
2565 case AArch64::STZGPostIndex:
2566 case AArch64::ST2GPostIndex:
2567 case AArch64::STZ2GPostIndex:
2568 case AArch64::STNPDi:
2569 case AArch64::STNPQi:
2570 case AArch64::STNPSi:
2571 case AArch64::STNPWi:
2572 case AArch64::STNPXi:
2573 case AArch64::STNT1B_ZRI:
2574 case AArch64::STNT1D_ZRI:
2575 case AArch64::STNT1H_ZRI:
2576 case AArch64::STNT1W_ZRI:
2577 case AArch64::STPDi:
2578 case AArch64::STPQi:
2579 case AArch64::STPSi:
2580 case AArch64::STPWi:
2581 case AArch64::STPXi:
2582 case AArch64::STRBBpost:
2583 case AArch64::STRBBpre:
2584 case AArch64::STRBpost:
2585 case AArch64::STRBpre:
2586 case AArch64::STRDpost:
2587 case AArch64::STRDpre:
2588 case AArch64::STRHHpost:
2589 case AArch64::STRHHpre:
2590 case AArch64::STRHpost:
2591 case AArch64::STRHpre:
2592 case AArch64::STRQpost:
2593 case AArch64::STRQpre:
2594 case AArch64::STRSpost:
2595 case AArch64::STRSpre:
2596 case AArch64::STRWpost:
2597 case AArch64::STRWpre:
2598 case AArch64::STRXpost:
2599 case AArch64::STRXpre:
2600 return 3;
2601 case AArch64::LDPDpost:
2602 case AArch64::LDPDpre:
2603 case AArch64::LDPQpost:
2604 case AArch64::LDPQpre:
2605 case AArch64::LDPSpost:
2606 case AArch64::LDPSpre:
2607 case AArch64::LDPWpost:
2608 case AArch64::LDPWpre:
2609 case AArch64::LDPXpost:
2610 case AArch64::LDPXpre:
2611 case AArch64::STGPpre:
2612 case AArch64::STGPpost:
2613 case AArch64::STPDpost:
2614 case AArch64::STPDpre:
2615 case AArch64::STPQpost:
2616 case AArch64::STPQpre:
2617 case AArch64::STPSpost:
2618 case AArch64::STPSpre:
2619 case AArch64::STPWpost:
2620 case AArch64::STPWpre:
2621 case AArch64::STPXpost:
2622 case AArch64::STPXpre:
2623 return 4;
2624 }
2625}
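// For illustration of the indices returned above: for LDRXui
// (ldr x0, [x1, #imm]) the immediate is operand 2; for LDPXi
// (ldp x0, x1, [x2, #imm]) it is operand 3; and for a pre/post-indexed pair
// such as LDPXpre, which additionally defines the written-back base register,
// it is operand 4.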
2626
2627bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2628 switch (MI.getOpcode()) {
2629 default:
2630 return false;
2631 // Scaled instructions.
2632 case AArch64::STRSui:
2633 case AArch64::STRDui:
2634 case AArch64::STRQui:
2635 case AArch64::STRXui:
2636 case AArch64::STRWui:
2637 case AArch64::LDRSui:
2638 case AArch64::LDRDui:
2639 case AArch64::LDRQui:
2640 case AArch64::LDRXui:
2641 case AArch64::LDRWui:
2642 case AArch64::LDRSWui:
2643 // Unscaled instructions.
2644 case AArch64::STURSi:
2645 case AArch64::STRSpre:
2646 case AArch64::STURDi:
2647 case AArch64::STRDpre:
2648 case AArch64::STURQi:
2649 case AArch64::STRQpre:
2650 case AArch64::STURWi:
2651 case AArch64::STRWpre:
2652 case AArch64::STURXi:
2653 case AArch64::STRXpre:
2654 case AArch64::LDURSi:
2655 case AArch64::LDRSpre:
2656 case AArch64::LDURDi:
2657 case AArch64::LDRDpre:
2658 case AArch64::LDURQi:
2659 case AArch64::LDRQpre:
2660 case AArch64::LDURWi:
2661 case AArch64::LDRWpre:
2662 case AArch64::LDURXi:
2663 case AArch64::LDRXpre:
2664 case AArch64::LDURSWi:
2665 case AArch64::LDRSWpre:
2666 return true;
2667 }
2668}
2669
2670bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2671 switch (MI.getOpcode()) {
2672 default:
2673 assert((!MI.isCall() || !MI.isReturn()) &&
2674 "Unexpected instruction - was a new tail call opcode introduced?");
2675 return false;
2676 case AArch64::TCRETURNdi:
2677 case AArch64::TCRETURNri:
2678 case AArch64::TCRETURNrix16x17:
2679 case AArch64::TCRETURNrix17:
2680 case AArch64::TCRETURNrinotx16:
2681 case AArch64::TCRETURNriALL:
2682 case AArch64::AUTH_TCRETURN:
2683 case AArch64::AUTH_TCRETURN_BTI:
2684 return true;
2685 }
2686}
2687
2688unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2689 switch (Opc) {
2690 default:
2691 llvm_unreachable("Opcode has no flag setting equivalent!");
2692 // 32-bit cases:
2693 case AArch64::ADDWri:
2694 return AArch64::ADDSWri;
2695 case AArch64::ADDWrr:
2696 return AArch64::ADDSWrr;
2697 case AArch64::ADDWrs:
2698 return AArch64::ADDSWrs;
2699 case AArch64::ADDWrx:
2700 return AArch64::ADDSWrx;
2701 case AArch64::ANDWri:
2702 return AArch64::ANDSWri;
2703 case AArch64::ANDWrr:
2704 return AArch64::ANDSWrr;
2705 case AArch64::ANDWrs:
2706 return AArch64::ANDSWrs;
2707 case AArch64::BICWrr:
2708 return AArch64::BICSWrr;
2709 case AArch64::BICWrs:
2710 return AArch64::BICSWrs;
2711 case AArch64::SUBWri:
2712 return AArch64::SUBSWri;
2713 case AArch64::SUBWrr:
2714 return AArch64::SUBSWrr;
2715 case AArch64::SUBWrs:
2716 return AArch64::SUBSWrs;
2717 case AArch64::SUBWrx:
2718 return AArch64::SUBSWrx;
2719 // 64-bit cases:
2720 case AArch64::ADDXri:
2721 return AArch64::ADDSXri;
2722 case AArch64::ADDXrr:
2723 return AArch64::ADDSXrr;
2724 case AArch64::ADDXrs:
2725 return AArch64::ADDSXrs;
2726 case AArch64::ADDXrx:
2727 return AArch64::ADDSXrx;
2728 case AArch64::ANDXri:
2729 return AArch64::ANDSXri;
2730 case AArch64::ANDXrr:
2731 return AArch64::ANDSXrr;
2732 case AArch64::ANDXrs:
2733 return AArch64::ANDSXrs;
2734 case AArch64::BICXrr:
2735 return AArch64::BICSXrr;
2736 case AArch64::BICXrs:
2737 return AArch64::BICSXrs;
2738 case AArch64::SUBXri:
2739 return AArch64::SUBSXri;
2740 case AArch64::SUBXrr:
2741 return AArch64::SUBSXrr;
2742 case AArch64::SUBXrs:
2743 return AArch64::SUBSXrs;
2744 case AArch64::SUBXrx:
2745 return AArch64::SUBSXrx;
2746 // SVE instructions:
2747 case AArch64::AND_PPzPP:
2748 return AArch64::ANDS_PPzPP;
2749 case AArch64::BIC_PPzPP:
2750 return AArch64::BICS_PPzPP;
2751 case AArch64::EOR_PPzPP:
2752 return AArch64::EORS_PPzPP;
2753 case AArch64::NAND_PPzPP:
2754 return AArch64::NANDS_PPzPP;
2755 case AArch64::NOR_PPzPP:
2756 return AArch64::NORS_PPzPP;
2757 case AArch64::ORN_PPzPP:
2758 return AArch64::ORNS_PPzPP;
2759 case AArch64::ORR_PPzPP:
2760 return AArch64::ORRS_PPzPP;
2761 case AArch64::BRKA_PPzP:
2762 return AArch64::BRKAS_PPzP;
2763 case AArch64::BRKPA_PPzPP:
2764 return AArch64::BRKPAS_PPzPP;
2765 case AArch64::BRKB_PPzP:
2766 return AArch64::BRKBS_PPzP;
2767 case AArch64::BRKPB_PPzPP:
2768 return AArch64::BRKPBS_PPzPP;
2769 case AArch64::BRKN_PPzP:
2770 return AArch64::BRKNS_PPzP;
2771 case AArch64::RDFFR_PPz:
2772 return AArch64::RDFFRS_PPz;
2773 case AArch64::PTRUE_B:
2774 return AArch64::PTRUES_B;
2775 }
2776}
2777
2778// Is this a candidate for ld/st merging or pairing? For example, we don't
2779// touch volatiles or load/stores that have a hint to avoid pair formation.
2780bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2781
2782 bool IsPreLdSt = isPreLdSt(MI);
2783
2784 // If this is a volatile load/store, don't mess with it.
2785 if (MI.hasOrderedMemoryRef())
2786 return false;
2787
2788 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2789 // For Pre-inc LD/ST, the operand is shifted by one.
2790 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2791 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2792 "Expected a reg or frame index operand.");
2793
2794 // For Pre-indexed addressing quadword instructions, the third operand is the
2795 // immediate value.
2796 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2797
2798 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2799 return false;
2800
2801 // Can't merge/pair if the instruction modifies the base register.
2802 // e.g., ldr x0, [x0]
2803 // This case will never occur with an FI base.
2804 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2805 // STR<S,D,Q,W,X>pre, it can be merged.
2806 // For example:
2807 // ldr q0, [x11, #32]!
2808 // ldr q1, [x11, #16]
2809 // to
2810 // ldp q0, q1, [x11, #32]!
2811 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2812 Register BaseReg = MI.getOperand(1).getReg();
2813 const TargetRegisterInfo *TRI = &getRegisterInfo();
2814 if (MI.modifiesRegister(BaseReg, TRI))
2815 return false;
2816 }
2817
2818 // Check if this load/store has a hint to avoid pair formation.
2819 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2820 if (isLdStPairSuppressed(MI))
2821 return false;
2822
2823 // Do not pair any callee-save store/reload instructions in the
2824 // prologue/epilogue if the CFI information encoded the operations as separate
2825 // instructions, as that will cause the size of the actual prologue to mismatch
2826 // with the prologue size recorded in the Windows CFI.
2827 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2828 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2829 MI.getMF()->getFunction().needsUnwindTableEntry();
2830 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2831 MI.getFlag(MachineInstr::FrameDestroy)))
2832 return false;
2833
2834 // On some CPUs quad load/store pairs are slower than two single load/stores.
2835 if (Subtarget.isPaired128Slow()) {
2836 switch (MI.getOpcode()) {
2837 default:
2838 break;
2839 case AArch64::LDURQi:
2840 case AArch64::STURQi:
2841 case AArch64::LDRQui:
2842 case AArch64::STRQui:
2843 return false;
2844 }
2845 }
2846
2847 return true;
2848}
2849
2850bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2851 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2852 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2853 const TargetRegisterInfo *TRI) const {
2854 if (!LdSt.mayLoadOrStore())
2855 return false;
2856
2857 const MachineOperand *BaseOp;
2858 TypeSize WidthN(0, false);
2859 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2860 WidthN, TRI))
2861 return false;
2862 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2863 // vector.
2864 Width = LocationSize::precise(WidthN);
2865 BaseOps.push_back(BaseOp);
2866 return true;
2867}
2868
2869std::optional<ExtAddrMode>
2870AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2871 const TargetRegisterInfo *TRI) const {
2872 const MachineOperand *Base; // Filled with the base operand of MI.
2873 int64_t Offset; // Filled with the offset of MI.
2874 bool OffsetIsScalable;
2875 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2876 return std::nullopt;
2877
2878 if (!Base->isReg())
2879 return std::nullopt;
2880 ExtAddrMode AM;
2881 AM.BaseReg = Base->getReg();
2882 AM.Displacement = Offset;
2883 AM.ScaledReg = 0;
2884 AM.Scale = 0;
2885 return AM;
2886}
2887
2888bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2889 Register Reg,
2890 const MachineInstr &AddrI,
2891 ExtAddrMode &AM) const {
2892 // Filter out instructions into which we cannot fold.
2893 unsigned NumBytes;
2894 int64_t OffsetScale = 1;
2895 switch (MemI.getOpcode()) {
2896 default:
2897 return false;
2898
2899 case AArch64::LDURQi:
2900 case AArch64::STURQi:
2901 NumBytes = 16;
2902 break;
2903
2904 case AArch64::LDURDi:
2905 case AArch64::STURDi:
2906 case AArch64::LDURXi:
2907 case AArch64::STURXi:
2908 NumBytes = 8;
2909 break;
2910
2911 case AArch64::LDURWi:
2912 case AArch64::LDURSWi:
2913 case AArch64::STURWi:
2914 NumBytes = 4;
2915 break;
2916
2917 case AArch64::LDURHi:
2918 case AArch64::STURHi:
2919 case AArch64::LDURHHi:
2920 case AArch64::STURHHi:
2921 case AArch64::LDURSHXi:
2922 case AArch64::LDURSHWi:
2923 NumBytes = 2;
2924 break;
2925
2926 case AArch64::LDRBroX:
2927 case AArch64::LDRBBroX:
2928 case AArch64::LDRSBXroX:
2929 case AArch64::LDRSBWroX:
2930 case AArch64::STRBroX:
2931 case AArch64::STRBBroX:
2932 case AArch64::LDURBi:
2933 case AArch64::LDURBBi:
2934 case AArch64::LDURSBXi:
2935 case AArch64::LDURSBWi:
2936 case AArch64::STURBi:
2937 case AArch64::STURBBi:
2938 case AArch64::LDRBui:
2939 case AArch64::LDRBBui:
2940 case AArch64::LDRSBXui:
2941 case AArch64::LDRSBWui:
2942 case AArch64::STRBui:
2943 case AArch64::STRBBui:
2944 NumBytes = 1;
2945 break;
2946
2947 case AArch64::LDRQroX:
2948 case AArch64::STRQroX:
2949 case AArch64::LDRQui:
2950 case AArch64::STRQui:
2951 NumBytes = 16;
2952 OffsetScale = 16;
2953 break;
2954
2955 case AArch64::LDRDroX:
2956 case AArch64::STRDroX:
2957 case AArch64::LDRXroX:
2958 case AArch64::STRXroX:
2959 case AArch64::LDRDui:
2960 case AArch64::STRDui:
2961 case AArch64::LDRXui:
2962 case AArch64::STRXui:
2963 NumBytes = 8;
2964 OffsetScale = 8;
2965 break;
2966
2967 case AArch64::LDRWroX:
2968 case AArch64::LDRSWroX:
2969 case AArch64::STRWroX:
2970 case AArch64::LDRWui:
2971 case AArch64::LDRSWui:
2972 case AArch64::STRWui:
2973 NumBytes = 4;
2974 OffsetScale = 4;
2975 break;
2976
2977 case AArch64::LDRHroX:
2978 case AArch64::STRHroX:
2979 case AArch64::LDRHHroX:
2980 case AArch64::STRHHroX:
2981 case AArch64::LDRSHXroX:
2982 case AArch64::LDRSHWroX:
2983 case AArch64::LDRHui:
2984 case AArch64::STRHui:
2985 case AArch64::LDRHHui:
2986 case AArch64::STRHHui:
2987 case AArch64::LDRSHXui:
2988 case AArch64::LDRSHWui:
2989 NumBytes = 2;
2990 OffsetScale = 2;
2991 break;
2992 }
2993
2994 // Check the fold operand is not the loaded/stored value.
2995 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2996 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2997 return false;
2998
2999 // Handle memory instructions with a [Reg, Reg] addressing mode.
3000 if (MemI.getOperand(2).isReg()) {
3001 // Bail if the addressing mode already includes extension of the offset
3002 // register.
3003 if (MemI.getOperand(3).getImm())
3004 return false;
3005
3006 // Check if we actually have a scaled offset.
3007 if (MemI.getOperand(4).getImm() == 0)
3008 OffsetScale = 1;
3009
3010 // If the address instruction is folded into the base register, then the
3011 // addressing mode must not have a scale. Then we can swap the base and the
3012 // scaled registers.
3013 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3014 return false;
3015
3016 switch (AddrI.getOpcode()) {
3017 default:
3018 return false;
3019
3020 case AArch64::SBFMXri:
3021 // sxtw Xa, Wm
3022 // ldr Xd, [Xn, Xa, lsl #N]
3023 // ->
3024 // ldr Xd, [Xn, Wm, sxtw #N]
3025 if (AddrI.getOperand(2).getImm() != 0 ||
3026 AddrI.getOperand(3).getImm() != 31)
3027 return false;
3028
3029 AM.BaseReg = MemI.getOperand(1).getReg();
3030 if (AM.BaseReg == Reg)
3031 AM.BaseReg = MemI.getOperand(2).getReg();
3032 AM.ScaledReg = AddrI.getOperand(1).getReg();
3033 AM.Scale = OffsetScale;
3034 AM.Displacement = 0;
3035 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3036 return true;
3037
3038 case TargetOpcode::SUBREG_TO_REG: {
3039 // mov Wa, Wm
3040 // ldr Xd, [Xn, Xa, lsl #N]
3041 // ->
3042 // ldr Xd, [Xn, Wm, uxtw #N]
3043
3044 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3045 if (AddrI.getOperand(1).getImm() != 0 ||
3046 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3047 return false;
3048
3049 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3050 Register OffsetReg = AddrI.getOperand(2).getReg();
3051 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3052 return false;
3053
3054 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3055 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3056 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3057 DefMI.getOperand(3).getImm() != 0)
3058 return false;
3059
3060 AM.BaseReg = MemI.getOperand(1).getReg();
3061 if (AM.BaseReg == Reg)
3062 AM.BaseReg = MemI.getOperand(2).getReg();
3063 AM.ScaledReg = DefMI.getOperand(2).getReg();
3064 AM.Scale = OffsetScale;
3065 AM.Displacement = 0;
3066 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3067 return true;
3068 }
3069 }
3070 }
3071
3072 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3073
3074 // Check we are not breaking a potential conversion to an LDP.
3075 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3076 int64_t NewOffset) -> bool {
3077 int64_t MinOffset, MaxOffset;
3078 switch (NumBytes) {
3079 default:
3080 return true;
3081 case 4:
3082 MinOffset = -256;
3083 MaxOffset = 252;
3084 break;
3085 case 8:
3086 MinOffset = -512;
3087 MaxOffset = 504;
3088 break;
3089 case 16:
3090 MinOffset = -1024;
3091 MaxOffset = 1008;
3092 break;
3093 }
3094 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3095 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3096 };
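// For illustration, with an 8-byte access the LDP-encodable range checked
// above is [-512, 504]: folding, say, `add x1, x0, #1024` into `ldr x2, [x1]`
// would move an offset of 0 (LDP-encodable) to 1024 (not encodable), so the
// fold is rejected to preserve a possible ldp formation.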
3097 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3098 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3099 int64_t NewOffset = OldOffset + Disp;
3100 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3101 return false;
3102 // If the old offset would fit into an LDP, but the new offset wouldn't,
3103 // bail out.
3104 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3105 return false;
3106 AM.BaseReg = AddrI.getOperand(1).getReg();
3107 AM.ScaledReg = 0;
3108 AM.Scale = 0;
3109 AM.Displacement = NewOffset;
3110 AM.Form = ExtAddrMode::Formula::Basic;
3111 return true;
3112 };
3113
3114 auto canFoldAddRegIntoAddrMode =
3115 [&](int64_t Scale,
3116 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3117 if (MemI.getOperand(2).getImm() != 0)
3118 return false;
3119 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3120 return false;
3121 AM.BaseReg = AddrI.getOperand(1).getReg();
3122 AM.ScaledReg = AddrI.getOperand(2).getReg();
3123 AM.Scale = Scale;
3124 AM.Displacement = 0;
3125 AM.Form = Form;
3126 return true;
3127 };
3128
3129 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3130 unsigned Opcode = MemI.getOpcode();
3131 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3132 Subtarget.isSTRQroSlow();
3133 };
3134
3135 int64_t Disp = 0;
3136 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3137 switch (AddrI.getOpcode()) {
3138 default:
3139 return false;
3140
3141 case AArch64::ADDXri:
3142 // add Xa, Xn, #N
3143 // ldr Xd, [Xa, #M]
3144 // ->
3145 // ldr Xd, [Xn, #N'+M]
3146 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3147 return canFoldAddSubImmIntoAddrMode(Disp);
3148
3149 case AArch64::SUBXri:
3150 // sub Xa, Xn, #N
3151 // ldr Xd, [Xa, #M]
3152 // ->
3153 // ldr Xd, [Xn, #N'+M]
3154 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3155 return canFoldAddSubImmIntoAddrMode(-Disp);
3156
3157 case AArch64::ADDXrs: {
3158 // add Xa, Xn, Xm, lsl #N
3159 // ldr Xd, [Xa]
3160 // ->
3161 // ldr Xd, [Xn, Xm, lsl #N]
3162
3163 // Don't fold the add if the result would be slower, unless optimising for
3164 // size.
3165 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3166 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3167 return false;
3168 Shift = AArch64_AM::getShiftValue(Shift);
3169 if (!OptSize) {
3170 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3171 return false;
3172 if (avoidSlowSTRQ(MemI))
3173 return false;
3174 }
3175 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3176 }
3177
3178 case AArch64::ADDXrr:
3179 // add Xa, Xn, Xm
3180 // ldr Xd, [Xa]
3181 // ->
3182 // ldr Xd, [Xn, Xm, lsl #0]
3183
3184 // Don't fold the add if the result would be slower, unless optimising for
3185 // size.
3186 if (!OptSize && avoidSlowSTRQ(MemI))
3187 return false;
3188 return canFoldAddRegIntoAddrMode(1);
3189
3190 case AArch64::ADDXrx:
3191 // add Xa, Xn, Wm, {s,u}xtw #N
3192 // ldr Xd, [Xa]
3193 // ->
3194 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3195
3196 // Don't fold the add if the result would be slower, unless optimising for
3197 // size.
3198 if (!OptSize && avoidSlowSTRQ(MemI))
3199 return false;
3200
3201 // Can fold only sign-/zero-extend of a word.
3202 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3203 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3204 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3205 return false;
3206
3207 return canFoldAddRegIntoAddrMode(
3208 1ULL << AArch64_AM::getArithShiftValue(Imm),
3209 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3210 : ExtAddrMode::Formula::ZExtScaledReg);
3211 }
3212}
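// For illustration, one fold accepted above (assuming the add has no slow-LSL
// penalty on the target and the result is not a slow STRQro):
//   add x8, x0, x1, lsl #3
//   ldr x9, [x8]
// is rewritten by emitLdStWithAddr below into
//   ldr x9, [x0, x1, lsl #3]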
3213
3214// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3215// return the opcode of an instruction performing the same operation, but using
3216// the [Reg, Reg] addressing mode.
3217static unsigned regOffsetOpcode(unsigned Opcode) {
3218 switch (Opcode) {
3219 default:
3220 llvm_unreachable("Address folding not implemented for instruction");
3221
3222 case AArch64::LDURQi:
3223 case AArch64::LDRQui:
3224 return AArch64::LDRQroX;
3225 case AArch64::STURQi:
3226 case AArch64::STRQui:
3227 return AArch64::STRQroX;
3228 case AArch64::LDURDi:
3229 case AArch64::LDRDui:
3230 return AArch64::LDRDroX;
3231 case AArch64::STURDi:
3232 case AArch64::STRDui:
3233 return AArch64::STRDroX;
3234 case AArch64::LDURXi:
3235 case AArch64::LDRXui:
3236 return AArch64::LDRXroX;
3237 case AArch64::STURXi:
3238 case AArch64::STRXui:
3239 return AArch64::STRXroX;
3240 case AArch64::LDURWi:
3241 case AArch64::LDRWui:
3242 return AArch64::LDRWroX;
3243 case AArch64::LDURSWi:
3244 case AArch64::LDRSWui:
3245 return AArch64::LDRSWroX;
3246 case AArch64::STURWi:
3247 case AArch64::STRWui:
3248 return AArch64::STRWroX;
3249 case AArch64::LDURHi:
3250 case AArch64::LDRHui:
3251 return AArch64::LDRHroX;
3252 case AArch64::STURHi:
3253 case AArch64::STRHui:
3254 return AArch64::STRHroX;
3255 case AArch64::LDURHHi:
3256 case AArch64::LDRHHui:
3257 return AArch64::LDRHHroX;
3258 case AArch64::STURHHi:
3259 case AArch64::STRHHui:
3260 return AArch64::STRHHroX;
3261 case AArch64::LDURSHXi:
3262 case AArch64::LDRSHXui:
3263 return AArch64::LDRSHXroX;
3264 case AArch64::LDURSHWi:
3265 case AArch64::LDRSHWui:
3266 return AArch64::LDRSHWroX;
3267 case AArch64::LDURBi:
3268 case AArch64::LDRBui:
3269 return AArch64::LDRBroX;
3270 case AArch64::LDURBBi:
3271 case AArch64::LDRBBui:
3272 return AArch64::LDRBBroX;
3273 case AArch64::LDURSBXi:
3274 case AArch64::LDRSBXui:
3275 return AArch64::LDRSBXroX;
3276 case AArch64::LDURSBWi:
3277 case AArch64::LDRSBWui:
3278 return AArch64::LDRSBWroX;
3279 case AArch64::STURBi:
3280 case AArch64::STRBui:
3281 return AArch64::STRBroX;
3282 case AArch64::STURBBi:
3283 case AArch64::STRBBui:
3284 return AArch64::STRBBroX;
3285 }
3286}
3287
3288// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3289// the opcode of an instruction performing the same operation, but using the
3290// [Reg, #Imm] addressing mode with scaled offset.
3291unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3292 switch (Opcode) {
3293 default:
3294 llvm_unreachable("Address folding not implemented for instruction");
3295
3296 case AArch64::LDURQi:
3297 Scale = 16;
3298 return AArch64::LDRQui;
3299 case AArch64::STURQi:
3300 Scale = 16;
3301 return AArch64::STRQui;
3302 case AArch64::LDURDi:
3303 Scale = 8;
3304 return AArch64::LDRDui;
3305 case AArch64::STURDi:
3306 Scale = 8;
3307 return AArch64::STRDui;
3308 case AArch64::LDURXi:
3309 Scale = 8;
3310 return AArch64::LDRXui;
3311 case AArch64::STURXi:
3312 Scale = 8;
3313 return AArch64::STRXui;
3314 case AArch64::LDURWi:
3315 Scale = 4;
3316 return AArch64::LDRWui;
3317 case AArch64::LDURSWi:
3318 Scale = 4;
3319 return AArch64::LDRSWui;
3320 case AArch64::STURWi:
3321 Scale = 4;
3322 return AArch64::STRWui;
3323 case AArch64::LDURHi:
3324 Scale = 2;
3325 return AArch64::LDRHui;
3326 case AArch64::STURHi:
3327 Scale = 2;
3328 return AArch64::STRHui;
3329 case AArch64::LDURHHi:
3330 Scale = 2;
3331 return AArch64::LDRHHui;
3332 case AArch64::STURHHi:
3333 Scale = 2;
3334 return AArch64::STRHHui;
3335 case AArch64::LDURSHXi:
3336 Scale = 2;
3337 return AArch64::LDRSHXui;
3338 case AArch64::LDURSHWi:
3339 Scale = 2;
3340 return AArch64::LDRSHWui;
3341 case AArch64::LDURBi:
3342 Scale = 1;
3343 return AArch64::LDRBui;
3344 case AArch64::LDURBBi:
3345 Scale = 1;
3346 return AArch64::LDRBBui;
3347 case AArch64::LDURSBXi:
3348 Scale = 1;
3349 return AArch64::LDRSBXui;
3350 case AArch64::LDURSBWi:
3351 Scale = 1;
3352 return AArch64::LDRSBWui;
3353 case AArch64::STURBi:
3354 Scale = 1;
3355 return AArch64::STRBui;
3356 case AArch64::STURBBi:
3357 Scale = 1;
3358 return AArch64::STRBBui;
3359 case AArch64::LDRQui:
3360 case AArch64::STRQui:
3361 Scale = 16;
3362 return Opcode;
3363 case AArch64::LDRDui:
3364 case AArch64::STRDui:
3365 case AArch64::LDRXui:
3366 case AArch64::STRXui:
3367 Scale = 8;
3368 return Opcode;
3369 case AArch64::LDRWui:
3370 case AArch64::LDRSWui:
3371 case AArch64::STRWui:
3372 Scale = 4;
3373 return Opcode;
3374 case AArch64::LDRHui:
3375 case AArch64::STRHui:
3376 case AArch64::LDRHHui:
3377 case AArch64::STRHHui:
3378 case AArch64::LDRSHXui:
3379 case AArch64::LDRSHWui:
3380 Scale = 2;
3381 return Opcode;
3382 case AArch64::LDRBui:
3383 case AArch64::LDRBBui:
3384 case AArch64::LDRSBXui:
3385 case AArch64::LDRSBWui:
3386 case AArch64::STRBui:
3387 case AArch64::STRBBui:
3388 Scale = 1;
3389 return Opcode;
3390 }
3391}
3392
3393// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3394// the opcode of an instruction performing the same operation, but using the
3395// [Reg, #Imm] addressing mode with unscaled offset.
3396unsigned unscaledOffsetOpcode(unsigned Opcode) {
3397 switch (Opcode) {
3398 default:
3399 llvm_unreachable("Address folding not implemented for instruction");
3400
3401 case AArch64::LDURQi:
3402 case AArch64::STURQi:
3403 case AArch64::LDURDi:
3404 case AArch64::STURDi:
3405 case AArch64::LDURXi:
3406 case AArch64::STURXi:
3407 case AArch64::LDURWi:
3408 case AArch64::LDURSWi:
3409 case AArch64::STURWi:
3410 case AArch64::LDURHi:
3411 case AArch64::STURHi:
3412 case AArch64::LDURHHi:
3413 case AArch64::STURHHi:
3414 case AArch64::LDURSHXi:
3415 case AArch64::LDURSHWi:
3416 case AArch64::LDURBi:
3417 case AArch64::STURBi:
3418 case AArch64::LDURBBi:
3419 case AArch64::STURBBi:
3420 case AArch64::LDURSBWi:
3421 case AArch64::LDURSBXi:
3422 return Opcode;
3423 case AArch64::LDRQui:
3424 return AArch64::LDURQi;
3425 case AArch64::STRQui:
3426 return AArch64::STURQi;
3427 case AArch64::LDRDui:
3428 return AArch64::LDURDi;
3429 case AArch64::STRDui:
3430 return AArch64::STURDi;
3431 case AArch64::LDRXui:
3432 return AArch64::LDURXi;
3433 case AArch64::STRXui:
3434 return AArch64::STURXi;
3435 case AArch64::LDRWui:
3436 return AArch64::LDURWi;
3437 case AArch64::LDRSWui:
3438 return AArch64::LDURSWi;
3439 case AArch64::STRWui:
3440 return AArch64::STURWi;
3441 case AArch64::LDRHui:
3442 return AArch64::LDURHi;
3443 case AArch64::STRHui:
3444 return AArch64::STURHi;
3445 case AArch64::LDRHHui:
3446 return AArch64::LDURHHi;
3447 case AArch64::STRHHui:
3448 return AArch64::STURHHi;
3449 case AArch64::LDRSHXui:
3450 return AArch64::LDURSHXi;
3451 case AArch64::LDRSHWui:
3452 return AArch64::LDURSHWi;
3453 case AArch64::LDRBBui:
3454 return AArch64::LDURBBi;
3455 case AArch64::LDRBui:
3456 return AArch64::LDURBi;
3457 case AArch64::STRBBui:
3458 return AArch64::STURBBi;
3459 case AArch64::STRBui:
3460 return AArch64::STURBi;
3461 case AArch64::LDRSBWui:
3462 return AArch64::LDURSBWi;
3463 case AArch64::LDRSBXui:
3464 return AArch64::LDURSBXi;
3465 }
3466}
3467
3468// Given the opcode of a memory load/store instruction, return the opcode of an
3469// instruction performing the same operation, but using
3470// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3471// offset register.
3472static unsigned offsetExtendOpcode(unsigned Opcode) {
3473 switch (Opcode) {
3474 default:
3475 llvm_unreachable("Address folding not implemented for instruction");
3476
3477 case AArch64::LDRQroX:
3478 case AArch64::LDURQi:
3479 case AArch64::LDRQui:
3480 return AArch64::LDRQroW;
3481 case AArch64::STRQroX:
3482 case AArch64::STURQi:
3483 case AArch64::STRQui:
3484 return AArch64::STRQroW;
3485 case AArch64::LDRDroX:
3486 case AArch64::LDURDi:
3487 case AArch64::LDRDui:
3488 return AArch64::LDRDroW;
3489 case AArch64::STRDroX:
3490 case AArch64::STURDi:
3491 case AArch64::STRDui:
3492 return AArch64::STRDroW;
3493 case AArch64::LDRXroX:
3494 case AArch64::LDURXi:
3495 case AArch64::LDRXui:
3496 return AArch64::LDRXroW;
3497 case AArch64::STRXroX:
3498 case AArch64::STURXi:
3499 case AArch64::STRXui:
3500 return AArch64::STRXroW;
3501 case AArch64::LDRWroX:
3502 case AArch64::LDURWi:
3503 case AArch64::LDRWui:
3504 return AArch64::LDRWroW;
3505 case AArch64::LDRSWroX:
3506 case AArch64::LDURSWi:
3507 case AArch64::LDRSWui:
3508 return AArch64::LDRSWroW;
3509 case AArch64::STRWroX:
3510 case AArch64::STURWi:
3511 case AArch64::STRWui:
3512 return AArch64::STRWroW;
3513 case AArch64::LDRHroX:
3514 case AArch64::LDURHi:
3515 case AArch64::LDRHui:
3516 return AArch64::LDRHroW;
3517 case AArch64::STRHroX:
3518 case AArch64::STURHi:
3519 case AArch64::STRHui:
3520 return AArch64::STRHroW;
3521 case AArch64::LDRHHroX:
3522 case AArch64::LDURHHi:
3523 case AArch64::LDRHHui:
3524 return AArch64::LDRHHroW;
3525 case AArch64::STRHHroX:
3526 case AArch64::STURHHi:
3527 case AArch64::STRHHui:
3528 return AArch64::STRHHroW;
3529 case AArch64::LDRSHXroX:
3530 case AArch64::LDURSHXi:
3531 case AArch64::LDRSHXui:
3532 return AArch64::LDRSHXroW;
3533 case AArch64::LDRSHWroX:
3534 case AArch64::LDURSHWi:
3535 case AArch64::LDRSHWui:
3536 return AArch64::LDRSHWroW;
3537 case AArch64::LDRBroX:
3538 case AArch64::LDURBi:
3539 case AArch64::LDRBui:
3540 return AArch64::LDRBroW;
3541 case AArch64::LDRBBroX:
3542 case AArch64::LDURBBi:
3543 case AArch64::LDRBBui:
3544 return AArch64::LDRBBroW;
3545 case AArch64::LDRSBXroX:
3546 case AArch64::LDURSBXi:
3547 case AArch64::LDRSBXui:
3548 return AArch64::LDRSBXroW;
3549 case AArch64::LDRSBWroX:
3550 case AArch64::LDURSBWi:
3551 case AArch64::LDRSBWui:
3552 return AArch64::LDRSBWroW;
3553 case AArch64::STRBroX:
3554 case AArch64::STURBi:
3555 case AArch64::STRBui:
3556 return AArch64::STRBroW;
3557 case AArch64::STRBBroX:
3558 case AArch64::STURBBi:
3559 case AArch64::STRBBui:
3560 return AArch64::STRBBroW;
3561 }
3562}
3563
3564MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3565 const ExtAddrMode &AM) const {
3566
3567 const DebugLoc &DL = MemI.getDebugLoc();
3568 MachineBasicBlock &MBB = *MemI.getParent();
3569 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3570
3571 if (AM.Form == ExtAddrMode::Formula::Basic) {
3572 if (AM.ScaledReg) {
3573 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3574 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3575 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3576 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3577 .addReg(MemI.getOperand(0).getReg(),
3578 MemI.mayLoad() ? RegState::Define : 0)
3579 .addReg(AM.BaseReg)
3580 .addReg(AM.ScaledReg)
3581 .addImm(0)
3582 .addImm(AM.Scale > 1)
3583 .setMemRefs(MemI.memoperands())
3584 .setMIFlags(MemI.getFlags());
3585 return B.getInstr();
3586 }
3587
3588 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3589 "Addressing mode not supported for folding");
3590
3591 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3592 unsigned Scale = 1;
3593 unsigned Opcode = MemI.getOpcode();
3594 if (isInt<9>(AM.Displacement))
3595 Opcode = unscaledOffsetOpcode(Opcode);
3596 else
3597 Opcode = scaledOffsetOpcode(Opcode, Scale);
3598
3599 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3600 .addReg(MemI.getOperand(0).getReg(),
3601 MemI.mayLoad() ? RegState::Define : 0)
3602 .addReg(AM.BaseReg)
3603 .addImm(AM.Displacement / Scale)
3604 .setMemRefs(MemI.memoperands())
3605 .setMIFlags(MemI.getFlags());
3606 return B.getInstr();
3607 }
3608
3609 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3610 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3611 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3612 assert(AM.ScaledReg && !AM.Displacement &&
3613 "Address offset can be a register or an immediate, but not both");
3614 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3615 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3616 // Make sure the offset register is in the correct register class.
3617 Register OffsetReg = AM.ScaledReg;
3618 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3619 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3620 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3621 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3622 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3623 }
3624 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3625 .addReg(MemI.getOperand(0).getReg(),
3626 MemI.mayLoad() ? RegState::Define : 0)
3627 .addReg(AM.BaseReg)
3628 .addReg(OffsetReg)
3629 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3630 .addImm(AM.Scale != 1)
3631 .setMemRefs(MemI.memoperands())
3632 .setMIFlags(MemI.getFlags());
3633
3634 return B.getInstr();
3635 }
3636
3637 llvm_unreachable(
3638 "Function must not be called with an addressing mode it can't handle");
3639}
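// For illustration, when canFoldIntoAddrMode recorded a sign-extended 32-bit
// offset register (Formula::SExtScaledReg), the path above rewrites
//   sxtw x8, w1
//   ldr  x9, [x0, x8, lsl #3]
// into the extended-register form
//   ldr  x9, [x0, w1, sxtw #3]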
3640
3641/// Return true if the opcode is a post-index ld/st instruction, which really
3642/// loads from or stores to base+0.
3643static bool isPostIndexLdStOpcode(unsigned Opcode) {
3644 switch (Opcode) {
3645 default:
3646 return false;
3647 case AArch64::LD1Fourv16b_POST:
3648 case AArch64::LD1Fourv1d_POST:
3649 case AArch64::LD1Fourv2d_POST:
3650 case AArch64::LD1Fourv2s_POST:
3651 case AArch64::LD1Fourv4h_POST:
3652 case AArch64::LD1Fourv4s_POST:
3653 case AArch64::LD1Fourv8b_POST:
3654 case AArch64::LD1Fourv8h_POST:
3655 case AArch64::LD1Onev16b_POST:
3656 case AArch64::LD1Onev1d_POST:
3657 case AArch64::LD1Onev2d_POST:
3658 case AArch64::LD1Onev2s_POST:
3659 case AArch64::LD1Onev4h_POST:
3660 case AArch64::LD1Onev4s_POST:
3661 case AArch64::LD1Onev8b_POST:
3662 case AArch64::LD1Onev8h_POST:
3663 case AArch64::LD1Rv16b_POST:
3664 case AArch64::LD1Rv1d_POST:
3665 case AArch64::LD1Rv2d_POST:
3666 case AArch64::LD1Rv2s_POST:
3667 case AArch64::LD1Rv4h_POST:
3668 case AArch64::LD1Rv4s_POST:
3669 case AArch64::LD1Rv8b_POST:
3670 case AArch64::LD1Rv8h_POST:
3671 case AArch64::LD1Threev16b_POST:
3672 case AArch64::LD1Threev1d_POST:
3673 case AArch64::LD1Threev2d_POST:
3674 case AArch64::LD1Threev2s_POST:
3675 case AArch64::LD1Threev4h_POST:
3676 case AArch64::LD1Threev4s_POST:
3677 case AArch64::LD1Threev8b_POST:
3678 case AArch64::LD1Threev8h_POST:
3679 case AArch64::LD1Twov16b_POST:
3680 case AArch64::LD1Twov1d_POST:
3681 case AArch64::LD1Twov2d_POST:
3682 case AArch64::LD1Twov2s_POST:
3683 case AArch64::LD1Twov4h_POST:
3684 case AArch64::LD1Twov4s_POST:
3685 case AArch64::LD1Twov8b_POST:
3686 case AArch64::LD1Twov8h_POST:
3687 case AArch64::LD1i16_POST:
3688 case AArch64::LD1i32_POST:
3689 case AArch64::LD1i64_POST:
3690 case AArch64::LD1i8_POST:
3691 case AArch64::LD2Rv16b_POST:
3692 case AArch64::LD2Rv1d_POST:
3693 case AArch64::LD2Rv2d_POST:
3694 case AArch64::LD2Rv2s_POST:
3695 case AArch64::LD2Rv4h_POST:
3696 case AArch64::LD2Rv4s_POST:
3697 case AArch64::LD2Rv8b_POST:
3698 case AArch64::LD2Rv8h_POST:
3699 case AArch64::LD2Twov16b_POST:
3700 case AArch64::LD2Twov2d_POST:
3701 case AArch64::LD2Twov2s_POST:
3702 case AArch64::LD2Twov4h_POST:
3703 case AArch64::LD2Twov4s_POST:
3704 case AArch64::LD2Twov8b_POST:
3705 case AArch64::LD2Twov8h_POST:
3706 case AArch64::LD2i16_POST:
3707 case AArch64::LD2i32_POST:
3708 case AArch64::LD2i64_POST:
3709 case AArch64::LD2i8_POST:
3710 case AArch64::LD3Rv16b_POST:
3711 case AArch64::LD3Rv1d_POST:
3712 case AArch64::LD3Rv2d_POST:
3713 case AArch64::LD3Rv2s_POST:
3714 case AArch64::LD3Rv4h_POST:
3715 case AArch64::LD3Rv4s_POST:
3716 case AArch64::LD3Rv8b_POST:
3717 case AArch64::LD3Rv8h_POST:
3718 case AArch64::LD3Threev16b_POST:
3719 case AArch64::LD3Threev2d_POST:
3720 case AArch64::LD3Threev2s_POST:
3721 case AArch64::LD3Threev4h_POST:
3722 case AArch64::LD3Threev4s_POST:
3723 case AArch64::LD3Threev8b_POST:
3724 case AArch64::LD3Threev8h_POST:
3725 case AArch64::LD3i16_POST:
3726 case AArch64::LD3i32_POST:
3727 case AArch64::LD3i64_POST:
3728 case AArch64::LD3i8_POST:
3729 case AArch64::LD4Fourv16b_POST:
3730 case AArch64::LD4Fourv2d_POST:
3731 case AArch64::LD4Fourv2s_POST:
3732 case AArch64::LD4Fourv4h_POST:
3733 case AArch64::LD4Fourv4s_POST:
3734 case AArch64::LD4Fourv8b_POST:
3735 case AArch64::LD4Fourv8h_POST:
3736 case AArch64::LD4Rv16b_POST:
3737 case AArch64::LD4Rv1d_POST:
3738 case AArch64::LD4Rv2d_POST:
3739 case AArch64::LD4Rv2s_POST:
3740 case AArch64::LD4Rv4h_POST:
3741 case AArch64::LD4Rv4s_POST:
3742 case AArch64::LD4Rv8b_POST:
3743 case AArch64::LD4Rv8h_POST:
3744 case AArch64::LD4i16_POST:
3745 case AArch64::LD4i32_POST:
3746 case AArch64::LD4i64_POST:
3747 case AArch64::LD4i8_POST:
3748 case AArch64::LDAPRWpost:
3749 case AArch64::LDAPRXpost:
3750 case AArch64::LDIAPPWpost:
3751 case AArch64::LDIAPPXpost:
3752 case AArch64::LDPDpost:
3753 case AArch64::LDPQpost:
3754 case AArch64::LDPSWpost:
3755 case AArch64::LDPSpost:
3756 case AArch64::LDPWpost:
3757 case AArch64::LDPXpost:
3758 case AArch64::LDRBBpost:
3759 case AArch64::LDRBpost:
3760 case AArch64::LDRDpost:
3761 case AArch64::LDRHHpost:
3762 case AArch64::LDRHpost:
3763 case AArch64::LDRQpost:
3764 case AArch64::LDRSBWpost:
3765 case AArch64::LDRSBXpost:
3766 case AArch64::LDRSHWpost:
3767 case AArch64::LDRSHXpost:
3768 case AArch64::LDRSWpost:
3769 case AArch64::LDRSpost:
3770 case AArch64::LDRWpost:
3771 case AArch64::LDRXpost:
3772 case AArch64::ST1Fourv16b_POST:
3773 case AArch64::ST1Fourv1d_POST:
3774 case AArch64::ST1Fourv2d_POST:
3775 case AArch64::ST1Fourv2s_POST:
3776 case AArch64::ST1Fourv4h_POST:
3777 case AArch64::ST1Fourv4s_POST:
3778 case AArch64::ST1Fourv8b_POST:
3779 case AArch64::ST1Fourv8h_POST:
3780 case AArch64::ST1Onev16b_POST:
3781 case AArch64::ST1Onev1d_POST:
3782 case AArch64::ST1Onev2d_POST:
3783 case AArch64::ST1Onev2s_POST:
3784 case AArch64::ST1Onev4h_POST:
3785 case AArch64::ST1Onev4s_POST:
3786 case AArch64::ST1Onev8b_POST:
3787 case AArch64::ST1Onev8h_POST:
3788 case AArch64::ST1Threev16b_POST:
3789 case AArch64::ST1Threev1d_POST:
3790 case AArch64::ST1Threev2d_POST:
3791 case AArch64::ST1Threev2s_POST:
3792 case AArch64::ST1Threev4h_POST:
3793 case AArch64::ST1Threev4s_POST:
3794 case AArch64::ST1Threev8b_POST:
3795 case AArch64::ST1Threev8h_POST:
3796 case AArch64::ST1Twov16b_POST:
3797 case AArch64::ST1Twov1d_POST:
3798 case AArch64::ST1Twov2d_POST:
3799 case AArch64::ST1Twov2s_POST:
3800 case AArch64::ST1Twov4h_POST:
3801 case AArch64::ST1Twov4s_POST:
3802 case AArch64::ST1Twov8b_POST:
3803 case AArch64::ST1Twov8h_POST:
3804 case AArch64::ST1i16_POST:
3805 case AArch64::ST1i32_POST:
3806 case AArch64::ST1i64_POST:
3807 case AArch64::ST1i8_POST:
3808 case AArch64::ST2GPostIndex:
3809 case AArch64::ST2Twov16b_POST:
3810 case AArch64::ST2Twov2d_POST:
3811 case AArch64::ST2Twov2s_POST:
3812 case AArch64::ST2Twov4h_POST:
3813 case AArch64::ST2Twov4s_POST:
3814 case AArch64::ST2Twov8b_POST:
3815 case AArch64::ST2Twov8h_POST:
3816 case AArch64::ST2i16_POST:
3817 case AArch64::ST2i32_POST:
3818 case AArch64::ST2i64_POST:
3819 case AArch64::ST2i8_POST:
3820 case AArch64::ST3Threev16b_POST:
3821 case AArch64::ST3Threev2d_POST:
3822 case AArch64::ST3Threev2s_POST:
3823 case AArch64::ST3Threev4h_POST:
3824 case AArch64::ST3Threev4s_POST:
3825 case AArch64::ST3Threev8b_POST:
3826 case AArch64::ST3Threev8h_POST:
3827 case AArch64::ST3i16_POST:
3828 case AArch64::ST3i32_POST:
3829 case AArch64::ST3i64_POST:
3830 case AArch64::ST3i8_POST:
3831 case AArch64::ST4Fourv16b_POST:
3832 case AArch64::ST4Fourv2d_POST:
3833 case AArch64::ST4Fourv2s_POST:
3834 case AArch64::ST4Fourv4h_POST:
3835 case AArch64::ST4Fourv4s_POST:
3836 case AArch64::ST4Fourv8b_POST:
3837 case AArch64::ST4Fourv8h_POST:
3838 case AArch64::ST4i16_POST:
3839 case AArch64::ST4i32_POST:
3840 case AArch64::ST4i64_POST:
3841 case AArch64::ST4i8_POST:
3842 case AArch64::STGPostIndex:
3843 case AArch64::STGPpost:
3844 case AArch64::STPDpost:
3845 case AArch64::STPQpost:
3846 case AArch64::STPSpost:
3847 case AArch64::STPWpost:
3848 case AArch64::STPXpost:
3849 case AArch64::STRBBpost:
3850 case AArch64::STRBpost:
3851 case AArch64::STRDpost:
3852 case AArch64::STRHHpost:
3853 case AArch64::STRHpost:
3854 case AArch64::STRQpost:
3855 case AArch64::STRSpost:
3856 case AArch64::STRWpost:
3857 case AArch64::STRXpost:
3858 case AArch64::STZ2GPostIndex:
3859 case AArch64::STZGPostIndex:
3860 return true;
3861 }
3862}
3863
3864bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3865 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3866 bool &OffsetIsScalable, TypeSize &Width,
3867 const TargetRegisterInfo *TRI) const {
3868 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3869 // Handle only loads/stores with base register followed by immediate offset.
3870 if (LdSt.getNumExplicitOperands() == 3) {
3871 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3872 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3873 !LdSt.getOperand(2).isImm())
3874 return false;
3875 } else if (LdSt.getNumExplicitOperands() == 4) {
3876 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3877 if (!LdSt.getOperand(1).isReg() ||
3878 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3879 !LdSt.getOperand(3).isImm())
3880 return false;
3881 } else
3882 return false;
3883
3884 // Get the scaling factor for the instruction and set the width for the
3885 // instruction.
3886 TypeSize Scale(0U, false);
3887 int64_t Dummy1, Dummy2;
3888
3889 // If this returns false, then it's an instruction we don't want to handle.
3890 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3891 return false;
3892
3893 // Compute the offset. The offset is the immediate operand multiplied by the
3894 // scaling factor; unscaled instructions have a scaling factor of 1.
3895 // Post-index instructions are a special case and have an offset of 0.
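 // For example, a scaled LDRXui with immediate 3 produces Offset = 3 * 8 = 24
 // bytes, while an unscaled LDURXi with immediate 24 produces Offset = 24 * 1.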
3896 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
3897 BaseOp = &LdSt.getOperand(2);
3898 Offset = 0;
3899 } else if (LdSt.getNumExplicitOperands() == 3) {
3900 BaseOp = &LdSt.getOperand(1);
3901 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3902 } else {
3903 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3904 BaseOp = &LdSt.getOperand(2);
3905 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3906 }
3907 OffsetIsScalable = Scale.isScalable();
3908
3909 return BaseOp->isReg() || BaseOp->isFI();
3910}
3911
3914 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3915 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3916 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3917 return OfsOp;
3918}
3919
3920bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3921 TypeSize &Width, int64_t &MinOffset,
3922 int64_t &MaxOffset) {
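 // On success, Scale is the multiplier applied to the encoded immediate to get
 // a byte offset, Width is the number of bytes accessed, and [MinOffset,
 // MaxOffset] bounds the unscaled immediate operand.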
3923 switch (Opcode) {
3924 // Not a memory operation, or not something we want to handle.
3925 default:
3926 Scale = TypeSize::getFixed(0);
3927 Width = TypeSize::getFixed(0);
3928 MinOffset = MaxOffset = 0;
3929 return false;
3930 // LDR / STR
3931 case AArch64::LDRQui:
3932 case AArch64::STRQui:
3933 Scale = TypeSize::getFixed(16);
3934 Width = TypeSize::getFixed(16);
3935 MinOffset = 0;
3936 MaxOffset = 4095;
3937 break;
3938 case AArch64::LDRXui:
3939 case AArch64::LDRDui:
3940 case AArch64::STRXui:
3941 case AArch64::STRDui:
3942 case AArch64::PRFMui:
3943 Scale = TypeSize::getFixed(8);
3944 Width = TypeSize::getFixed(8);
3945 MinOffset = 0;
3946 MaxOffset = 4095;
3947 break;
3948 case AArch64::LDRWui:
3949 case AArch64::LDRSui:
3950 case AArch64::LDRSWui:
3951 case AArch64::STRWui:
3952 case AArch64::STRSui:
3953 Scale = TypeSize::getFixed(4);
3954 Width = TypeSize::getFixed(4);
3955 MinOffset = 0;
3956 MaxOffset = 4095;
3957 break;
3958 case AArch64::LDRHui:
3959 case AArch64::LDRHHui:
3960 case AArch64::LDRSHWui:
3961 case AArch64::LDRSHXui:
3962 case AArch64::STRHui:
3963 case AArch64::STRHHui:
3964 Scale = TypeSize::getFixed(2);
3965 Width = TypeSize::getFixed(2);
3966 MinOffset = 0;
3967 MaxOffset = 4095;
3968 break;
3969 case AArch64::LDRBui:
3970 case AArch64::LDRBBui:
3971 case AArch64::LDRSBWui:
3972 case AArch64::LDRSBXui:
3973 case AArch64::STRBui:
3974 case AArch64::STRBBui:
3975 Scale = TypeSize::getFixed(1);
3976 Width = TypeSize::getFixed(1);
3977 MinOffset = 0;
3978 MaxOffset = 4095;
3979 break;
3980 // post/pre inc
3981 case AArch64::STRQpre:
3982 case AArch64::LDRQpost:
3983 Scale = TypeSize::getFixed(1);
3984 Width = TypeSize::getFixed(16);
3985 MinOffset = -256;
3986 MaxOffset = 255;
3987 break;
3988 case AArch64::LDRDpost:
3989 case AArch64::LDRDpre:
3990 case AArch64::LDRXpost:
3991 case AArch64::LDRXpre:
3992 case AArch64::STRDpost:
3993 case AArch64::STRDpre:
3994 case AArch64::STRXpost:
3995 case AArch64::STRXpre:
3996 Scale = TypeSize::getFixed(1);
3997 Width = TypeSize::getFixed(8);
3998 MinOffset = -256;
3999 MaxOffset = 255;
4000 break;
4001 case AArch64::STRWpost:
4002 case AArch64::STRWpre:
4003 case AArch64::LDRWpost:
4004 case AArch64::LDRWpre:
4005 case AArch64::STRSpost:
4006 case AArch64::STRSpre:
4007 case AArch64::LDRSpost:
4008 case AArch64::LDRSpre:
4009 Scale = TypeSize::getFixed(1);
4010 Width = TypeSize::getFixed(4);
4011 MinOffset = -256;
4012 MaxOffset = 255;
4013 break;
4014 case AArch64::LDRHpost:
4015 case AArch64::LDRHpre:
4016 case AArch64::STRHpost:
4017 case AArch64::STRHpre:
4018 case AArch64::LDRHHpost:
4019 case AArch64::LDRHHpre:
4020 case AArch64::STRHHpost:
4021 case AArch64::STRHHpre:
4022 Scale = TypeSize::getFixed(1);
4023 Width = TypeSize::getFixed(2);
4024 MinOffset = -256;
4025 MaxOffset = 255;
4026 break;
4027 case AArch64::LDRBpost:
4028 case AArch64::LDRBpre:
4029 case AArch64::STRBpost:
4030 case AArch64::STRBpre:
4031 case AArch64::LDRBBpost:
4032 case AArch64::LDRBBpre:
4033 case AArch64::STRBBpost:
4034 case AArch64::STRBBpre:
4035 Scale = TypeSize::getFixed(1);
4036 Width = TypeSize::getFixed(1);
4037 MinOffset = -256;
4038 MaxOffset = 255;
4039 break;
4040 // Unscaled
4041 case AArch64::LDURQi:
4042 case AArch64::STURQi:
4043 Scale = TypeSize::getFixed(1);
4044 Width = TypeSize::getFixed(16);
4045 MinOffset = -256;
4046 MaxOffset = 255;
4047 break;
4048 case AArch64::LDURXi:
4049 case AArch64::LDURDi:
4050 case AArch64::LDAPURXi:
4051 case AArch64::STURXi:
4052 case AArch64::STURDi:
4053 case AArch64::STLURXi:
4054 case AArch64::PRFUMi:
4055 Scale = TypeSize::getFixed(1);
4056 Width = TypeSize::getFixed(8);
4057 MinOffset = -256;
4058 MaxOffset = 255;
4059 break;
4060 case AArch64::LDURWi:
4061 case AArch64::LDURSi:
4062 case AArch64::LDURSWi:
4063 case AArch64::LDAPURi:
4064 case AArch64::LDAPURSWi:
4065 case AArch64::STURWi:
4066 case AArch64::STURSi:
4067 case AArch64::STLURWi:
4068 Scale = TypeSize::getFixed(1);
4069 Width = TypeSize::getFixed(4);
4070 MinOffset = -256;
4071 MaxOffset = 255;
4072 break;
4073 case AArch64::LDURHi:
4074 case AArch64::LDURHHi:
4075 case AArch64::LDURSHXi:
4076 case AArch64::LDURSHWi:
4077 case AArch64::LDAPURHi:
4078 case AArch64::LDAPURSHWi:
4079 case AArch64::LDAPURSHXi:
4080 case AArch64::STURHi:
4081 case AArch64::STURHHi:
4082 case AArch64::STLURHi:
4083 Scale = TypeSize::getFixed(1);
4084 Width = TypeSize::getFixed(2);
4085 MinOffset = -256;
4086 MaxOffset = 255;
4087 break;
4088 case AArch64::LDURBi:
4089 case AArch64::LDURBBi:
4090 case AArch64::LDURSBXi:
4091 case AArch64::LDURSBWi:
4092 case AArch64::LDAPURBi:
4093 case AArch64::LDAPURSBWi:
4094 case AArch64::LDAPURSBXi:
4095 case AArch64::STURBi:
4096 case AArch64::STURBBi:
4097 case AArch64::STLURBi:
4098 Scale = TypeSize::getFixed(1);
4099 Width = TypeSize::getFixed(1);
4100 MinOffset = -256;
4101 MaxOffset = 255;
4102 break;
4103 // LDP / STP (including pre/post inc)
4104 case AArch64::LDPQi:
4105 case AArch64::LDNPQi:
4106 case AArch64::STPQi:
4107 case AArch64::STNPQi:
4108 case AArch64::LDPQpost:
4109 case AArch64::LDPQpre:
4110 case AArch64::STPQpost:
4111 case AArch64::STPQpre:
4112 Scale = TypeSize::getFixed(16);
4113 Width = TypeSize::getFixed(16 * 2);
4114 MinOffset = -64;
4115 MaxOffset = 63;
4116 break;
4117 case AArch64::LDPXi:
4118 case AArch64::LDPDi:
4119 case AArch64::LDNPXi:
4120 case AArch64::LDNPDi:
4121 case AArch64::STPXi:
4122 case AArch64::STPDi:
4123 case AArch64::STNPXi:
4124 case AArch64::STNPDi:
4125 case AArch64::LDPDpost:
4126 case AArch64::LDPDpre:
4127 case AArch64::LDPXpost:
4128 case AArch64::LDPXpre:
4129 case AArch64::STPDpost:
4130 case AArch64::STPDpre:
4131 case AArch64::STPXpost:
4132 case AArch64::STPXpre:
4133 Scale = TypeSize::getFixed(8);
4134 Width = TypeSize::getFixed(8 * 2);
4135 MinOffset = -64;
4136 MaxOffset = 63;
4137 break;
4138 case AArch64::LDPWi:
4139 case AArch64::LDPSi:
4140 case AArch64::LDNPWi:
4141 case AArch64::LDNPSi:
4142 case AArch64::STPWi:
4143 case AArch64::STPSi:
4144 case AArch64::STNPWi:
4145 case AArch64::STNPSi:
4146 case AArch64::LDPSpost:
4147 case AArch64::LDPSpre:
4148 case AArch64::LDPWpost:
4149 case AArch64::LDPWpre:
4150 case AArch64::STPSpost:
4151 case AArch64::STPSpre:
4152 case AArch64::STPWpost:
4153 case AArch64::STPWpre:
4154 Scale = TypeSize::getFixed(4);
4155 Width = TypeSize::getFixed(4 * 2);
4156 MinOffset = -64;
4157 MaxOffset = 63;
4158 break;
4159 case AArch64::StoreSwiftAsyncContext:
4160 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4161 Scale = TypeSize::getFixed(1);
4162 Width = TypeSize::getFixed(8);
4163 MinOffset = 0;
4164 MaxOffset = 4095;
4165 break;
4166 case AArch64::ADDG:
4167 Scale = TypeSize::getFixed(16);
4168 Width = TypeSize::getFixed(0);
4169 MinOffset = 0;
4170 MaxOffset = 63;
4171 break;
4172 case AArch64::TAGPstack:
4173 Scale = TypeSize::getFixed(16);
4174 Width = TypeSize::getFixed(0);
4175 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4176 // of 63 (not 64!).
4177 MinOffset = -63;
4178 MaxOffset = 63;
4179 break;
4180 case AArch64::LDG:
4181 case AArch64::STGi:
4182 case AArch64::STGPreIndex:
4183 case AArch64::STGPostIndex:
4184 case AArch64::STZGi:
4185 case AArch64::STZGPreIndex:
4186 case AArch64::STZGPostIndex:
4187 Scale = TypeSize::getFixed(16);
4188 Width = TypeSize::getFixed(16);
4189 MinOffset = -256;
4190 MaxOffset = 255;
4191 break;
4192 // SVE
4193 case AArch64::STR_ZZZZXI:
4194 case AArch64::LDR_ZZZZXI:
4195 Scale = TypeSize::getScalable(16);
4196 Width = TypeSize::getScalable(16 * 4);
4197 MinOffset = -256;
4198 MaxOffset = 252;
4199 break;
4200 case AArch64::STR_ZZZXI:
4201 case AArch64::LDR_ZZZXI:
4202 Scale = TypeSize::getScalable(16);
4203 Width = TypeSize::getScalable(16 * 3);
4204 MinOffset = -256;
4205 MaxOffset = 253;
4206 break;
4207 case AArch64::STR_ZZXI:
4208 case AArch64::LDR_ZZXI:
4209 Scale = TypeSize::getScalable(16);
4210 Width = TypeSize::getScalable(16 * 2);
4211 MinOffset = -256;
4212 MaxOffset = 254;
4213 break;
4214 case AArch64::LDR_PXI:
4215 case AArch64::STR_PXI:
4216 Scale = TypeSize::getScalable(2);
4217 Width = TypeSize::getScalable(2);
4218 MinOffset = -256;
4219 MaxOffset = 255;
4220 break;
4221 case AArch64::LDR_PPXI:
4222 case AArch64::STR_PPXI:
4223 Scale = TypeSize::getScalable(2);
4224 Width = TypeSize::getScalable(2 * 2);
4225 MinOffset = -256;
4226 MaxOffset = 254;
4227 break;
4228 case AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO:
4229 case AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO:
4230 case AArch64::LDR_ZXI:
4231 case AArch64::STR_ZXI:
4232 Scale = TypeSize::getScalable(16);
4233 Width = TypeSize::getScalable(16);
4234 MinOffset = -256;
4235 MaxOffset = 255;
4236 break;
4237 case AArch64::LD1B_IMM:
4238 case AArch64::LD1H_IMM:
4239 case AArch64::LD1W_IMM:
4240 case AArch64::LD1D_IMM:
4241 case AArch64::LDNT1B_ZRI:
4242 case AArch64::LDNT1H_ZRI:
4243 case AArch64::LDNT1W_ZRI:
4244 case AArch64::LDNT1D_ZRI:
4245 case AArch64::ST1B_IMM:
4246 case AArch64::ST1H_IMM:
4247 case AArch64::ST1W_IMM:
4248 case AArch64::ST1D_IMM:
4249 case AArch64::STNT1B_ZRI:
4250 case AArch64::STNT1H_ZRI:
4251 case AArch64::STNT1W_ZRI:
4252 case AArch64::STNT1D_ZRI:
4253 case AArch64::LDNF1B_IMM:
4254 case AArch64::LDNF1H_IMM:
4255 case AArch64::LDNF1W_IMM:
4256 case AArch64::LDNF1D_IMM:
4257 // A full vector's worth of data
4258 // Width = mbytes * elements
4259 Scale = TypeSize::getScalable(16);
4260 Width = TypeSize::getScalable(16);
4261 MinOffset = -8;
4262 MaxOffset = 7;
4263 break;
4264 case AArch64::LD2B_IMM:
4265 case AArch64::LD2H_IMM:
4266 case AArch64::LD2W_IMM:
4267 case AArch64::LD2D_IMM:
4268 case AArch64::ST2B_IMM:
4269 case AArch64::ST2H_IMM:
4270 case AArch64::ST2W_IMM:
4271 case AArch64::ST2D_IMM:
4272 Scale = TypeSize::getScalable(32);
4273 Width = TypeSize::getScalable(16 * 2);
4274 MinOffset = -8;
4275 MaxOffset = 7;
4276 break;
4277 case AArch64::LD3B_IMM:
4278 case AArch64::LD3H_IMM:
4279 case AArch64::LD3W_IMM:
4280 case AArch64::LD3D_IMM:
4281 case AArch64::ST3B_IMM:
4282 case AArch64::ST3H_IMM:
4283 case AArch64::ST3W_IMM:
4284 case AArch64::ST3D_IMM:
4285 Scale = TypeSize::getScalable(48);
4286 Width = TypeSize::getScalable(16 * 3);
4287 MinOffset = -8;
4288 MaxOffset = 7;
4289 break;
4290 case AArch64::LD4B_IMM:
4291 case AArch64::LD4H_IMM:
4292 case AArch64::LD4W_IMM:
4293 case AArch64::LD4D_IMM:
4294 case AArch64::ST4B_IMM:
4295 case AArch64::ST4H_IMM:
4296 case AArch64::ST4W_IMM:
4297 case AArch64::ST4D_IMM:
4298 Scale = TypeSize::getScalable(64);
4299 Width = TypeSize::getScalable(16 * 4);
4300 MinOffset = -8;
4301 MaxOffset = 7;
4302 break;
4303 case AArch64::LD1B_H_IMM:
4304 case AArch64::LD1SB_H_IMM:
4305 case AArch64::LD1H_S_IMM:
4306 case AArch64::LD1SH_S_IMM:
4307 case AArch64::LD1W_D_IMM:
4308 case AArch64::LD1SW_D_IMM:
4309 case AArch64::ST1B_H_IMM:
4310 case AArch64::ST1H_S_IMM:
4311 case AArch64::ST1W_D_IMM:
4312 case AArch64::LDNF1B_H_IMM:
4313 case AArch64::LDNF1SB_H_IMM:
4314 case AArch64::LDNF1H_S_IMM:
4315 case AArch64::LDNF1SH_S_IMM:
4316 case AArch64::LDNF1W_D_IMM:
4317 case AArch64::LDNF1SW_D_IMM:
4318 // A half vector's worth of data
4319 // Width = mbytes * elements
4320 Scale = TypeSize::getScalable(8);
4321 Width = TypeSize::getScalable(8);
4322 MinOffset = -8;
4323 MaxOffset = 7;
4324 break;
4325 case AArch64::LD1B_S_IMM:
4326 case AArch64::LD1SB_S_IMM:
4327 case AArch64::LD1H_D_IMM:
4328 case AArch64::LD1SH_D_IMM:
4329 case AArch64::ST1B_S_IMM:
4330 case AArch64::ST1H_D_IMM:
4331 case AArch64::LDNF1B_S_IMM:
4332 case AArch64::LDNF1SB_S_IMM:
4333 case AArch64::LDNF1H_D_IMM:
4334 case AArch64::LDNF1SH_D_IMM:
4335 // A quarter vector's worth of data
4336 // Width = mbytes * elements
4337 Scale = TypeSize::getScalable(4);
4338 Width = TypeSize::getScalable(4);
4339 MinOffset = -8;
4340 MaxOffset = 7;
4341 break;
4342 case AArch64::LD1B_D_IMM:
4343 case AArch64::LD1SB_D_IMM:
4344 case AArch64::ST1B_D_IMM:
4345 case AArch64::LDNF1B_D_IMM:
4346 case AArch64::LDNF1SB_D_IMM:
4347 // An eighth vector's worth of data
4348 // Width = mbytes * elements
4349 Scale = TypeSize::getScalable(2);
4350 Width = TypeSize::getScalable(2);
4351 MinOffset = -8;
4352 MaxOffset = 7;
4353 break;
4354 case AArch64::ST2Gi:
4355 case AArch64::ST2GPreIndex:
4356 case AArch64::ST2GPostIndex:
4357 case AArch64::STZ2Gi:
4358 case AArch64::STZ2GPreIndex:
4359 case AArch64::STZ2GPostIndex:
4360 Scale = TypeSize::getFixed(16);
4361 Width = TypeSize::getFixed(32);
4362 MinOffset = -256;
4363 MaxOffset = 255;
4364 break;
4365 case AArch64::STGPi:
4366 case AArch64::STGPpost:
4367 case AArch64::STGPpre:
4368 Scale = TypeSize::getFixed(16);
4369 Width = TypeSize::getFixed(16);
4370 MinOffset = -64;
4371 MaxOffset = 63;
4372 break;
4373 case AArch64::LD1RB_IMM:
4374 case AArch64::LD1RB_H_IMM:
4375 case AArch64::LD1RB_S_IMM:
4376 case AArch64::LD1RB_D_IMM:
4377 case AArch64::LD1RSB_H_IMM:
4378 case AArch64::LD1RSB_S_IMM:
4379 case AArch64::LD1RSB_D_IMM:
4380 Scale = TypeSize::getFixed(1);
4381 Width = TypeSize::getFixed(1);
4382 MinOffset = 0;
4383 MaxOffset = 63;
4384 break;
4385 case AArch64::LD1RH_IMM:
4386 case AArch64::LD1RH_S_IMM:
4387 case AArch64::LD1RH_D_IMM:
4388 case AArch64::LD1RSH_S_IMM:
4389 case AArch64::LD1RSH_D_IMM:
4390 Scale = TypeSize::getFixed(2);
4391 Width = TypeSize::getFixed(2);
4392 MinOffset = 0;
4393 MaxOffset = 63;
4394 break;
4395 case AArch64::LD1RW_IMM:
4396 case AArch64::LD1RW_D_IMM:
4397 case AArch64::LD1RSW_IMM:
4398 Scale = TypeSize::getFixed(4);
4399 Width = TypeSize::getFixed(4);
4400 MinOffset = 0;
4401 MaxOffset = 63;
4402 break;
4403 case AArch64::LD1RD_IMM:
4404 Scale = TypeSize::getFixed(8);
4405 Width = TypeSize::getFixed(8);
4406 MinOffset = 0;
4407 MaxOffset = 63;
4408 break;
4409 }
4410
4411 return true;
4412}
4413
4414// Scaling factor for unscaled load or store.
4415int AArch64InstrInfo::getMemScale(unsigned Opc) {
4416 switch (Opc) {
4417 default:
4418 llvm_unreachable("Opcode has unknown scale!");
4419 case AArch64::LDRBBui:
4420 case AArch64::LDURBBi:
4421 case AArch64::LDRSBWui:
4422 case AArch64::LDURSBWi:
4423 case AArch64::STRBBui:
4424 case AArch64::STURBBi:
4425 return 1;
4426 case AArch64::LDRHHui:
4427 case AArch64::LDURHHi:
4428 case AArch64::LDRSHWui:
4429 case AArch64::LDURSHWi:
4430 case AArch64::STRHHui:
4431 case AArch64::STURHHi:
4432 return 2;
4433 case AArch64::LDRSui:
4434 case AArch64::LDURSi:
4435 case AArch64::LDRSpre:
4436 case AArch64::LDRSWui:
4437 case AArch64::LDURSWi:
4438 case AArch64::LDRSWpre:
4439 case AArch64::LDRWpre:
4440 case AArch64::LDRWui:
4441 case AArch64::LDURWi:
4442 case AArch64::STRSui:
4443 case AArch64::STURSi:
4444 case AArch64::STRSpre:
4445 case AArch64::STRWui:
4446 case AArch64::STURWi:
4447 case AArch64::STRWpre:
4448 case AArch64::LDPSi:
4449 case AArch64::LDPSWi:
4450 case AArch64::LDPWi:
4451 case AArch64::STPSi:
4452 case AArch64::STPWi:
4453 return 4;
4454 case AArch64::LDRDui:
4455 case AArch64::LDURDi:
4456 case AArch64::LDRDpre:
4457 case AArch64::LDRXui:
4458 case AArch64::LDURXi:
4459 case AArch64::LDRXpre:
4460 case AArch64::STRDui:
4461 case AArch64::STURDi:
4462 case AArch64::STRDpre:
4463 case AArch64::STRXui:
4464 case AArch64::STURXi:
4465 case AArch64::STRXpre:
4466 case AArch64::LDPDi:
4467 case AArch64::LDPXi:
4468 case AArch64::STPDi:
4469 case AArch64::STPXi:
4470 return 8;
4471 case AArch64::LDRQui:
4472 case AArch64::LDURQi:
4473 case AArch64::STRQui:
4474 case AArch64::STURQi:
4475 case AArch64::STRQpre:
4476 case AArch64::LDPQi:
4477 case AArch64::LDRQpre:
4478 case AArch64::STPQi:
4479 case AArch64::STGi:
4480 case AArch64::STZGi:
4481 case AArch64::ST2Gi:
4482 case AArch64::STZ2Gi:
4483 case AArch64::STGPi:
4484 return 16;
4485 }
4486}
4487
4488bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4489 switch (MI.getOpcode()) {
4490 default:
4491 return false;
4492 case AArch64::LDRWpre:
4493 case AArch64::LDRXpre:
4494 case AArch64::LDRSWpre:
4495 case AArch64::LDRSpre:
4496 case AArch64::LDRDpre:
4497 case AArch64::LDRQpre:
4498 return true;
4499 }
4500}
4501
4502bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4503 switch (MI.getOpcode()) {
4504 default:
4505 return false;
4506 case AArch64::STRWpre:
4507 case AArch64::STRXpre:
4508 case AArch64::STRSpre:
4509 case AArch64::STRDpre:
4510 case AArch64::STRQpre:
4511 return true;
4512 }
4513}
4514
4515bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4516 return isPreLd(MI) || isPreSt(MI);
4517}
4518
4519bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4520 switch (MI.getOpcode()) {
4521 default:
4522 return false;
4523 case AArch64::LDPSi:
4524 case AArch64::LDPSWi:
4525 case AArch64::LDPDi:
4526 case AArch64::LDPQi:
4527 case AArch64::LDPWi:
4528 case AArch64::LDPXi:
4529 case AArch64::STPSi:
4530 case AArch64::STPDi:
4531 case AArch64::STPQi:
4532 case AArch64::STPWi:
4533 case AArch64::STPXi:
4534 case AArch64::STGPi:
4535 return true;
4536 }
4537}
4538
4540 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4541 unsigned Idx =
4543 : 1;
4544 return MI.getOperand(Idx);
4545}
4546
4547const MachineOperand &
4549 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4550 unsigned Idx =
4552 : 2;
4553 return MI.getOperand(Idx);
4554}
4555
4556const MachineOperand &
4558 switch (MI.getOpcode()) {
4559 default:
4560 llvm_unreachable("Unexpected opcode");
4561 case AArch64::LDRBroX:
4562 case AArch64::LDRBBroX:
4563 case AArch64::LDRSBXroX:
4564 case AArch64::LDRSBWroX:
4565 case AArch64::LDRHroX:
4566 case AArch64::LDRHHroX:
4567 case AArch64::LDRSHXroX:
4568 case AArch64::LDRSHWroX:
4569 case AArch64::LDRWroX:
4570 case AArch64::LDRSroX:
4571 case AArch64::LDRSWroX:
4572 case AArch64::LDRDroX:
4573 case AArch64::LDRXroX:
4574 case AArch64::LDRQroX:
4575 return MI.getOperand(4);
4576 }
4577}
4578
4580 Register Reg) {
4581 if (MI.getParent() == nullptr)
4582 return nullptr;
4583 const MachineFunction *MF = MI.getParent()->getParent();
4584 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4585}
4586
4588 auto IsHFPR = [&](const MachineOperand &Op) {
4589 if (!Op.isReg())
4590 return false;
4591 auto Reg = Op.getReg();
4592 if (Reg.isPhysical())
4593 return AArch64::FPR16RegClass.contains(Reg);
4594 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4595 return TRC == &AArch64::FPR16RegClass ||
4596 TRC == &AArch64::FPR16_loRegClass;
4597 };
4598 return llvm::any_of(MI.operands(), IsHFPR);
4599}
4600
4602 auto IsQFPR = [&](const MachineOperand &Op) {
4603 if (!Op.isReg())
4604 return false;
4605 auto Reg = Op.getReg();
4606 if (Reg.isPhysical())
4607 return AArch64::FPR128RegClass.contains(Reg);
4608 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4609 return TRC == &AArch64::FPR128RegClass ||
4610 TRC == &AArch64::FPR128_loRegClass;
4611 };
4612 return llvm::any_of(MI.operands(), IsQFPR);
4613}
4614
4616 switch (MI.getOpcode()) {
4617 case AArch64::BRK:
4618 case AArch64::HLT:
4619 case AArch64::PACIASP:
4620 case AArch64::PACIBSP:
4621 // Implicit BTI behavior.
4622 return true;
4623 case AArch64::PAUTH_PROLOGUE:
4624 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4625 return true;
4626 case AArch64::HINT: {
4627 unsigned Imm = MI.getOperand(0).getImm();
4628 // Explicit BTI instruction.
4629 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4630 return true;
4631 // PACI(A|B)SP instructions.
4632 if (Imm == 25 || Imm == 27)
4633 return true;
4634 return false;
4635 }
4636 default:
4637 return false;
4638 }
4639}
4640
4642 if (Reg == 0)
4643 return false;
4644 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4645 return AArch64::FPR128RegClass.contains(Reg) ||
4646 AArch64::FPR64RegClass.contains(Reg) ||
4647 AArch64::FPR32RegClass.contains(Reg) ||
4648 AArch64::FPR16RegClass.contains(Reg) ||
4649 AArch64::FPR8RegClass.contains(Reg);
4650}
4651
4653 auto IsFPR = [&](const MachineOperand &Op) {
4654 if (!Op.isReg())
4655 return false;
4656 auto Reg = Op.getReg();
4657 if (Reg.isPhysical())
4658 return isFpOrNEON(Reg);
4659
4660 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4661 return TRC == &AArch64::FPR128RegClass ||
4662 TRC == &AArch64::FPR128_loRegClass ||
4663 TRC == &AArch64::FPR64RegClass ||
4664 TRC == &AArch64::FPR64_loRegClass ||
4665 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4666 TRC == &AArch64::FPR8RegClass;
4667 };
4668 return llvm::any_of(MI.operands(), IsFPR);
4669}
4670
4671// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4672// scaled.
4673static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4674 int Scale = AArch64InstrInfo::getMemScale(Opc);
4675
4676 // If the byte-offset isn't a multiple of the stride, we can't scale this
4677 // offset.
4678 if (Offset % Scale != 0)
4679 return false;
4680
4681 // Convert the byte-offset used by unscaled into an "element" offset used
4682 // by the scaled pair load/store instructions.
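 // For example, an 8-byte access (Scale == 8) at byte offset 16 becomes
 // element offset 2.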
4683 Offset /= Scale;
4684 return true;
4685}
4686
4687static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4688 if (FirstOpc == SecondOpc)
4689 return true;
4690 // We can also pair sign-ext and zero-ext instructions.
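 // For example, a zero-extending LDRWui may pair with a sign-extending
 // LDRSWui (and vice versa), as handled by the LDRWui/LDRSWui cases below.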
4691 switch (FirstOpc) {
4692 default:
4693 return false;
4694 case AArch64::STRSui:
4695 case AArch64::STURSi:
4696 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4697 case AArch64::STRDui:
4698 case AArch64::STURDi:
4699 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4700 case AArch64::STRQui:
4701 case AArch64::STURQi:
4702 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4703 case AArch64::STRWui:
4704 case AArch64::STURWi:
4705 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4706 case AArch64::STRXui:
4707 case AArch64::STURXi:
4708 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4709 case AArch64::LDRSui:
4710 case AArch64::LDURSi:
4711 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4712 case AArch64::LDRDui:
4713 case AArch64::LDURDi:
4714 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4715 case AArch64::LDRQui:
4716 case AArch64::LDURQi:
4717 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4718 case AArch64::LDRWui:
4719 case AArch64::LDURWi:
4720 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4721 case AArch64::LDRSWui:
4722 case AArch64::LDURSWi:
4723 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4724 case AArch64::LDRXui:
4725 case AArch64::LDURXi:
4726 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4727 }
4728 // These instructions can't be paired based on their opcodes.
4729 return false;
4730}
4731
4732static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4733 int64_t Offset1, unsigned Opcode1, int FI2,
4734 int64_t Offset2, unsigned Opcode2) {
4735 // Accesses through fixed stack object frame indices may access a different
4736 // fixed stack slot. Check that the object offsets + offsets match.
4737 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4738 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4739 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4740 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4741 // Convert to scaled object offsets.
4742 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4743 if (ObjectOffset1 % Scale1 != 0)
4744 return false;
4745 ObjectOffset1 /= Scale1;
4746 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4747 if (ObjectOffset2 % Scale2 != 0)
4748 return false;
4749 ObjectOffset2 /= Scale2;
4750 ObjectOffset1 += Offset1;
4751 ObjectOffset2 += Offset2;
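 // Cluster only if the two accesses land on adjacent scaled elements.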
4752 return ObjectOffset1 + 1 == ObjectOffset2;
4753 }
4754
4755 return FI1 == FI2;
4756}
4757
4758/// Detect opportunities for ldp/stp formation.
4759///
4760/// Only called for LdSt for which getMemOperandWithOffset returns true.
4761bool AArch64InstrInfo::shouldClusterMemOps(
4762 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4763 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4764 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4765 unsigned NumBytes) const {
4766 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4767 const MachineOperand &BaseOp1 = *BaseOps1.front();
4768 const MachineOperand &BaseOp2 = *BaseOps2.front();
4769 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4770 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4771 if (BaseOp1.getType() != BaseOp2.getType())
4772 return false;
4773
4774 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4775 "Only base registers and frame indices are supported.");
4776
4777 // Check for both base regs and base FI.
4778 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4779 return false;
4780
4781 // Only cluster up to a single pair.
4782 if (ClusterSize > 2)
4783 return false;
4784
4785 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4786 return false;
4787
4788 // Can we pair these instructions based on their opcodes?
4789 unsigned FirstOpc = FirstLdSt.getOpcode();
4790 unsigned SecondOpc = SecondLdSt.getOpcode();
4791 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4792 return false;
4793
4794 // Can't merge volatiles or load/stores that have a hint to avoid pair
4795 // formation, for example.
4796 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4797 !isCandidateToMergeOrPair(SecondLdSt))
4798 return false;
4799
4800 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4801 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4802 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4803 return false;
4804
4805 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4806 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4807 return false;
4808
4809 // Pairwise instructions have a 7-bit signed offset field.
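 // That is, the scaled element offset must fit in the range [-64, 63].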
4810 if (Offset1 > 63 || Offset1 < -64)
4811 return false;
4812
4813 // The caller should already have ordered FirstLdSt/SecondLdSt by offset.
4814 // Note: this need not hold when the bases are different frame indices.
4815 if (BaseOp1.isFI()) {
4816 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4817 "Caller should have ordered offsets.");
4818
4819 const MachineFrameInfo &MFI =
4820 FirstLdSt.getParent()->getParent()->getFrameInfo();
4821 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4822 BaseOp2.getIndex(), Offset2, SecondOpc);
4823 }
4824
4825 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4826
4827 return Offset1 + 1 == Offset2;
4828}
4829
4830static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4831 MCRegister Reg, unsigned SubIdx,
4832 unsigned State,
4833 const TargetRegisterInfo *TRI) {
4834 if (!SubIdx)
4835 return MIB.addReg(Reg, State);
4836
4837 if (Reg.isPhysical())
4838 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4839 return MIB.addReg(Reg, State, SubIdx);
4840}
4841
4842static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4843 unsigned NumRegs) {
4844 // We really want the positive remainder mod 32 here, which happens to be
4845 // easily obtainable with a mask.
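 // For example, with DestReg encoding 0, SrcReg encoding 1 and NumRegs == 2,
 // (0 - 1) & 0x1f == 31 >= 2, so a forward copy does not clobber the source;
 // with the encodings swapped, (1 - 0) & 0x1f == 1 < 2 and it does.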
4846 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4847}
4848
4849void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4850 MachineBasicBlock::iterator I,
4851 const DebugLoc &DL, MCRegister DestReg,
4852 MCRegister SrcReg, bool KillSrc,
4853 unsigned Opcode,
4854 ArrayRef<unsigned> Indices) const {
4855 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4857 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4858 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4859 unsigned NumRegs = Indices.size();
4860
4861 int SubReg = 0, End = NumRegs, Incr = 1;
4862 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4863 SubReg = NumRegs - 1;
4864 End = -1;
4865 Incr = -1;
4866 }
4867
4868 for (; SubReg != End; SubReg += Incr) {
4869 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4870 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4871 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4872 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4873 }
4874}
4875
4876void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4877 MachineBasicBlock::iterator I,
4878 const DebugLoc &DL, MCRegister DestReg,
4879 MCRegister SrcReg, bool KillSrc,
4880 unsigned Opcode, unsigned ZeroReg,
4881 llvm::ArrayRef<unsigned> Indices) const {
4883 unsigned NumRegs = Indices.size();
4884
4885#ifndef NDEBUG
4886 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4887 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4888 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4889 "GPR reg sequences should not be able to overlap");
4890#endif
4891
4892 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4893 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4894 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4895 MIB.addReg(ZeroReg);
4896 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4897 MIB.addImm(0);
4898 }
4899}
4900
4901void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4902 MachineBasicBlock::iterator I,
4903 const DebugLoc &DL, MCRegister DestReg,
4904 MCRegister SrcReg, bool KillSrc,
4905 bool RenamableDest,
4906 bool RenamableSrc) const {
4907 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4908 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4910
4911 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4912 // If either operand is WSP, expand to ADD #0.
4913 if (Subtarget.hasZeroCycleRegMove()) {
4914 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4915 MCRegister DestRegX = TRI->getMatchingSuperReg(
4916 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4917 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4918 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4919 // This instruction is reading and writing X registers. This may upset
4920 // the register scavenger and machine verifier, so we need to indicate
4921 // that we are reading an undefined value from SrcRegX, but a proper
4922 // value from SrcReg.
4923 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4924 .addReg(SrcRegX, RegState::Undef)
4925 .addImm(0)
4927 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4928 } else {
4929 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4930 .addReg(SrcReg, getKillRegState(KillSrc))
4931 .addImm(0)
4933 }
4934 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4935 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4936 .addImm(0)
4938 } else {
4939 if (Subtarget.hasZeroCycleRegMove()) {
4940 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4941 MCRegister DestRegX = TRI->getMatchingSuperReg(
4942 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4943 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4944 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4945 // This instruction is reading and writing X registers. This may upset
4946 // the register scavenger and machine verifier, so we need to indicate
4947 // that we are reading an undefined value from SrcRegX, but a proper
4948 // value from SrcReg.
4949 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4950 .addReg(AArch64::XZR)
4951 .addReg(SrcRegX, RegState::Undef)
4952 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4953 } else {
4954 // Otherwise, expand to ORR WZR.
4955 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4956 .addReg(AArch64::WZR)
4957 .addReg(SrcReg, getKillRegState(KillSrc));
4958 }
4959 }
4960 return;
4961 }
4962
4963 // Copy a Predicate register by ORRing with itself.
4964 if (AArch64::PPRRegClass.contains(DestReg) &&
4965 AArch64::PPRRegClass.contains(SrcReg)) {
4967 "Unexpected SVE register.");
4968 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4969 .addReg(SrcReg) // Pg
4970 .addReg(SrcReg)
4971 .addReg(SrcReg, getKillRegState(KillSrc));
4972 return;
4973 }
4974
4975 // Copy a predicate-as-counter register by ORRing with itself as if it
4976 // were a regular predicate (mask) register.
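 // PN0..PN15 alias P0..P15, so the conversion below is a fixed offset between
 // the two register number ranges.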
4977 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4978 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4979 if (DestIsPNR || SrcIsPNR) {
4980 auto ToPPR = [](MCRegister R) -> MCRegister {
4981 return (R - AArch64::PN0) + AArch64::P0;
4982 };
4983 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4984 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4985
4986 if (PPRSrcReg != PPRDestReg) {
4987 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4988 .addReg(PPRSrcReg) // Pg
4989 .addReg(PPRSrcReg)
4990 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4991 if (DestIsPNR)
4992 NewMI.addDef(DestReg, RegState::Implicit);
4993 }
4994 return;
4995 }
4996
4997 // Copy a Z register by ORRing with itself.
4998 if (AArch64::ZPRRegClass.contains(DestReg) &&
4999 AArch64::ZPRRegClass.contains(SrcReg)) {
5001 "Unexpected SVE register.");
5002 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5003 .addReg(SrcReg)
5004 .addReg(SrcReg, getKillRegState(KillSrc));
5005 return;
5006 }
5007
5008 // Copy a Z register pair by copying the individual sub-registers.
5009 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5010 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5011 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5012 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5014 "Unexpected SVE register.");
5015 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5016 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5017 Indices);
5018 return;
5019 }
5020
5021 // Copy a Z register triple by copying the individual sub-registers.
5022 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5023 AArch64::ZPR3RegClass.contains(SrcReg)) {
5025 "Unexpected SVE register.");
5026 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5027 AArch64::zsub2};
5028 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5029 Indices);
5030 return;
5031 }
5032
5033 // Copy a Z register quad by copying the individual sub-registers.
5034 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5035 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5036 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5037 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5039 "Unexpected SVE register.");
5040 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5041 AArch64::zsub2, AArch64::zsub3};
5042 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5043 Indices);
5044 return;
5045 }
5046
5047 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5048 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5049 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5050 // If either operand is SP, expand to ADD #0.
5051 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5052 .addReg(SrcReg, getKillRegState(KillSrc))
5053 .addImm(0)
5055 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5056 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5057 .addImm(0)
5059 } else {
5060 // Otherwise, expand to ORR XZR.
5061 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5062 .addReg(AArch64::XZR)
5063 .addReg(SrcReg, getKillRegState(KillSrc));
5064 }
5065 return;
5066 }
5067
5068 // Copy a DDDD register quad by copying the individual sub-registers.
5069 if (AArch64::DDDDRegClass.contains(DestReg) &&
5070 AArch64::DDDDRegClass.contains(SrcReg)) {
5071 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5072 AArch64::dsub2, AArch64::dsub3};
5073 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5074 Indices);
5075 return;
5076 }
5077
5078 // Copy a DDD register triple by copying the individual sub-registers.
5079 if (AArch64::DDDRegClass.contains(DestReg) &&
5080 AArch64::DDDRegClass.contains(SrcReg)) {
5081 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5082 AArch64::dsub2};
5083 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5084 Indices);
5085 return;
5086 }
5087
5088 // Copy a DD register pair by copying the individual sub-registers.
5089 if (AArch64::DDRegClass.contains(DestReg) &&
5090 AArch64::DDRegClass.contains(SrcReg)) {
5091 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5092 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5093 Indices);
5094 return;
5095 }
5096
5097 // Copy a QQQQ register quad by copying the individual sub-registers.
5098 if (AArch64::QQQQRegClass.contains(DestReg) &&
5099 AArch64::QQQQRegClass.contains(SrcReg)) {
5100 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5101 AArch64::qsub2, AArch64::qsub3};
5102 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5103 Indices);
5104 return;
5105 }
5106
5107 // Copy a QQQ register triple by copying the individual sub-registers.
5108 if (AArch64::QQQRegClass.contains(DestReg) &&
5109 AArch64::QQQRegClass.contains(SrcReg)) {
5110 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5111 AArch64::qsub2};
5112 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5113 Indices);
5114 return;
5115 }
5116
5117 // Copy a QQ register pair by copying the individual sub-registers.
5118 if (AArch64::QQRegClass.contains(DestReg) &&
5119 AArch64::QQRegClass.contains(SrcReg)) {
5120 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5121 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5122 Indices);
5123 return;
5124 }
5125
5126 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5127 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5128 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5129 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5130 AArch64::XZR, Indices);
5131 return;
5132 }
5133
5134 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5135 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5136 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5137 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5138 AArch64::WZR, Indices);
5139 return;
5140 }
5141
5142 if (AArch64::FPR128RegClass.contains(DestReg) &&
5143 AArch64::FPR128RegClass.contains(SrcReg)) {
5144 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5145 !Subtarget.isNeonAvailable())
5146 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5147 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5148 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5149 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5150 else if (Subtarget.isNeonAvailable())
5151 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5152 .addReg(SrcReg)
5153 .addReg(SrcReg, getKillRegState(KillSrc));
5154 else {
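 // Neither NEON nor SVE ORR is usable here, so bounce the 128-bit value
 // through the stack: a pre-indexed STRQpre followed by a post-indexed
 // LDRQpost leaves SP unchanged.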
5155 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5156 .addReg(AArch64::SP, RegState::Define)
5157 .addReg(SrcReg, getKillRegState(KillSrc))
5158 .addReg(AArch64::SP)
5159 .addImm(-16);
5160 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5161 .addReg(AArch64::SP, RegState::Define)
5162 .addReg(DestReg, RegState::Define)
5163 .addReg(AArch64::SP)
5164 .addImm(16);
5165 }
5166 return;
5167 }
5168
5169 if (AArch64::FPR64RegClass.contains(DestReg) &&
5170 AArch64::FPR64RegClass.contains(SrcReg)) {
5171 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5172 .addReg(SrcReg, getKillRegState(KillSrc));
5173 return;
5174 }
5175
5176 if (AArch64::FPR32RegClass.contains(DestReg) &&
5177 AArch64::FPR32RegClass.contains(SrcReg)) {
5178 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5179 .addReg(SrcReg, getKillRegState(KillSrc));
5180 return;
5181 }
5182
5183 if (AArch64::FPR16RegClass.contains(DestReg) &&
5184 AArch64::FPR16RegClass.contains(SrcReg)) {
5185 DestReg =
5186 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
5187 SrcReg =
5188 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
5189 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5190 .addReg(SrcReg, getKillRegState(KillSrc));
5191 return;
5192 }
5193
5194 if (AArch64::FPR8RegClass.contains(DestReg) &&
5195 AArch64::FPR8RegClass.contains(SrcReg)) {
5196 DestReg =
5197 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
5198 SrcReg =
5199 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
5200 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5201 .addReg(SrcReg, getKillRegState(KillSrc));
5202 return;
5203 }
5204
5205 // Copies between GPR64 and FPR64.
5206 if (AArch64::FPR64RegClass.contains(DestReg) &&
5207 AArch64::GPR64RegClass.contains(SrcReg)) {
5208 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5209 .addReg(SrcReg, getKillRegState(KillSrc));
5210 return;
5211 }
5212 if (AArch64::GPR64RegClass.contains(DestReg) &&
5213 AArch64::FPR64RegClass.contains(SrcReg)) {
5214 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5215 .addReg(SrcReg, getKillRegState(KillSrc));
5216 return;
5217 }
5218 // Copies between GPR32 and FPR32.
5219 if (AArch64::FPR32RegClass.contains(DestReg) &&
5220 AArch64::GPR32RegClass.contains(SrcReg)) {
5221 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5222 .addReg(SrcReg, getKillRegState(KillSrc));
5223 return;
5224 }
5225 if (AArch64::GPR32RegClass.contains(DestReg) &&
5226 AArch64::FPR32RegClass.contains(SrcReg)) {
5227 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5228 .addReg(SrcReg, getKillRegState(KillSrc));
5229 return;
5230 }
5231
5232 if (DestReg == AArch64::NZCV) {
5233 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5234 BuildMI(MBB, I, DL, get(AArch64::MSR))
5235 .addImm(AArch64SysReg::NZCV)
5236 .addReg(SrcReg, getKillRegState(KillSrc))
5237 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5238 return;
5239 }
5240
5241 if (SrcReg == AArch64::NZCV) {
5242 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5243 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5244 .addImm(AArch64SysReg::NZCV)
5245 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5246 return;
5247 }
5248
5249#ifndef NDEBUG
5251 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5252 << TRI.getRegAsmName(SrcReg) << "\n";
5253#endif
5254 llvm_unreachable("unimplemented reg-to-reg copy");
5255}
5256
5259 MachineBasicBlock::iterator InsertBefore,
5260 const MCInstrDesc &MCID,
5261 Register SrcReg, bool IsKill,
5262 unsigned SubIdx0, unsigned SubIdx1, int FI,
5263 MachineMemOperand *MMO) {
5264 Register SrcReg0 = SrcReg;
5265 Register SrcReg1 = SrcReg;
5266 if (SrcReg.isPhysical()) {
5267 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5268 SubIdx0 = 0;
5269 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5270 SubIdx1 = 0;
5271 }
5272 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5273 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5274 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5275 .addFrameIndex(FI)
5276 .addImm(0)
5277 .addMemOperand(MMO);
5278}
5279
5282 Register SrcReg, bool isKill, int FI,
5283 const TargetRegisterClass *RC,
5284 const TargetRegisterInfo *TRI,
5285 Register VReg,
5286 MachineInstr::MIFlag Flags) const {
5287 MachineFunction &MF = *MBB.getParent();
5288 MachineFrameInfo &MFI = MF.getFrameInfo();
5289
5291 MachineMemOperand *MMO =
5293 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5294 unsigned Opc = 0;
5295 bool Offset = true;
5297 unsigned StackID = TargetStackID::Default;
5298 switch (TRI->getSpillSize(*RC)) {
5299 case 1:
5300 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5301 Opc = AArch64::STRBui;
5302 break;
5303 case 2: {
5304 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5305 Opc = AArch64::STRHui;
5306 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5307 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5309 "Unexpected register store without SVE store instructions");
5310 Opc = AArch64::STR_PXI;
5312 }
5313 break;
5314 }
5315 case 4:
5316 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5317 Opc = AArch64::STRWui;
5318 if (SrcReg.isVirtual())
5319 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5320 else
5321 assert(SrcReg != AArch64::WSP);
5322 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5323 Opc = AArch64::STRSui;
5324 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5325 Opc = AArch64::STR_PPXI;
5327 }
5328 break;
5329 case 8:
5330 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5331 Opc = AArch64::STRXui;
5332 if (SrcReg.isVirtual())
5333 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5334 else
5335 assert(SrcReg != AArch64::SP);
5336 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5337 Opc = AArch64::STRDui;
5338 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5340 get(AArch64::STPWi), SrcReg, isKill,
5341 AArch64::sube32, AArch64::subo32, FI, MMO);
5342 return;
5343 }
5344 break;
5345 case 16:
5346 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5347 Opc = AArch64::STRQui;
5348 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5349 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5350 Opc = AArch64::ST1Twov1d;
5351 Offset = false;
5352 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5354 get(AArch64::STPXi), SrcReg, isKill,
5355 AArch64::sube64, AArch64::subo64, FI, MMO);
5356 return;
5357 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5359 "Unexpected register store without SVE store instructions");
5360 Opc = AArch64::STR_ZXI;
5362 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5364 "Unexpected predicate store without SVE store instructions");
5365 Opc = AArch64::SPILL_PPR_TO_ZPR_SLOT_PSEUDO;
5367 }
5368 break;
5369 case 24:
5370 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5371 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5372 Opc = AArch64::ST1Threev1d;
5373 Offset = false;
5374 }
5375 break;
5376 case 32:
5377 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5378 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5379 Opc = AArch64::ST1Fourv1d;
5380 Offset = false;
5381 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5382 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5383 Opc = AArch64::ST1Twov2d;
5384 Offset = false;
5385 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5386 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5388 "Unexpected register store without SVE store instructions");
5389 Opc = AArch64::STR_ZZXI;
5391 }
5392 break;
5393 case 48:
5394 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5395 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5396 Opc = AArch64::ST1Threev2d;
5397 Offset = false;
5398 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5400 "Unexpected register store without SVE store instructions");
5401 Opc = AArch64::STR_ZZZXI;
5403 }
5404 break;
5405 case 64:
5406 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5407 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5408 Opc = AArch64::ST1Fourv2d;
5409 Offset = false;
5410 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5411 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5413 "Unexpected register store without SVE store instructions");
5414 Opc = AArch64::STR_ZZZZXI;
5416 }
5417 break;
5418 }
5419 assert(Opc && "Unknown register class");
5420 MFI.setStackID(FI, StackID);
5421
5422 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5423 .addReg(SrcReg, getKillRegState(isKill))
5424 .addFrameIndex(FI);
5425
5426 if (Offset)
5427 MI.addImm(0);
5428 if (PNRReg.isValid())
5429 MI.addDef(PNRReg, RegState::Implicit);
5430 MI.addMemOperand(MMO);
5431}
5432
5435 MachineBasicBlock::iterator InsertBefore,
5436 const MCInstrDesc &MCID,
5437 Register DestReg, unsigned SubIdx0,
5438 unsigned SubIdx1, int FI,
5439 MachineMemOperand *MMO) {
5440 Register DestReg0 = DestReg;
5441 Register DestReg1 = DestReg;
5442 bool IsUndef = true;
5443 if (DestReg.isPhysical()) {
5444 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5445 SubIdx0 = 0;
5446 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5447 SubIdx1 = 0;
5448 IsUndef = false;
5449 }
5450 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5451 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5452 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5453 .addFrameIndex(FI)
5454 .addImm(0)
5455 .addMemOperand(MMO);
5456}
5457
5460 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5461 Register VReg, MachineInstr::MIFlag Flags) const {
5462 MachineFunction &MF = *MBB.getParent();
5463 MachineFrameInfo &MFI = MF.getFrameInfo();
5465 MachineMemOperand *MMO =
5467 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5468
5469 unsigned Opc = 0;
5470 bool Offset = true;
5471 unsigned StackID = TargetStackID::Default;
5473 switch (TRI->getSpillSize(*RC)) {
5474 case 1:
5475 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5476 Opc = AArch64::LDRBui;
5477 break;
5478 case 2: {
5479 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5480 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5481 Opc = AArch64::LDRHui;
5482 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5484 "Unexpected register load without SVE load instructions");
5485 if (IsPNR)
5486 PNRReg = DestReg;
5487 Opc = AArch64::LDR_PXI;
5489 }
5490 break;
5491 }
5492 case 4:
5493 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5494 Opc = AArch64::LDRWui;
5495 if (DestReg.isVirtual())
5496 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5497 else
5498 assert(DestReg != AArch64::WSP);
5499 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5500 Opc = AArch64::LDRSui;
5501 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5502 Opc = AArch64::LDR_PPXI;
5504 }
5505 break;
5506 case 8:
5507 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5508 Opc = AArch64::LDRXui;
5509 if (DestReg.isVirtual())
5510 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5511 else
5512 assert(DestReg != AArch64::SP);
5513 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5514 Opc = AArch64::LDRDui;
5515 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5517 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5518 AArch64::subo32, FI, MMO);
5519 return;
5520 }
5521 break;
5522 case 16:
5523 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5524 Opc = AArch64::LDRQui;
5525 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5526 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5527 Opc = AArch64::LD1Twov1d;
5528 Offset = false;
5529 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5531 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5532 AArch64::subo64, FI, MMO);
5533 return;
5534 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5536 "Unexpected register load without SVE load instructions");
5537 Opc = AArch64::LDR_ZXI;
5539 } else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
5541 "Unexpected predicate load without SVE load instructions");
5542 Opc = AArch64::FILL_PPR_FROM_ZPR_SLOT_PSEUDO;
5544 }
5545 break;
5546 case 24:
5547 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5548 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5549 Opc = AArch64::LD1Threev1d;
5550 Offset = false;
5551 }
5552 break;
5553 case 32:
5554 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5555 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5556 Opc = AArch64::LD1Fourv1d;
5557 Offset = false;
5558 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5559 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5560 Opc = AArch64::LD1Twov2d;
5561 Offset = false;
5562 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5563 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5565 "Unexpected register load without SVE load instructions");
5566 Opc = AArch64::LDR_ZZXI;
5568 }
5569 break;
5570 case 48:
5571 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5572 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5573 Opc = AArch64::LD1Threev2d;
5574 Offset = false;
5575 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5577 "Unexpected register load without SVE load instructions");
5578 Opc = AArch64::LDR_ZZZXI;
5580 }
5581 break;
5582 case 64:
5583 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5584 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5585 Opc = AArch64::LD1Fourv2d;
5586 Offset = false;
5587 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5588 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5590 "Unexpected register load without SVE load instructions");
5591 Opc = AArch64::LDR_ZZZZXI;
5593 }
5594 break;
5595 }
5596
5597 assert(Opc && "Unknown register class");
5598 MFI.setStackID(FI, StackID);
5599
5600 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5601 .addReg(DestReg, getDefRegState(true))
5602 .addFrameIndex(FI);
5603 if (Offset)
5604 MI.addImm(0);
5605 if (PNRReg.isValid() && !PNRReg.isVirtual())
5606 MI.addDef(PNRReg, RegState::Implicit);
5607 MI.addMemOperand(MMO);
5608}
5609
5611 const MachineInstr &UseMI,
5612 const TargetRegisterInfo *TRI) {
5613 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5614 UseMI.getIterator()),
5615 [TRI](const MachineInstr &I) {
5616 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5617 I.readsRegister(AArch64::NZCV, TRI);
5618 });
5619}
5620
5622 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5623 // The smallest scalable element supported by scaled SVE addressing
5624 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5625 // byte offset must always be a multiple of 2.
5626 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5627
5628 // VGSized offsets are divided by '2', because the VG register is the
5629 // number of 64bit granules as opposed to 128bit vector chunks,
5630 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5631 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5632 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
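  // As a worked example (values chosen purely for illustration): a StackOffset
  // of 16 fixed bytes plus 32 scalable bytes (two Z-registers) decomposes to
  // ByteSized = 16 and VGSized = 32 / 2 = 16, so the resulting DWARF
  // expression describes 16 + 16 * VG bytes, i.e. 16 + 32 * vscale bytes at
  // run time, since VG == 2 * vscale.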
5633 ByteSized = Offset.getFixed();
5634 VGSized = Offset.getScalable() / 2;
5635}
5636
5637 /// Returns the parts into which this frame offset can be decomposed for
5638 /// the purpose of describing a frame offset.
5639 /// For a non-scalable offset this is simply its byte size.
5641 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5642 int64_t &NumDataVectors) {
5643 // The smallest scalable element supported by scaled SVE addressing
5644 // modes is a predicate, which is 2 scalable bytes in size. So the scalable
5645 // byte offset must always be a multiple of 2.
5646 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5647
5648 NumBytes = Offset.getFixed();
5649 NumDataVectors = 0;
5650 NumPredicateVectors = Offset.getScalable() / 2;
5651 // This method is used to get the offsets to adjust the frame offset.
5652 // If the function requires ADDPL to be used and needs more than two ADDPL
5653 // instructions, part of the offset is folded into NumDataVectors so that it
5654 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
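  // As a worked example (numbers chosen purely for illustration): a scalable
  // offset of 140 bytes gives NumPredicateVectors = 70. Since 70 > 62 it is
  // folded to NumDataVectors = 8 and NumPredicateVectors = 6, which can be
  // materialized as one ADDVL (#8) plus one ADDPL (#6) instead of three
  // ADDPL instructions (#31 + #31 + #8).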
5655 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5656 NumPredicateVectors > 62) {
5657 NumDataVectors = NumPredicateVectors / 8;
5658 NumPredicateVectors -= NumDataVectors * 8;
5659 }
5660}
5661
5662// Convenience function to create a DWARF expression for
5663// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5664static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5665 int NumVGScaledBytes, unsigned VG,
5666 llvm::raw_string_ostream &Comment) {
5667 uint8_t buffer[16];
5668
5669 if (NumBytes) {
5670 Expr.push_back(dwarf::DW_OP_consts);
5671 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5672 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5673 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5674 }
5675
5676 if (NumVGScaledBytes) {
5677 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5678 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5679
5680 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5681 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5682 Expr.push_back(0);
5683
5684 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5685 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5686
5687 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5688 << std::abs(NumVGScaledBytes) << " * VG";
5689 }
5690}
5691
5692// Creates an MCCFIInstruction:
5693// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5695 unsigned Reg,
5696 const StackOffset &Offset) {
5697 int64_t NumBytes, NumVGScaledBytes;
5699 NumVGScaledBytes);
5700 std::string CommentBuffer;
5701 llvm::raw_string_ostream Comment(CommentBuffer);
5702
5703 if (Reg == AArch64::SP)
5704 Comment << "sp";
5705 else if (Reg == AArch64::FP)
5706 Comment << "fp";
5707 else
5708 Comment << printReg(Reg, &TRI);
5709
5710 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5711 SmallString<64> Expr;
5712 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5713 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5714 Expr.push_back(0);
5715 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5716 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5717
5718 // Wrap this into DW_CFA_def_cfa.
5719 SmallString<64> DefCfaExpr;
5720 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5721 uint8_t buffer[16];
5722 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5723 DefCfaExpr.append(Expr.str());
5724 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5725 Comment.str());
5726}
5727
5729 unsigned FrameReg, unsigned Reg,
5730 const StackOffset &Offset,
5731 bool LastAdjustmentWasScalable) {
5732 if (Offset.getScalable())
5733 return createDefCFAExpression(TRI, Reg, Offset);
5734
5735 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5736 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5737
5738 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5739 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5740}
5741
5743 unsigned Reg,
5744 const StackOffset &OffsetFromDefCFA) {
5745 int64_t NumBytes, NumVGScaledBytes;
5747 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5748
5749 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5750
5751 // Non-scalable offsets can use DW_CFA_offset directly.
5752 if (!NumVGScaledBytes)
5753 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5754
5755 std::string CommentBuffer;
5756 llvm::raw_string_ostream Comment(CommentBuffer);
5757 Comment << printReg(Reg, &TRI) << " @ cfa";
5758
5759 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5760 SmallString<64> OffsetExpr;
5761 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5762 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5763
5764 // Wrap this into DW_CFA_expression
5765 SmallString<64> CfaExpr;
5766 CfaExpr.push_back(dwarf::DW_CFA_expression);
5767 uint8_t buffer[16];
5768 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5769 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5770 CfaExpr.append(OffsetExpr.str());
5771
5772 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5773 Comment.str());
5774}
5775
5776// Helper function to emit a frame offset adjustment from a given
5777// pointer (SrcReg), stored into DestReg. This function is explicit
5778 // in that it requires the caller to supply the opcode.
5781 const DebugLoc &DL, unsigned DestReg,
5782 unsigned SrcReg, int64_t Offset, unsigned Opc,
5783 const TargetInstrInfo *TII,
5784 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5785 bool *HasWinCFI, bool EmitCFAOffset,
5786 StackOffset CFAOffset, unsigned FrameReg) {
5787 int Sign = 1;
5788 unsigned MaxEncoding, ShiftSize;
5789 switch (Opc) {
5790 case AArch64::ADDXri:
5791 case AArch64::ADDSXri:
5792 case AArch64::SUBXri:
5793 case AArch64::SUBSXri:
5794 MaxEncoding = 0xfff;
5795 ShiftSize = 12;
5796 break;
5797 case AArch64::ADDVL_XXI:
5798 case AArch64::ADDPL_XXI:
5799 case AArch64::ADDSVL_XXI:
5800 case AArch64::ADDSPL_XXI:
5801 MaxEncoding = 31;
5802 ShiftSize = 0;
5803 if (Offset < 0) {
5804 MaxEncoding = 32;
5805 Sign = -1;
5806 Offset = -Offset;
5807 }
5808 break;
5809 default:
5810 llvm_unreachable("Unsupported opcode");
5811 }
5812
5813 // `Offset` can be in bytes or in "scalable bytes".
5814 int VScale = 1;
5815 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5816 VScale = 16;
5817 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5818 VScale = 2;
5819
5820 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5821 // scratch register. If DestReg is a virtual register, use it as the
5822 // scratch register; otherwise, create a new virtual register (to be
5823 // replaced by the scavenger at the end of PEI). That case can be optimized
5824 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5825 // register can be loaded with offset%8 and the add/sub can use an extending
5826 // instruction with LSL#3.
5827 // Currently the function handles any offset but generates a poor sequence
5828 // of code.
5829 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5830
5831 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5832 Register TmpReg = DestReg;
5833 if (TmpReg == AArch64::XZR)
5835 &AArch64::GPR64RegClass);
5836 do {
5837 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5838 unsigned LocalShiftSize = 0;
5839 if (ThisVal > MaxEncoding) {
5840 ThisVal = ThisVal >> ShiftSize;
5841 LocalShiftSize = ShiftSize;
5842 }
5843 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5844 "Encoding cannot handle value that big");
5845
5846 Offset -= ThisVal << LocalShiftSize;
5847 if (Offset == 0)
5848 TmpReg = DestReg;
5849 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5850 .addReg(SrcReg)
5851 .addImm(Sign * (int)ThisVal);
5852 if (ShiftSize)
5853 MBI = MBI.addImm(
5855 MBI = MBI.setMIFlag(Flag);
5856
5857 auto Change =
5858 VScale == 1
5859 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5860 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5861 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5862 CFAOffset += Change;
5863 else
5864 CFAOffset -= Change;
5865 if (EmitCFAOffset && DestReg == TmpReg) {
5866 MachineFunction &MF = *MBB.getParent();
5867 const TargetSubtargetInfo &STI = MF.getSubtarget();
5868 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5869
5870 unsigned CFIIndex = MF.addFrameInst(
5871 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5872 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5873 .addCFIIndex(CFIIndex)
5874 .setMIFlags(Flag);
5875 }
5876
5877 if (NeedsWinCFI) {
5878 assert(Sign == 1 && "SEH directives should always have a positive sign");
5879 int Imm = (int)(ThisVal << LocalShiftSize);
5880 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5881 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5882 if (HasWinCFI)
5883 *HasWinCFI = true;
5884 if (Imm == 0)
5885 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5886 else
5887 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5888 .addImm(Imm)
5889 .setMIFlag(Flag);
5890 assert(Offset == 0 && "Expected remaining offset to be zero to "
5891 "emit a single SEH directive");
5892 } else if (DestReg == AArch64::SP) {
5893 if (HasWinCFI)
5894 *HasWinCFI = true;
5895 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5896 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5897 .addImm(Imm)
5898 .setMIFlag(Flag);
5899 }
5900 }
5901
5902 SrcReg = TmpReg;
5903 } while (Offset);
5904}
5905
5908 unsigned DestReg, unsigned SrcReg,
5910 MachineInstr::MIFlag Flag, bool SetNZCV,
5911 bool NeedsWinCFI, bool *HasWinCFI,
5912 bool EmitCFAOffset, StackOffset CFAOffset,
5913 unsigned FrameReg) {
5914 // If a function is marked as arm_locally_streaming, then the runtime value of
5915 // vscale in the prologue/epilogue is different from the runtime value of vscale
5916 // in the function's body. To avoid having to consider multiple vscales,
5917 // we can use `addsvl` to allocate any scalable stack-slots, which under
5918 // most circumstances will be only locals, not callee-save slots.
5919 const Function &F = MBB.getParent()->getFunction();
5920 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5921
5922 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5924 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5925
5926 // First emit non-scalable frame offsets, or a simple 'mov'.
5927 if (Bytes || (!Offset && SrcReg != DestReg)) {
5928 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5929 "SP increment/decrement not 8-byte aligned");
5930 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5931 if (Bytes < 0) {
5932 Bytes = -Bytes;
5933 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5934 }
5935 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5936 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5937 FrameReg);
5938 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5939 ? StackOffset::getFixed(-Bytes)
5940 : StackOffset::getFixed(Bytes);
5941 SrcReg = DestReg;
5942 FrameReg = DestReg;
5943 }
5944
5945 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5946 "SetNZCV not supported with SVE vectors");
5947 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5948 "WinCFI not supported with SVE vectors");
5949
5950 if (NumDataVectors) {
5951 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5952 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5953 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5954 CFAOffset, FrameReg);
5955 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5956 SrcReg = DestReg;
5957 }
5958
5959 if (NumPredicateVectors) {
5960 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5961 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5962 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5963 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5964 CFAOffset, FrameReg);
5965 }
5966}
5967
5970 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5971 LiveIntervals *LIS, VirtRegMap *VRM) const {
5972 // This is a bit of a hack. Consider this instruction:
5973 //
5974 // %0 = COPY %sp; GPR64all:%0
5975 //
5976 // We explicitly chose GPR64all for the virtual register so such a copy might
5977 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5978 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5979 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5980 //
5981 // To prevent that, we are going to constrain the %0 register class here.
5982 if (MI.isFullCopy()) {
5983 Register DstReg = MI.getOperand(0).getReg();
5984 Register SrcReg = MI.getOperand(1).getReg();
5985 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5986 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5987 return nullptr;
5988 }
5989 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5990 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5991 return nullptr;
5992 }
5993 // Nothing can be folded with a copy from/to NZCV.
5994 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5995 return nullptr;
5996 }
5997
5998 // Handle the case where a copy is being spilled or filled but the source
5999 // and destination register classes don't match. For example:
6000 //
6001 // %0 = COPY %xzr; GPR64common:%0
6002 //
6003 // In this case we can still safely fold away the COPY and generate the
6004 // following spill code:
6005 //
6006 // STRXui %xzr, %stack.0
6007 //
6008 // This also eliminates spilled cross register class COPYs (e.g. between x and
6009 // d regs) of the same size. For example:
6010 //
6011 // %0 = COPY %1; GPR64:%0, FPR64:%1
6012 //
6013 // will be filled as
6014 //
6015 // LDRDui %0, fi<#0>
6016 //
6017 // instead of
6018 //
6019 // LDRXui %Temp, fi<#0>
6020 // %0 = FMOV %Temp
6021 //
6022 if (MI.isCopy() && Ops.size() == 1 &&
6023 // Make sure we're only folding the explicit COPY defs/uses.
6024 (Ops[0] == 0 || Ops[0] == 1)) {
6025 bool IsSpill = Ops[0] == 0;
6026 bool IsFill = !IsSpill;
6028 const MachineRegisterInfo &MRI = MF.getRegInfo();
6029 MachineBasicBlock &MBB = *MI.getParent();
6030 const MachineOperand &DstMO = MI.getOperand(0);
6031 const MachineOperand &SrcMO = MI.getOperand(1);
6032 Register DstReg = DstMO.getReg();
6033 Register SrcReg = SrcMO.getReg();
6034 // This is slightly expensive to compute for physical regs since
6035 // getMinimalPhysRegClass is slow.
6036 auto getRegClass = [&](unsigned Reg) {
6037 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6038 : TRI.getMinimalPhysRegClass(Reg);
6039 };
6040
6041 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6042 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6043 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6044 "Mismatched register size in non subreg COPY");
6045 if (IsSpill)
6046 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6047 getRegClass(SrcReg), &TRI, Register());
6048 else
6049 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6050 getRegClass(DstReg), &TRI, Register());
6051 return &*--InsertPt;
6052 }
6053
6054 // Handle cases like spilling def of:
6055 //
6056 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6057 //
6058 // where the physical register source can be widened and stored to the full
6059 // virtual reg destination stack slot, in this case producing:
6060 //
6061 // STRXui %xzr, %stack.0
6062 //
6063 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6064 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6065 assert(SrcMO.getSubReg() == 0 &&
6066 "Unexpected subreg on physical register");
6067 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6068 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6069 Register());
6070 return &*--InsertPt;
6071 }
6072
6073 // Handle cases like filling use of:
6074 //
6075 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6076 //
6077 // where we can load the full virtual reg source stack slot into the subreg
6078 // destination, in this case producing:
6079 //
6080 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6081 //
6082 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6083 const TargetRegisterClass *FillRC;
6084 switch (DstMO.getSubReg()) {
6085 default:
6086 FillRC = nullptr;
6087 break;
6088 case AArch64::sub_32:
6089 FillRC = &AArch64::GPR32RegClass;
6090 break;
6091 case AArch64::ssub:
6092 FillRC = &AArch64::FPR32RegClass;
6093 break;
6094 case AArch64::dsub:
6095 FillRC = &AArch64::FPR64RegClass;
6096 break;
6097 }
6098
6099 if (FillRC) {
6100 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6101 TRI.getRegSizeInBits(*FillRC) &&
6102 "Mismatched regclass size on folded subreg COPY");
6103 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6104 Register());
6105 MachineInstr &LoadMI = *--InsertPt;
6106 MachineOperand &LoadDst = LoadMI.getOperand(0);
6107 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6108 LoadDst.setSubReg(DstMO.getSubReg());
6109 LoadDst.setIsUndef();
6110 return &LoadMI;
6111 }
6112 }
6113 }
6114
6115 // Cannot fold.
6116 return nullptr;
6117}
6118
6120 StackOffset &SOffset,
6121 bool *OutUseUnscaledOp,
6122 unsigned *OutUnscaledOp,
6123 int64_t *EmittableOffset) {
6124 // Set output values in case of early exit.
6125 if (EmittableOffset)
6126 *EmittableOffset = 0;
6127 if (OutUseUnscaledOp)
6128 *OutUseUnscaledOp = false;
6129 if (OutUnscaledOp)
6130 *OutUnscaledOp = 0;
6131
6132 // Exit early for structured vector spills/fills as they can't take an
6133 // immediate offset.
6134 switch (MI.getOpcode()) {
6135 default:
6136 break;
6137 case AArch64::LD1Rv1d:
6138 case AArch64::LD1Rv2s:
6139 case AArch64::LD1Rv2d:
6140 case AArch64::LD1Rv4h:
6141 case AArch64::LD1Rv4s:
6142 case AArch64::LD1Rv8b:
6143 case AArch64::LD1Rv8h:
6144 case AArch64::LD1Rv16b:
6145 case AArch64::LD1Twov2d:
6146 case AArch64::LD1Threev2d:
6147 case AArch64::LD1Fourv2d:
6148 case AArch64::LD1Twov1d:
6149 case AArch64::LD1Threev1d:
6150 case AArch64::LD1Fourv1d:
6151 case AArch64::ST1Twov2d:
6152 case AArch64::ST1Threev2d:
6153 case AArch64::ST1Fourv2d:
6154 case AArch64::ST1Twov1d:
6155 case AArch64::ST1Threev1d:
6156 case AArch64::ST1Fourv1d:
6157 case AArch64::ST1i8:
6158 case AArch64::ST1i16:
6159 case AArch64::ST1i32:
6160 case AArch64::ST1i64:
6161 case AArch64::IRG:
6162 case AArch64::IRGstack:
6163 case AArch64::STGloop:
6164 case AArch64::STZGloop:
6166 }
6167
6168 // Get the min/max offset and the scale.
6169 TypeSize ScaleValue(0U, false), Width(0U, false);
6170 int64_t MinOff, MaxOff;
6171 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6172 MaxOff))
6173 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6174
6175 // Construct the complete offset.
6176 bool IsMulVL = ScaleValue.isScalable();
6177 unsigned Scale = ScaleValue.getKnownMinValue();
6178 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6179
6180 const MachineOperand &ImmOpnd =
6181 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6182 Offset += ImmOpnd.getImm() * Scale;
6183
6184 // If the offset doesn't match the scale, we rewrite the instruction to
6185 // use the unscaled instruction instead. We do the same if we have a negative
6186 // offset and there is an unscaled op to use.
6187 std::optional<unsigned> UnscaledOp =
6189 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6190 if (useUnscaledOp &&
6191 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6192 MaxOff))
6193 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6194
6195 Scale = ScaleValue.getKnownMinValue();
6196 assert(IsMulVL == ScaleValue.isScalable() &&
6197 "Unscaled opcode has different value for scalable");
6198
6199 int64_t Remainder = Offset % Scale;
6200 assert(!(Remainder && useUnscaledOp) &&
6201 "Cannot have remainder when using unscaled op");
6202
6203 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6204 int64_t NewOffset = Offset / Scale;
6205 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6206 Offset = Remainder;
6207 else {
6208 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6209 Offset = Offset - (NewOffset * Scale);
6210 }
6211
6212 if (EmittableOffset)
6213 *EmittableOffset = NewOffset;
6214 if (OutUseUnscaledOp)
6215 *OutUseUnscaledOp = useUnscaledOp;
6216 if (OutUnscaledOp && UnscaledOp)
6217 *OutUnscaledOp = *UnscaledOp;
6218
6219 if (IsMulVL)
6220 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6221 else
6222 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6224 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6225}
6226
6228 unsigned FrameReg, StackOffset &Offset,
6229 const AArch64InstrInfo *TII) {
6230 unsigned Opcode = MI.getOpcode();
6231 unsigned ImmIdx = FrameRegIdx + 1;
6232
6233 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6234 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6235 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6236 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6237 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6238 MI.eraseFromParent();
6239 Offset = StackOffset();
6240 return true;
6241 }
6242
6243 int64_t NewOffset;
6244 unsigned UnscaledOp;
6245 bool UseUnscaledOp;
6246 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6247 &UnscaledOp, &NewOffset);
6250 // Replace the FrameIndex with FrameReg.
6251 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6252 if (UseUnscaledOp)
6253 MI.setDesc(TII->get(UnscaledOp));
6254
6255 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6256 return !Offset;
6257 }
6258
6259 return false;
6260}
6261
6264 DebugLoc DL;
6265 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
6266}
6267
6269 return MCInstBuilder(AArch64::HINT).addImm(0);
6270}
6271
6272// AArch64 supports MachineCombiner.
6273bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6274
6275// True when Opc sets flag
6276static bool isCombineInstrSettingFlag(unsigned Opc) {
6277 switch (Opc) {
6278 case AArch64::ADDSWrr:
6279 case AArch64::ADDSWri:
6280 case AArch64::ADDSXrr:
6281 case AArch64::ADDSXri:
6282 case AArch64::SUBSWrr:
6283 case AArch64::SUBSXrr:
6284 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6285 case AArch64::SUBSWri:
6286 case AArch64::SUBSXri:
6287 return true;
6288 default:
6289 break;
6290 }
6291 return false;
6292}
6293
6294// 32b Opcodes that can be combined with a MUL
6295static bool isCombineInstrCandidate32(unsigned Opc) {
6296 switch (Opc) {
6297 case AArch64::ADDWrr:
6298 case AArch64::ADDWri:
6299 case AArch64::SUBWrr:
6300 case AArch64::ADDSWrr:
6301 case AArch64::ADDSWri:
6302 case AArch64::SUBSWrr:
6303 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6304 case AArch64::SUBWri:
6305 case AArch64::SUBSWri:
6306 return true;
6307 default:
6308 break;
6309 }
6310 return false;
6311}
6312
6313// 64b Opcodes that can be combined with a MUL
6314static bool isCombineInstrCandidate64(unsigned Opc) {
6315 switch (Opc) {
6316 case AArch64::ADDXrr:
6317 case AArch64::ADDXri:
6318 case AArch64::SUBXrr:
6319 case AArch64::ADDSXrr:
6320 case AArch64::ADDSXri:
6321 case AArch64::SUBSXrr:
6322 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6323 case AArch64::SUBXri:
6324 case AArch64::SUBSXri:
6325 case AArch64::ADDv8i8:
6326 case AArch64::ADDv16i8:
6327 case AArch64::ADDv4i16:
6328 case AArch64::ADDv8i16:
6329 case AArch64::ADDv2i32:
6330 case AArch64::ADDv4i32:
6331 case AArch64::SUBv8i8:
6332 case AArch64::SUBv16i8:
6333 case AArch64::SUBv4i16:
6334 case AArch64::SUBv8i16:
6335 case AArch64::SUBv2i32:
6336 case AArch64::SUBv4i32:
6337 return true;
6338 default:
6339 break;
6340 }
6341 return false;
6342}
6343
6344// FP Opcodes that can be combined with a FMUL.
6345static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6346 switch (Inst.getOpcode()) {
6347 default:
6348 break;
6349 case AArch64::FADDHrr:
6350 case AArch64::FADDSrr:
6351 case AArch64::FADDDrr:
6352 case AArch64::FADDv4f16:
6353 case AArch64::FADDv8f16:
6354 case AArch64::FADDv2f32:
6355 case AArch64::FADDv2f64:
6356 case AArch64::FADDv4f32:
6357 case AArch64::FSUBHrr:
6358 case AArch64::FSUBSrr:
6359 case AArch64::FSUBDrr:
6360 case AArch64::FSUBv4f16:
6361 case AArch64::FSUBv8f16:
6362 case AArch64::FSUBv2f32:
6363 case AArch64::FSUBv2f64:
6364 case AArch64::FSUBv4f32:
6366 // We can fuse FADD/FSUB with FMUL if fusion is either allowed globally by
6367 // the target options or if FADD/FSUB has the contract fast-math flag.
6368 return Options.UnsafeFPMath ||
6369 Options.AllowFPOpFusion == FPOpFusion::Fast ||
6371 return true;
6372 }
6373 return false;
6374}
6375
6376// Opcodes that can be combined with a MUL
6377static bool isCombineInstrCandidate(unsigned Opc) {
6379}
6380
6381//
6382// Utility routine that checks if \param MO is defined by an
6383// \param CombineOpc instruction in the basic block \param MBB
6385 unsigned CombineOpc, unsigned ZeroReg = 0,
6386 bool CheckZeroReg = false) {
6388 MachineInstr *MI = nullptr;
6389
6390 if (MO.isReg() && MO.getReg().isVirtual())
6391 MI = MRI.getUniqueVRegDef(MO.getReg());
6392 // And it needs to be in the trace (otherwise, it won't have a depth).
6393 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
6394 return false;
6395 // It must only be used by the user we combine with.
6396 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6397 return false;
6398
6399 if (CheckZeroReg) {
6400 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6401 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6402 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6403 // The third input reg must be zero.
6404 if (MI->getOperand(3).getReg() != ZeroReg)
6405 return false;
6406 }
6407
6408 if (isCombineInstrSettingFlag(CombineOpc) &&
6409 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6410 return false;
6411
6412 return true;
6413}
6414
6415//
6416// Is \param MO defined by an integer multiply and can be combined?
6418 unsigned MulOpc, unsigned ZeroReg) {
6419 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6420}
6421
6422//
6423// Is \param MO defined by a floating-point multiply and can be combined?
6425 unsigned MulOpc) {
6426 return canCombine(MBB, MO, MulOpc);
6427}
6428
6429// TODO: There are many more machine instruction opcodes to match:
6430// 1. Other data types (integer, vectors)
6431// 2. Other math / logic operations (xor, or)
6432// 3. Other forms of the same operation (intrinsics and other variants)
6434 bool Invert) const {
6435 if (Invert)
6436 return false;
6437 switch (Inst.getOpcode()) {
6438 // == Floating-point types ==
6439 // -- Floating-point instructions --
6440 case AArch64::FADDHrr:
6441 case AArch64::FADDSrr:
6442 case AArch64::FADDDrr:
6443 case AArch64::FMULHrr:
6444 case AArch64::FMULSrr:
6445 case AArch64::FMULDrr:
6446 case AArch64::FMULX16:
6447 case AArch64::FMULX32:
6448 case AArch64::FMULX64:
6449 // -- Advanced SIMD instructions --
6450 case AArch64::FADDv4f16:
6451 case AArch64::FADDv8f16:
6452 case AArch64::FADDv2f32:
6453 case AArch64::FADDv4f32:
6454 case AArch64::FADDv2f64:
6455 case AArch64::FMULv4f16:
6456 case AArch64::FMULv8f16:
6457 case AArch64::FMULv2f32:
6458 case AArch64::FMULv4f32:
6459 case AArch64::FMULv2f64:
6460 case AArch64::FMULXv4f16:
6461 case AArch64::FMULXv8f16:
6462 case AArch64::FMULXv2f32:
6463 case AArch64::FMULXv4f32:
6464 case AArch64::FMULXv2f64:
6465 // -- SVE instructions --
6466 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6467 // in the SVE instruction set (though there are predicated ones).
6468 case AArch64::FADD_ZZZ_H:
6469 case AArch64::FADD_ZZZ_S:
6470 case AArch64::FADD_ZZZ_D:
6471 case AArch64::FMUL_ZZZ_H:
6472 case AArch64::FMUL_ZZZ_S:
6473 case AArch64::FMUL_ZZZ_D:
6474 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6477
6478 // == Integer types ==
6479 // -- Base instructions --
6480 // Opcodes MULWrr and MULXrr don't exist because
6481 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6482 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6483 // The machine-combiner does not support three-source-operand machine
6484 // instructions, so we cannot reassociate MULs.
6485 case AArch64::ADDWrr:
6486 case AArch64::ADDXrr:
6487 case AArch64::ANDWrr:
6488 case AArch64::ANDXrr:
6489 case AArch64::ORRWrr:
6490 case AArch64::ORRXrr:
6491 case AArch64::EORWrr:
6492 case AArch64::EORXrr:
6493 case AArch64::EONWrr:
6494 case AArch64::EONXrr:
6495 // -- Advanced SIMD instructions --
6496 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6497 // in the Advanced SIMD instruction set.
6498 case AArch64::ADDv8i8:
6499 case AArch64::ADDv16i8:
6500 case AArch64::ADDv4i16:
6501 case AArch64::ADDv8i16:
6502 case AArch64::ADDv2i32:
6503 case AArch64::ADDv4i32:
6504 case AArch64::ADDv1i64:
6505 case AArch64::ADDv2i64:
6506 case AArch64::MULv8i8:
6507 case AArch64::MULv16i8:
6508 case AArch64::MULv4i16:
6509 case AArch64::MULv8i16:
6510 case AArch64::MULv2i32:
6511 case AArch64::MULv4i32:
6512 case AArch64::ANDv8i8:
6513 case AArch64::ANDv16i8:
6514 case AArch64::ORRv8i8:
6515 case AArch64::ORRv16i8:
6516 case AArch64::EORv8i8:
6517 case AArch64::EORv16i8:
6518 // -- SVE instructions --
6519 case AArch64::ADD_ZZZ_B:
6520 case AArch64::ADD_ZZZ_H:
6521 case AArch64::ADD_ZZZ_S:
6522 case AArch64::ADD_ZZZ_D:
6523 case AArch64::MUL_ZZZ_B:
6524 case AArch64::MUL_ZZZ_H:
6525 case AArch64::MUL_ZZZ_S:
6526 case AArch64::MUL_ZZZ_D:
6527 case AArch64::AND_ZZZ:
6528 case AArch64::ORR_ZZZ:
6529 case AArch64::EOR_ZZZ:
6530 return true;
6531
6532 default:
6533 return false;
6534 }
6535}
6536
6537/// Find instructions that can be turned into madd.
6539 SmallVectorImpl<unsigned> &Patterns) {
6540 unsigned Opc = Root.getOpcode();
6541 MachineBasicBlock &MBB = *Root.getParent();
6542 bool Found = false;
6543
6544 if (!isCombineInstrCandidate(Opc))
6545 return false;
6546 if (isCombineInstrSettingFlag(Opc)) {
6547 int Cmp_NZCV =
6548 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6549 // When NZCV is live, bail out.
6550 if (Cmp_NZCV == -1)
6551 return false;
6552 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6553 // When the opcode can't change, bail out.
6554 // CHECKME: do we miss any cases for opcode conversion?
6555 if (NewOpc == Opc)
6556 return false;
6557 Opc = NewOpc;
6558 }
6559
6560 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6561 unsigned Pattern) {
6562 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6563 Patterns.push_back(Pattern);
6564 Found = true;
6565 }
6566 };
6567
6568 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6569 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6570 Patterns.push_back(Pattern);
6571 Found = true;
6572 }
6573 };
6574
6576
6577 switch (Opc) {
6578 default:
6579 break;
6580 case AArch64::ADDWrr:
6581 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6582 "ADDWrr does not have register operands");
6583 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6584 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6585 break;
6586 case AArch64::ADDXrr:
6587 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6588 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6589 break;
6590 case AArch64::SUBWrr:
6591 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6592 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6593 break;
6594 case AArch64::SUBXrr:
6595 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6596 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6597 break;
6598 case AArch64::ADDWri:
6599 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6600 break;
6601 case AArch64::ADDXri:
6602 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6603 break;
6604 case AArch64::SUBWri:
6605 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6606 break;
6607 case AArch64::SUBXri:
6608 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6609 break;
6610 case AArch64::ADDv8i8:
6611 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6612 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6613 break;
6614 case AArch64::ADDv16i8:
6615 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6616 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6617 break;
6618 case AArch64::ADDv4i16:
6619 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6620 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6621 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6622 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6623 break;
6624 case AArch64::ADDv8i16:
6625 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6626 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6627 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6628 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6629 break;
6630 case AArch64::ADDv2i32:
6631 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6632 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6633 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6634 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6635 break;
6636 case AArch64::ADDv4i32:
6637 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6638 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6639 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6640 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6641 break;
6642 case AArch64::SUBv8i8:
6643 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6644 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6645 break;
6646 case AArch64::SUBv16i8:
6647 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6648 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6649 break;
6650 case AArch64::SUBv4i16:
6651 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6652 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6653 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6654 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6655 break;
6656 case AArch64::SUBv8i16:
6657 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6658 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6659 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6660 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6661 break;
6662 case AArch64::SUBv2i32:
6663 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6664 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6665 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6666 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6667 break;
6668 case AArch64::SUBv4i32:
6669 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6670 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6671 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6672 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6673 break;
6674 }
6675 return Found;
6676}
6677/// Floating-Point Support
6678
6679/// Find instructions that can be turned into madd.
6681 SmallVectorImpl<unsigned> &Patterns) {
6682
6683 if (!isCombineInstrCandidateFP(Root))
6684 return false;
6685
6686 MachineBasicBlock &MBB = *Root.getParent();
6687 bool Found = false;
6688
6689 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6690 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6691 Patterns.push_back(Pattern);
6692 return true;
6693 }
6694 return false;
6695 };
6696
6698
6699 switch (Root.getOpcode()) {
6700 default:
6701 assert(false && "Unsupported FP instruction in combiner\n");
6702 break;
6703 case AArch64::FADDHrr:
6704 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6705 "FADDHrr does not have register operands");
6706
6707 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6708 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6709 break;
6710 case AArch64::FADDSrr:
6711 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6712 "FADDSrr does not have register operands");
6713
6714 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6715 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6716
6717 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6718 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6719 break;
6720 case AArch64::FADDDrr:
6721 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6722 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6723
6724 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6725 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6726 break;
6727 case AArch64::FADDv4f16:
6728 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6729 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6730
6731 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6732 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6733 break;
6734 case AArch64::FADDv8f16:
6735 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6736 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6737
6738 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6739 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6740 break;
6741 case AArch64::FADDv2f32:
6742 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6743 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6744
6745 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6746 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6747 break;
6748 case AArch64::FADDv2f64:
6749 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6750 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6751
6752 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6753 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6754 break;
6755 case AArch64::FADDv4f32:
6756 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6757 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6758
6759 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6760 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6761 break;
6762 case AArch64::FSUBHrr:
6763 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6764 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6765 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6766 break;
6767 case AArch64::FSUBSrr:
6768 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6769
6770 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6771 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6772
6773 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6774 break;
6775 case AArch64::FSUBDrr:
6776 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6777
6778 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6779 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6780
6781 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6782 break;
6783 case AArch64::FSUBv4f16:
6784 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6785 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6786
6787 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6788 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6789 break;
6790 case AArch64::FSUBv8f16:
6791 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6792 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6793
6794 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6795 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6796 break;
6797 case AArch64::FSUBv2f32:
6798 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6799 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6800
6801 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6802 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6803 break;
6804 case AArch64::FSUBv2f64:
6805 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6806 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6807
6808 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6809 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6810 break;
6811 case AArch64::FSUBv4f32:
6812 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6813 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6814
6815 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6816 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6817 break;
6818 }
6819 return Found;
6820}
6821
6823 SmallVectorImpl<unsigned> &Patterns) {
6824 MachineBasicBlock &MBB = *Root.getParent();
6825 bool Found = false;
6826
6827 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6829 MachineOperand &MO = Root.getOperand(Operand);
6830 MachineInstr *MI = nullptr;
6831 if (MO.isReg() && MO.getReg().isVirtual())
6832 MI = MRI.getUniqueVRegDef(MO.getReg());
6833 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6834 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6835 MI->getOperand(1).getReg().isVirtual())
6836 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6837 if (MI && MI->getOpcode() == Opcode) {
6838 Patterns.push_back(Pattern);
6839 return true;
6840 }
6841 return false;
6842 };
6843
6845
6846 switch (Root.getOpcode()) {
6847 default:
6848 return false;
6849 case AArch64::FMULv2f32:
6850 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6851 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6852 break;
6853 case AArch64::FMULv2f64:
6854 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6855 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6856 break;
6857 case AArch64::FMULv4f16:
6858 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6859 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6860 break;
6861 case AArch64::FMULv4f32:
6862 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6863 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6864 break;
6865 case AArch64::FMULv8f16:
6866 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6867 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6868 break;
6869 }
6870
6871 return Found;
6872}
6873
6875 SmallVectorImpl<unsigned> &Patterns) {
6876 unsigned Opc = Root.getOpcode();
6877 MachineBasicBlock &MBB = *Root.getParent();
6879
6880 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6881 MachineOperand &MO = Root.getOperand(1);
6882 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6883 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6884 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6888 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6889 Patterns.push_back(Pattern);
6890 return true;
6891 }
6892 return false;
6893 };
6894
6895 switch (Opc) {
6896 default:
6897 break;
6898 case AArch64::FNEGDr:
6899 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6900 case AArch64::FNEGSr:
6901 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6902 }
6903
6904 return false;
6905}
6906
6907/// Return true when a code sequence can improve throughput. It
6908/// should be called only for instructions in loops.
6909/// \param Pattern - combiner pattern
6911 switch (Pattern) {
6912 default:
6913 break;
7019 return true;
7020 } // end switch (Pattern)
7021 return false;
7022}
7023
7024/// Find other MI combine patterns.
7026 SmallVectorImpl<unsigned> &Patterns) {
7027 // A - (B + C) ==> (A - B) - C or (A - C) - B
7028 unsigned Opc = Root.getOpcode();
7029 MachineBasicBlock &MBB = *Root.getParent();
7030
7031 switch (Opc) {
7032 case AArch64::SUBWrr:
7033 case AArch64::SUBSWrr:
7034 case AArch64::SUBXrr:
7035 case AArch64::SUBSXrr:
7036 // Found candidate root.
7037 break;
7038 default:
7039 return false;
7040 }
7041
7042 if (isCombineInstrSettingFlag(Opc) &&
7043 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7044 -1)
7045 return false;
7046
7047 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7048 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7049 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7050 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7053 return true;
7054 }
7055
7056 return false;
7057}
7058
7061 switch (Pattern) {
7065 default:
7067 }
7068}
7069
7070/// Return true when there is potentially a faster code sequence for an
7071/// instruction chain ending in \p Root. All potential patterns are listed in
7072 /// the \p Patterns vector. Patterns should be sorted in priority order since the
7073/// pattern evaluator stops checking as soon as it finds a faster sequence.
7074
7076 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7077 bool DoRegPressureReduce) const {
7078 // Integer patterns
7079 if (getMaddPatterns(Root, Patterns))
7080 return true;
7081 // Floating point patterns
7082 if (getFMULPatterns(Root, Patterns))
7083 return true;
7084 if (getFMAPatterns(Root, Patterns))
7085 return true;
7086 if (getFNEGPatterns(Root, Patterns))
7087 return true;
7088
7089 // Other patterns
7090 if (getMiscPatterns(Root, Patterns))
7091 return true;
7092
7093 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7094 DoRegPressureReduce);
7095}
7096
7098/// genFusedMultiply - Generate fused multiply instructions.
7099/// This function supports both integer and floating point instructions.
7100/// A typical example:
7101/// F|MUL I=A,B,0
7102/// F|ADD R,I,C
7103/// ==> F|MADD R,A,B,C
7104/// \param MF Containing MachineFunction
7105/// \param MRI Register information
7106/// \param TII Target information
7107/// \param Root is the F|ADD instruction
7108/// \param [out] InsInstrs is a vector of machine instructions and will
7109/// contain the generated madd instruction
7110/// \param IdxMulOpd is index of operand in Root that is the result of
7111/// the F|MUL. In the example above IdxMulOpd is 1.
7112 /// \param MaddOpc the opcode of the f|madd instruction
7113/// \param RC Register class of operands
7114 /// \param kind The kind of fma instruction (addressing mode) to be generated
7115/// \param ReplacedAddend is the result register from the instruction
7116/// replacing the non-combined operand, if any.
7117static MachineInstr *
7119 const TargetInstrInfo *TII, MachineInstr &Root,
7120 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7121 unsigned MaddOpc, const TargetRegisterClass *RC,
7122 FMAInstKind kind = FMAInstKind::Default,
7123 const Register *ReplacedAddend = nullptr) {
7124 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7125
7126 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7127 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7128 Register ResultReg = Root.getOperand(0).getReg();
7129 Register SrcReg0 = MUL->getOperand(1).getReg();
7130 bool Src0IsKill = MUL->getOperand(1).isKill();
7131 Register SrcReg1 = MUL->getOperand(2).getReg();
7132 bool Src1IsKill = MUL->getOperand(2).isKill();
7133
7134 Register SrcReg2;
7135 bool Src2IsKill;
7136 if (ReplacedAddend) {
7137 // If we just generated a new addend, we must be its only use.
7138 SrcReg2 = *ReplacedAddend;
7139 Src2IsKill = true;
7140 } else {
7141 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7142 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7143 }
7144
7145 if (ResultReg.isVirtual())
7146 MRI.constrainRegClass(ResultReg, RC);
7147 if (SrcReg0.isVirtual())
7148 MRI.constrainRegClass(SrcReg0, RC);
7149 if (SrcReg1.isVirtual())
7150 MRI.constrainRegClass(SrcReg1, RC);
7151 if (SrcReg2.isVirtual())
7152 MRI.constrainRegClass(SrcReg2, RC);
7153
7155 if (kind == FMAInstKind::Default)
7156 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7157 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7158 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7159 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7160 else if (kind == FMAInstKind::Indexed)
7161 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7162 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7163 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7164 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7165 .addImm(MUL->getOperand(3).getImm());
7166 else if (kind == FMAInstKind::Accumulator)
7167 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7168 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7169 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7170 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7171 else
7172 assert(false && "Invalid FMA instruction kind \n");
7173 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7174 InsInstrs.push_back(MIB);
7175 return MUL;
7176}
7177
7178static MachineInstr *
7180 const TargetInstrInfo *TII, MachineInstr &Root,
7182 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7183
7184 unsigned Opc = 0;
7185 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7186 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7187 Opc = AArch64::FNMADDSrrr;
7188 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7189 Opc = AArch64::FNMADDDrrr;
7190 else
7191 return nullptr;
7192
7193 Register ResultReg = Root.getOperand(0).getReg();
7194 Register SrcReg0 = MAD->getOperand(1).getReg();
7195 Register SrcReg1 = MAD->getOperand(2).getReg();
7196 Register SrcReg2 = MAD->getOperand(3).getReg();
7197 bool Src0IsKill = MAD->getOperand(1).isKill();
7198 bool Src1IsKill = MAD->getOperand(2).isKill();
7199 bool Src2IsKill = MAD->getOperand(3).isKill();
7200 if (ResultReg.isVirtual())
7201 MRI.constrainRegClass(ResultReg, RC);
7202 if (SrcReg0.isVirtual())
7203 MRI.constrainRegClass(SrcReg0, RC);
7204 if (SrcReg1.isVirtual())
7205 MRI.constrainRegClass(SrcReg1, RC);
7206 if (SrcReg2.isVirtual())
7207 MRI.constrainRegClass(SrcReg2, RC);
7208
7210 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7211 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7212 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7213 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7214 InsInstrs.push_back(MIB);
7215
7216 return MAD;
7217}
7218
7219/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7220static MachineInstr *
7223 unsigned IdxDupOp, unsigned MulOpc,
7225 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
7226 "Invalid index of FMUL operand");
7227
7228 MachineFunction &MF = *Root.getMF();
7230
7231 MachineInstr *Dup =
7232 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
7233
7234 if (Dup->getOpcode() == TargetOpcode::COPY)
7235 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
7236
7237 Register DupSrcReg = Dup->getOperand(1).getReg();
7238 MRI.clearKillFlags(DupSrcReg);
7239 MRI.constrainRegClass(DupSrcReg, RC);
7240
7241 unsigned DupSrcLane = Dup->getOperand(2).getImm();
7242
7243 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
7244 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
7245
7246 Register ResultReg = Root.getOperand(0).getReg();
7247
7249 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
7250 .add(MulOp)
7251 .addReg(DupSrcReg)
7252 .addImm(DupSrcLane);
7253
7254 InsInstrs.push_back(MIB);
7255 return &Root;
7256}
7257
7258/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
7259/// instructions.
7260///
7261/// \see genFusedMultiply
7265 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7266 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7267 FMAInstKind::Accumulator);
7268}
7269
7270/// genNeg - Helper to generate an intermediate negation of the second operand
7271/// of Root
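/// For example (a sketch, not taken from real MIR): for a pattern of the form
///   SUB R, MUL(A,B), C
/// the callers below first emit
///   NEG NewVR, C
/// and then accumulate the multiply onto NewVR (e.g. with an MLA), so that
/// R = -C + A*B, which equals A*B - C.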
7273 const TargetInstrInfo *TII, MachineInstr &Root,
7275 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
7276 unsigned MnegOpc, const TargetRegisterClass *RC) {
7277 Register NewVR = MRI.createVirtualRegister(RC);
7279 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
7280 .add(Root.getOperand(2));
7281 InsInstrs.push_back(MIB);
7282
7283 assert(InstrIdxForVirtReg.empty());
7284 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7285
7286 return NewVR;
7287}
7288
7289/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7290/// instructions with an additional negation of the accumulator
7294 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7295 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7296 assert(IdxMulOpd == 1);
7297
7298 Register NewVR =
7299 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7300 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7301 FMAInstKind::Accumulator, &NewVR);
7302}
7303
7304/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
7305/// instructions.
7306///
7307/// \see genFusedMultiply
7311 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7312 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7313 FMAInstKind::Indexed);
7314}
7315
7316/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply
7317/// accumulate instructions with an additional negation of the accumulator
7321 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7322 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7323 assert(IdxMulOpd == 1);
7324
7325 Register NewVR =
7326 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7327
7328 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7329 FMAInstKind::Indexed, &NewVR);
7330}
7331
7332/// genMaddR - Generate madd instruction and combine mul and add using
7333/// an extra virtual register
7334/// Example - an ADD intermediate needs to be stored in a register:
7335/// MUL I=A,B,0
7336/// ADD R,I,Imm
7337/// ==> ORR V, ZR, Imm
7338/// ==> MADD R,A,B,V
7339/// \param MF Containing MachineFunction
7340/// \param MRI Register information
7341/// \param TII Target information
7342/// \param Root is the ADD instruction
7343/// \param [out] InsInstrs is a vector of machine instructions and will
7344/// contain the generated madd instruction
7345/// \param IdxMulOpd is index of operand in Root that is the result of
7346/// the MUL. In the example above IdxMulOpd is 1.
7347/// \param MaddOpc the opcode of the madd instruction
7348/// \param VR is a virtual register that holds the value of an ADD operand
7349/// (V in the example above).
7350/// \param RC Register class of operands
7352 const TargetInstrInfo *TII, MachineInstr &Root,
7354 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
7355 const TargetRegisterClass *RC) {
7356 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7357
7358 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7359 Register ResultReg = Root.getOperand(0).getReg();
7360 Register SrcReg0 = MUL->getOperand(1).getReg();
7361 bool Src0IsKill = MUL->getOperand(1).isKill();
7362 Register SrcReg1 = MUL->getOperand(2).getReg();
7363 bool Src1IsKill = MUL->getOperand(2).isKill();
7364
7365 if (ResultReg.isVirtual())
7366 MRI.constrainRegClass(ResultReg, RC);
7367 if (SrcReg0.isVirtual())
7368 MRI.constrainRegClass(SrcReg0, RC);
7369 if (SrcReg1.isVirtual())
7370 MRI.constrainRegClass(SrcReg1, RC);
7372 MRI.constrainRegClass(VR, RC);
7373
7375 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7376 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7377 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7378 .addReg(VR);
7379 // Insert the MADD
7380 InsInstrs.push_back(MIB);
7381 return MUL;
7382}
7383
7384/// Do the following transformation
7385/// A - (B + C) ==> (A - B) - C
7386/// A - (B + C) ==> (A - C) - B
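/// For example (an illustrative sketch with made-up virtual registers, using
/// the 32-bit opcodes selected by the code below):
///   %t = ADDWrr %b, %c
///   %r = SUBSWrr %a, %t
/// ==>
///   %n = SUBWrr %a, %b
///   %r = SUBWrr %n, %c
/// The nsw/nuw flags are dropped because the intermediate result may wrap.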
7387static void
7388genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
7389                 const TargetInstrInfo *TII, MachineInstr &Root,
7390                 SmallVectorImpl<MachineInstr *> &InsInstrs,
7391                 SmallVectorImpl<MachineInstr *> &DelInstrs,
7392                 unsigned IdxOpd1,
7393                 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
7394 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
7395 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
7396 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
7397
7398 Register ResultReg = Root.getOperand(0).getReg();
7399 Register RegA = Root.getOperand(1).getReg();
7400 bool RegAIsKill = Root.getOperand(1).isKill();
7401 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
7402 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
7403 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
7404 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
7405 Register NewVR =
7406 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
7407
7408 unsigned Opcode = Root.getOpcode();
7409 if (Opcode == AArch64::SUBSWrr)
7410 Opcode = AArch64::SUBWrr;
7411 else if (Opcode == AArch64::SUBSXrr)
7412 Opcode = AArch64::SUBXrr;
7413 else
7414 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
7415 "Unexpected instruction opcode.");
7416
7417 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
7418 Flags &= ~MachineInstr::NoSWrap;
7419 Flags &= ~MachineInstr::NoUWrap;
7420
7421 MachineInstrBuilder MIB1 =
7422 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
7423 .addReg(RegA, getKillRegState(RegAIsKill))
7424 .addReg(RegB, getKillRegState(RegBIsKill))
7425 .setMIFlags(Flags);
7426 MachineInstrBuilder MIB2 =
7427 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
7428 .addReg(NewVR, getKillRegState(true))
7429 .addReg(RegC, getKillRegState(RegCIsKill))
7430 .setMIFlags(Flags);
7431
7432 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7433 InsInstrs.push_back(MIB1);
7434 InsInstrs.push_back(MIB2);
7435 DelInstrs.push_back(AddMI);
7436 DelInstrs.push_back(&Root);
7437}
7438
7439/// When getMachineCombinerPatterns() finds potential patterns,
7440/// this function generates the instructions that could replace the
7441/// original code sequence
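/// For example, for the integer multiply-add patterns handled below (in the
/// same pseudo notation as the comments in the switch), the sequence
///   MUL I=A,B,0
///   ADD R,I,C
/// is replaced by a single MADD R,A,B,C: the MADD is appended to InsInstrs and
/// the original MUL and ADD are recorded in DelInstrs for deletion.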
7443 MachineInstr &Root, unsigned Pattern,
7446 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
7447 MachineBasicBlock &MBB = *Root.getParent();
7449 MachineFunction &MF = *MBB.getParent();
7451
7452 MachineInstr *MUL = nullptr;
7453 const TargetRegisterClass *RC;
7454 unsigned Opc;
7455 switch (Pattern) {
7456 default:
7457 // Reassociate instructions.
7459 DelInstrs, InstrIdxForVirtReg);
7460 return;
7462 // A - (B + C)
7463 // ==> (A - B) - C
7464 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
7465 InstrIdxForVirtReg);
7466 return;
7468 // A - (B + C)
7469 // ==> (A - C) - B
7470 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
7471 InstrIdxForVirtReg);
7472 return;
7475 // MUL I=A,B,0
7476 // ADD R,I,C
7477 // ==> MADD R,A,B,C
7478 // --- Create(MADD);
7480 Opc = AArch64::MADDWrrr;
7481 RC = &AArch64::GPR32RegClass;
7482 } else {
7483 Opc = AArch64::MADDXrrr;
7484 RC = &AArch64::GPR64RegClass;
7485 }
7486 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7487 break;
7490 // MUL I=A,B,0
7491 // ADD R,C,I
7492 // ==> MADD R,A,B,C
7493 // --- Create(MADD);
7495 Opc = AArch64::MADDWrrr;
7496 RC = &AArch64::GPR32RegClass;
7497 } else {
7498 Opc = AArch64::MADDXrrr;
7499 RC = &AArch64::GPR64RegClass;
7500 }
7501 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7502 break;
7505 // MUL I=A,B,0
7506 // ADD R,I,Imm
7507 // ==> MOV V, Imm
7508 // ==> MADD R,A,B,V
7509 // --- Create(MADD);
7510 const TargetRegisterClass *OrrRC;
7511 unsigned BitSize, OrrOpc, ZeroReg;
7513 OrrOpc = AArch64::ORRWri;
7514 OrrRC = &AArch64::GPR32spRegClass;
7515 BitSize = 32;
7516 ZeroReg = AArch64::WZR;
7517 Opc = AArch64::MADDWrrr;
7518 RC = &AArch64::GPR32RegClass;
7519 } else {
7520 OrrOpc = AArch64::ORRXri;
7521 OrrRC = &AArch64::GPR64spRegClass;
7522 BitSize = 64;
7523 ZeroReg = AArch64::XZR;
7524 Opc = AArch64::MADDXrrr;
7525 RC = &AArch64::GPR64RegClass;
7526 }
7527 Register NewVR = MRI.createVirtualRegister(OrrRC);
7528 uint64_t Imm = Root.getOperand(2).getImm();
7529
7530 if (Root.getOperand(3).isImm()) {
7531 unsigned Val = Root.getOperand(3).getImm();
7532 Imm = Imm << Val;
7533 }
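    // For example (illustrative): for "add w0, w1, #1, lsl #12" the shift
    // operand is 12, so the effective immediate becomes 1 << 12 = 4096.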
7534 uint64_t UImm = SignExtend64(Imm, BitSize);
7535  // Bail out unless the immediate can be composed via a single instruction.
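  // For example (illustrative): a value like 0x1234 expands to a single MOVZ
  // and is accepted, while 0x12345678 would need a MOVZ plus a MOVK and is
  // rejected below.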
7537 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7538 if (Insn.size() != 1)
7539 return;
7540 auto MovI = Insn.begin();
7542 // MOV is an alias for one of three instructions: movz, movn, and orr.
7543 if (MovI->Opcode == OrrOpc)
7544 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7545 .addReg(ZeroReg)
7546 .addImm(MovI->Op2);
7547 else {
7548 if (BitSize == 32)
7549 assert((MovI->Opcode == AArch64::MOVNWi ||
7550 MovI->Opcode == AArch64::MOVZWi) &&
7551 "Expected opcode");
7552 else
7553 assert((MovI->Opcode == AArch64::MOVNXi ||
7554 MovI->Opcode == AArch64::MOVZXi) &&
7555 "Expected opcode");
7556 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7557 .addImm(MovI->Op1)
7558 .addImm(MovI->Op2);
7559 }
7560 InsInstrs.push_back(MIB1);
7561 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7562 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7563 break;
7564 }
7567 // MUL I=A,B,0
7568 // SUB R,I, C
7569 // ==> SUB V, 0, C
7570 // ==> MADD R,A,B,V // = -C + A*B
7571 // --- Create(MADD);
7572 const TargetRegisterClass *SubRC;
7573 unsigned SubOpc, ZeroReg;
7575 SubOpc = AArch64::SUBWrr;
7576 SubRC = &AArch64::GPR32spRegClass;
7577 ZeroReg = AArch64::WZR;
7578 Opc = AArch64::MADDWrrr;
7579 RC = &AArch64::GPR32RegClass;
7580 } else {
7581 SubOpc = AArch64::SUBXrr;
7582 SubRC = &AArch64::GPR64spRegClass;
7583 ZeroReg = AArch64::XZR;
7584 Opc = AArch64::MADDXrrr;
7585 RC = &AArch64::GPR64RegClass;
7586 }
7587 Register NewVR = MRI.createVirtualRegister(SubRC);
7588 // SUB NewVR, 0, C
7589 MachineInstrBuilder MIB1 =
7590 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7591 .addReg(ZeroReg)
7592 .add(Root.getOperand(2));
7593 InsInstrs.push_back(MIB1);
7594 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7595 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7596 break;
7597 }
7600 // MUL I=A,B,0
7601 // SUB R,C,I
7602 // ==> MSUB R,A,B,C (computes C - A*B)
7603 // --- Create(MSUB);
7605 Opc = AArch64::MSUBWrrr;
7606 RC = &AArch64::GPR32RegClass;
7607 } else {
7608 Opc = AArch64::MSUBXrrr;
7609 RC = &AArch64::GPR64RegClass;
7610 }
7611 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7612 break;
7615 // MUL I=A,B,0
7616 // SUB R,I, Imm
7617 // ==> MOV V, -Imm
7618 // ==> MADD R,A,B,V // = -Imm + A*B
7619 // --- Create(MADD);
7620 const TargetRegisterClass *OrrRC;
7621 unsigned BitSize, OrrOpc, ZeroReg;
7623 OrrOpc = AArch64::ORRWri;
7624 OrrRC = &AArch64::GPR32spRegClass;
7625 BitSize = 32;
7626 ZeroReg = AArch64::WZR;
7627 Opc = AArch64::MADDWrrr;
7628 RC = &AArch64::GPR32RegClass;
7629 } else {
7630 OrrOpc = AArch64::ORRXri;
7631 OrrRC = &AArch64::GPR64spRegClass;
7632 BitSize = 64;
7633 ZeroReg = AArch64::XZR;
7634 Opc = AArch64::MADDXrrr;
7635 RC = &AArch64::GPR64RegClass;
7636 }
7637 Register NewVR = MRI.createVirtualRegister(OrrRC);
7638 uint64_t Imm = Root.getOperand(2).getImm();
7639 if (Root.getOperand(3).isImm()) {
7640 unsigned Val = Root.getOperand(3).getImm();
7641 Imm = Imm << Val;
7642 }
7643 uint64_t UImm = SignExtend64(-Imm, BitSize);
7644  // Bail out unless the immediate can be composed via a single instruction.
7646 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7647 if (Insn.size() != 1)
7648 return;
7649 auto MovI = Insn.begin();
7651 // MOV is an alias for one of three instructions: movz, movn, and orr.
7652 if (MovI->Opcode == OrrOpc)
7653 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7654 .addReg(ZeroReg)
7655 .addImm(MovI->Op2);
7656 else {
7657 if (BitSize == 32)
7658 assert((MovI->Opcode == AArch64::MOVNWi ||
7659 MovI->Opcode == AArch64::MOVZWi) &&
7660 "Expected opcode");
7661 else
7662 assert((MovI->Opcode == AArch64::MOVNXi ||
7663 MovI->Opcode == AArch64::MOVZXi) &&
7664 "Expected opcode");
7665 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7666 .addImm(MovI->Op1)
7667 .addImm(MovI->Op2);
7668 }
7669 InsInstrs.push_back(MIB1);
7670 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7671 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7672 break;
7673 }
7674
7676 Opc = AArch64::MLAv8i8;
7677 RC = &AArch64::FPR64RegClass;
7678 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7679 break;
7681 Opc = AArch64::MLAv8i8;
7682 RC = &AArch64::FPR64RegClass;
7683 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7684 break;
7686 Opc = AArch64::MLAv16i8;
7687 RC = &AArch64::FPR128RegClass;
7688 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7689 break;
7691 Opc = AArch64::MLAv16i8;
7692 RC = &AArch64::FPR128RegClass;
7693 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7694 break;
7696 Opc = AArch64::MLAv4i16;
7697 RC = &AArch64::FPR64RegClass;
7698 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7699 break;
7701 Opc = AArch64::MLAv4i16;
7702 RC = &AArch64::FPR64RegClass;
7703 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7704 break;
7706 Opc = AArch64::MLAv8i16;
7707 RC = &AArch64::FPR128RegClass;
7708 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7709 break;
7711 Opc = AArch64::MLAv8i16;
7712 RC = &AArch64::FPR128RegClass;
7713 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7714 break;
7716 Opc = AArch64::MLAv2i32;
7717 RC = &AArch64::FPR64RegClass;
7718 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7719 break;
7721 Opc = AArch64::MLAv2i32;
7722 RC = &AArch64::FPR64RegClass;
7723 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7724 break;
7726 Opc = AArch64::MLAv4i32;
7727 RC = &AArch64::FPR128RegClass;
7728 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7729 break;
7731 Opc = AArch64::MLAv4i32;
7732 RC = &AArch64::FPR128RegClass;
7733 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7734 break;
7735
7737 Opc = AArch64::MLAv8i8;
7738 RC = &AArch64::FPR64RegClass;
7739 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7740 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7741 RC);
7742 break;
7744 Opc = AArch64::MLSv8i8;
7745 RC = &AArch64::FPR64RegClass;
7746 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7747 break;
7749 Opc = AArch64::MLAv16i8;
7750 RC = &AArch64::FPR128RegClass;
7751 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7752 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7753 RC);
7754 break;
7756 Opc = AArch64::MLSv16i8;
7757 RC = &AArch64::FPR128RegClass;
7758 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7759 break;
7761 Opc = AArch64::MLAv4i16;
7762 RC = &AArch64::FPR64RegClass;
7763 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7764 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7765 RC);
7766 break;
7768 Opc = AArch64::MLSv4i16;
7769 RC = &AArch64::FPR64RegClass;
7770 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7771 break;
7773 Opc = AArch64::MLAv8i16;
7774 RC = &AArch64::FPR128RegClass;
7775 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7776 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7777 RC);
7778 break;
7780 Opc = AArch64::MLSv8i16;
7781 RC = &AArch64::FPR128RegClass;
7782 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7783 break;
7785 Opc = AArch64::MLAv2i32;
7786 RC = &AArch64::FPR64RegClass;
7787 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7788 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7789 RC);
7790 break;
7792 Opc = AArch64::MLSv2i32;
7793 RC = &AArch64::FPR64RegClass;
7794 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7795 break;
7797 Opc = AArch64::MLAv4i32;
7798 RC = &AArch64::FPR128RegClass;
7799 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7800 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7801 RC);
7802 break;
7804 Opc = AArch64::MLSv4i32;
7805 RC = &AArch64::FPR128RegClass;
7806 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7807 break;
7808
7810 Opc = AArch64::MLAv4i16_indexed;
7811 RC = &AArch64::FPR64RegClass;
7812 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7813 break;
7815 Opc = AArch64::MLAv4i16_indexed;
7816 RC = &AArch64::FPR64RegClass;
7817 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7818 break;
7820 Opc = AArch64::MLAv8i16_indexed;
7821 RC = &AArch64::FPR128RegClass;
7822 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7823 break;
7825 Opc = AArch64::MLAv8i16_indexed;
7826 RC = &AArch64::FPR128RegClass;
7827 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7828 break;
7830 Opc = AArch64::MLAv2i32_indexed;
7831 RC = &AArch64::FPR64RegClass;
7832 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7833 break;
7835 Opc = AArch64::MLAv2i32_indexed;
7836 RC = &AArch64::FPR64RegClass;
7837 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7838 break;
7840 Opc = AArch64::MLAv4i32_indexed;
7841 RC = &AArch64::FPR128RegClass;
7842 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7843 break;
7845 Opc = AArch64::MLAv4i32_indexed;
7846 RC = &AArch64::FPR128RegClass;
7847 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7848 break;
7849
7851 Opc = AArch64::MLAv4i16_indexed;
7852 RC = &AArch64::FPR64RegClass;
7853 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7854 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7855 RC);
7856 break;
7858 Opc = AArch64::MLSv4i16_indexed;
7859 RC = &AArch64::FPR64RegClass;
7860 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7861 break;
7863 Opc = AArch64::MLAv8i16_indexed;
7864 RC = &AArch64::FPR128RegClass;
7865 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7866 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7867 RC);
7868 break;
7870 Opc = AArch64::MLSv8i16_indexed;
7871 RC = &AArch64::FPR128RegClass;
7872 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7873 break;
7875 Opc = AArch64::MLAv2i32_indexed;
7876 RC = &AArch64::FPR64RegClass;
7877 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7878 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7879 RC);
7880 break;
7882 Opc = AArch64::MLSv2i32_indexed;
7883 RC = &AArch64::FPR64RegClass;
7884 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7885 break;
7887 Opc = AArch64::MLAv4i32_indexed;
7888 RC = &AArch64::FPR128RegClass;
7889 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7890 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7891 RC);
7892 break;
7894 Opc = AArch64::MLSv4i32_indexed;
7895 RC = &AArch64::FPR128RegClass;
7896 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7897 break;
7898
7899 // Floating Point Support
7901 Opc = AArch64::FMADDHrrr;
7902 RC = &AArch64::FPR16RegClass;
7903 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7904 break;
7906 Opc = AArch64::FMADDSrrr;
7907 RC = &AArch64::FPR32RegClass;
7908 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7909 break;
7911 Opc = AArch64::FMADDDrrr;
7912 RC = &AArch64::FPR64RegClass;
7913 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7914 break;
7915
7917 Opc = AArch64::FMADDHrrr;
7918 RC = &AArch64::FPR16RegClass;
7919 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7920 break;
7922 Opc = AArch64::FMADDSrrr;
7923 RC = &AArch64::FPR32RegClass;
7924 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7925 break;
7927 Opc = AArch64::FMADDDrrr;
7928 RC = &AArch64::FPR64RegClass;
7929 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7930 break;
7931
7933 Opc = AArch64::FMLAv1i32_indexed;
7934 RC = &AArch64::FPR32RegClass;
7935 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7936 FMAInstKind::Indexed);
7937 break;
7939 Opc = AArch64::FMLAv1i32_indexed;
7940 RC = &AArch64::FPR32RegClass;
7941 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7942 FMAInstKind::Indexed);
7943 break;
7944
7946 Opc = AArch64::FMLAv1i64_indexed;
7947 RC = &AArch64::FPR64RegClass;
7948 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7949 FMAInstKind::Indexed);
7950 break;
7952 Opc = AArch64::FMLAv1i64_indexed;
7953 RC = &AArch64::FPR64RegClass;
7954 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7955 FMAInstKind::Indexed);
7956 break;
7957
7959 RC = &AArch64::FPR64RegClass;
7960 Opc = AArch64::FMLAv4i16_indexed;
7961 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7962 FMAInstKind::Indexed);
7963 break;
7965 RC = &AArch64::FPR64RegClass;
7966 Opc = AArch64::FMLAv4f16;
7967 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7968 FMAInstKind::Accumulator);
7969 break;
7971 RC = &AArch64::FPR64RegClass;
7972 Opc = AArch64::FMLAv4i16_indexed;
7973 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7974 FMAInstKind::Indexed);
7975 break;
7977 RC = &AArch64::FPR64RegClass;
7978 Opc = AArch64::FMLAv4f16;
7979 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7980 FMAInstKind::Accumulator);
7981 break;
7982
7985 RC = &AArch64::FPR64RegClass;
7987 Opc = AArch64::FMLAv2i32_indexed;
7988 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7989 FMAInstKind::Indexed);
7990 } else {
7991 Opc = AArch64::FMLAv2f32;
7992 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7993 FMAInstKind::Accumulator);
7994 }
7995 break;
7998 RC = &AArch64::FPR64RegClass;
8000 Opc = AArch64::FMLAv2i32_indexed;
8001 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8002 FMAInstKind::Indexed);
8003 } else {
8004 Opc = AArch64::FMLAv2f32;
8005 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8006 FMAInstKind::Accumulator);
8007 }
8008 break;
8009
8011 RC = &AArch64::FPR128RegClass;
8012 Opc = AArch64::FMLAv8i16_indexed;
8013 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8014 FMAInstKind::Indexed);
8015 break;
8017 RC = &AArch64::FPR128RegClass;
8018 Opc = AArch64::FMLAv8f16;
8019 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8020 FMAInstKind::Accumulator);
8021 break;
8023 RC = &AArch64::FPR128RegClass;
8024 Opc = AArch64::FMLAv8i16_indexed;
8025 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8026 FMAInstKind::Indexed);
8027 break;
8029 RC = &AArch64::FPR128RegClass;
8030 Opc = AArch64::FMLAv8f16;
8031 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8032 FMAInstKind::Accumulator);
8033 break;
8034
8037 RC = &AArch64::FPR128RegClass;
8039 Opc = AArch64::FMLAv2i64_indexed;
8040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8041 FMAInstKind::Indexed);
8042 } else {
8043 Opc = AArch64::FMLAv2f64;
8044 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8045 FMAInstKind::Accumulator);
8046 }
8047 break;
8050 RC = &AArch64::FPR128RegClass;
8052 Opc = AArch64::FMLAv2i64_indexed;
8053 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8054 FMAInstKind::Indexed);
8055 } else {
8056 Opc = AArch64::FMLAv2f64;
8057 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8058 FMAInstKind::Accumulator);
8059 }
8060 break;
8061
8064 RC = &AArch64::FPR128RegClass;
8066 Opc = AArch64::FMLAv4i32_indexed;
8067 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8068 FMAInstKind::Indexed);
8069 } else {
8070 Opc = AArch64::FMLAv4f32;
8071 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8072 FMAInstKind::Accumulator);
8073 }
8074 break;
8075
8078 RC = &AArch64::FPR128RegClass;
8080 Opc = AArch64::FMLAv4i32_indexed;
8081 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8082 FMAInstKind::Indexed);
8083 } else {
8084 Opc = AArch64::FMLAv4f32;
8085 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8086 FMAInstKind::Accumulator);
8087 }
8088 break;
8089
8091 Opc = AArch64::FNMSUBHrrr;
8092 RC = &AArch64::FPR16RegClass;
8093 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8094 break;
8096 Opc = AArch64::FNMSUBSrrr;
8097 RC = &AArch64::FPR32RegClass;
8098 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8099 break;
8101 Opc = AArch64::FNMSUBDrrr;
8102 RC = &AArch64::FPR64RegClass;
8103 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8104 break;
8105
8107 Opc = AArch64::FNMADDHrrr;
8108 RC = &AArch64::FPR16RegClass;
8109 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8110 break;
8112 Opc = AArch64::FNMADDSrrr;
8113 RC = &AArch64::FPR32RegClass;
8114 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8115 break;
8117 Opc = AArch64::FNMADDDrrr;
8118 RC = &AArch64::FPR64RegClass;
8119 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8120 break;
8121
8123 Opc = AArch64::FMSUBHrrr;
8124 RC = &AArch64::FPR16RegClass;
8125 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8126 break;
8128 Opc = AArch64::FMSUBSrrr;
8129 RC = &AArch64::FPR32RegClass;
8130 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8131 break;
8133 Opc = AArch64::FMSUBDrrr;
8134 RC = &AArch64::FPR64RegClass;
8135 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8136 break;
8137
8139 Opc = AArch64::FMLSv1i32_indexed;
8140 RC = &AArch64::FPR32RegClass;
8141 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8142 FMAInstKind::Indexed);
8143 break;
8144
8146 Opc = AArch64::FMLSv1i64_indexed;
8147 RC = &AArch64::FPR64RegClass;
8148 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8149 FMAInstKind::Indexed);
8150 break;
8151
8154 RC = &AArch64::FPR64RegClass;
8155 Register NewVR = MRI.createVirtualRegister(RC);
8156 MachineInstrBuilder MIB1 =
8157 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8158 .add(Root.getOperand(2));
8159 InsInstrs.push_back(MIB1);
8160 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8162 Opc = AArch64::FMLAv4f16;
8163 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8164 FMAInstKind::Accumulator, &NewVR);
8165 } else {
8166 Opc = AArch64::FMLAv4i16_indexed;
8167 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8168 FMAInstKind::Indexed, &NewVR);
8169 }
8170 break;
8171 }
8173 RC = &AArch64::FPR64RegClass;
8174 Opc = AArch64::FMLSv4f16;
8175 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8176 FMAInstKind::Accumulator);
8177 break;
8179 RC = &AArch64::FPR64RegClass;
8180 Opc = AArch64::FMLSv4i16_indexed;
8181 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8182 FMAInstKind::Indexed);
8183 break;
8184
8187 RC = &AArch64::FPR64RegClass;
8189 Opc = AArch64::FMLSv2i32_indexed;
8190 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8191 FMAInstKind::Indexed);
8192 } else {
8193 Opc = AArch64::FMLSv2f32;
8194 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8195 FMAInstKind::Accumulator);
8196 }
8197 break;
8198
8201 RC = &AArch64::FPR128RegClass;
8202 Register NewVR = MRI.createVirtualRegister(RC);
8203 MachineInstrBuilder MIB1 =
8204 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8205 .add(Root.getOperand(2));
8206 InsInstrs.push_back(MIB1);
8207 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8209 Opc = AArch64::FMLAv8f16;
8210 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8211 FMAInstKind::Accumulator, &NewVR);
8212 } else {
8213 Opc = AArch64::FMLAv8i16_indexed;
8214 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8215 FMAInstKind::Indexed, &NewVR);
8216 }
8217 break;
8218 }
8220 RC = &AArch64::FPR128RegClass;
8221 Opc = AArch64::FMLSv8f16;
8222 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8223 FMAInstKind::Accumulator);
8224 break;
8226 RC = &AArch64::FPR128RegClass;
8227 Opc = AArch64::FMLSv8i16_indexed;
8228 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8229 FMAInstKind::Indexed);
8230 break;
8231
8234 RC = &AArch64::FPR128RegClass;
8236 Opc = AArch64::FMLSv2i64_indexed;
8237 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8238 FMAInstKind::Indexed);
8239 } else {
8240 Opc = AArch64::FMLSv2f64;
8241 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8242 FMAInstKind::Accumulator);
8243 }
8244 break;
8245
8248 RC = &AArch64::FPR128RegClass;
8250 Opc = AArch64::FMLSv4i32_indexed;
8251 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8252 FMAInstKind::Indexed);
8253 } else {
8254 Opc = AArch64::FMLSv4f32;
8255 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8256 FMAInstKind::Accumulator);
8257 }
8258 break;
8261 RC = &AArch64::FPR64RegClass;
8262 Register NewVR = MRI.createVirtualRegister(RC);
8263 MachineInstrBuilder MIB1 =
8264 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
8265 .add(Root.getOperand(2));
8266 InsInstrs.push_back(MIB1);
8267 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8269 Opc = AArch64::FMLAv2i32_indexed;
8270 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8271 FMAInstKind::Indexed, &NewVR);
8272 } else {
8273 Opc = AArch64::FMLAv2f32;
8274 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8275 FMAInstKind::Accumulator, &NewVR);
8276 }
8277 break;
8278 }
8281 RC = &AArch64::FPR128RegClass;
8282 Register NewVR = MRI.createVirtualRegister(RC);
8283 MachineInstrBuilder MIB1 =
8284 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
8285 .add(Root.getOperand(2));
8286 InsInstrs.push_back(MIB1);
8287 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8289 Opc = AArch64::FMLAv4i32_indexed;
8290 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8291 FMAInstKind::Indexed, &NewVR);
8292 } else {
8293 Opc = AArch64::FMLAv4f32;
8294 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8295 FMAInstKind::Accumulator, &NewVR);
8296 }
8297 break;
8298 }
8301 RC = &AArch64::FPR128RegClass;
8302 Register NewVR = MRI.createVirtualRegister(RC);
8303 MachineInstrBuilder MIB1 =
8304 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
8305 .add(Root.getOperand(2));
8306 InsInstrs.push_back(MIB1);
8307 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8309 Opc = AArch64::FMLAv2i64_indexed;
8310 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8311 FMAInstKind::Indexed, &NewVR);
8312 } else {
8313 Opc = AArch64::FMLAv2f64;
8314 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8315 FMAInstKind::Accumulator, &NewVR);
8316 }
8317 break;
8318 }
8321 unsigned IdxDupOp =
8323 : 2;
8324 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
8325 &AArch64::FPR128RegClass, MRI);
8326 break;
8327 }
8330 unsigned IdxDupOp =
8332 : 2;
8333 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
8334 &AArch64::FPR128RegClass, MRI);
8335 break;
8336 }
8339 unsigned IdxDupOp =
8341 : 2;
8342 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
8343 &AArch64::FPR128_loRegClass, MRI);
8344 break;
8345 }
8348 unsigned IdxDupOp =
8350 : 2;
8351 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
8352 &AArch64::FPR128RegClass, MRI);
8353 break;
8354 }
8357 unsigned IdxDupOp =
8359 : 2;
8360 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
8361 &AArch64::FPR128_loRegClass, MRI);
8362 break;
8363 }
8365 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8366 break;
8367 }
8368
8369 } // end switch (Pattern)
8370 // Record MUL and ADD/SUB for deletion
8371 if (MUL)
8372 DelInstrs.push_back(MUL);
8373 DelInstrs.push_back(&Root);
8374
8375 // Set the flags on the inserted instructions to be the merged flags of the
8376 // instructions that we have combined.
8377 uint32_t Flags = Root.getFlags();
8378 if (MUL)
8379 Flags = Root.mergeFlagsWith(*MUL);
8380 for (auto *MI : InsInstrs)
8381 MI->setFlags(Flags);
8382}
8383
8384/// Replace csincr-branch sequence by simple conditional branch
8385///
8386/// Examples:
8387/// 1. \code
8388/// csinc w9, wzr, wzr, <condition code>
8389/// tbnz w9, #0, 0x44
8390/// \endcode
8391/// to
8392/// \code
8393/// b.<inverted condition code>
8394/// \endcode
8395///
8396/// 2. \code
8397/// csinc w9, wzr, wzr, <condition code>
8398/// tbz w9, #0, 0x44
8399/// \endcode
8400/// to
8401/// \code
8402/// b.<condition code>
8403/// \endcode
8404///
8405/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
8406/// compare's constant operand is power of 2.
8407///
8408/// Examples:
8409/// \code
8410/// and w8, w8, #0x400
8411/// cbnz w8, L1
8412/// \endcode
8413/// to
8414/// \code
8415/// tbnz w8, #10, L1
8416/// \endcode
8417///
8418/// \param MI Conditional Branch
8419/// \return True when the simple conditional branch is generated
8420///
8422 bool IsNegativeBranch = false;
8423 bool IsTestAndBranch = false;
8424 unsigned TargetBBInMI = 0;
8425 switch (MI.getOpcode()) {
8426 default:
8427 llvm_unreachable("Unknown branch instruction?");
8428 case AArch64::Bcc:
8429 return false;
8430 case AArch64::CBZW:
8431 case AArch64::CBZX:
8432 TargetBBInMI = 1;
8433 break;
8434 case AArch64::CBNZW:
8435 case AArch64::CBNZX:
8436 TargetBBInMI = 1;
8437 IsNegativeBranch = true;
8438 break;
8439 case AArch64::TBZW:
8440 case AArch64::TBZX:
8441 TargetBBInMI = 2;
8442 IsTestAndBranch = true;
8443 break;
8444 case AArch64::TBNZW:
8445 case AArch64::TBNZX:
8446 TargetBBInMI = 2;
8447 IsNegativeBranch = true;
8448 IsTestAndBranch = true;
8449 break;
8450 }
8451 // So we increment a zero register and test for bits other
8452 // than bit 0? Conservatively bail out in case the verifier
8453 // missed this case.
8454 if (IsTestAndBranch && MI.getOperand(1).getImm())
8455 return false;
8456
8457 // Find Definition.
8458  assert(MI.getParent() && "Incomplete machine instruction\n");
8459 MachineBasicBlock *MBB = MI.getParent();
8460 MachineFunction *MF = MBB->getParent();
8462 Register VReg = MI.getOperand(0).getReg();
8463 if (!VReg.isVirtual())
8464 return false;
8465
8466 MachineInstr *DefMI = MRI->getVRegDef(VReg);
8467
8468 // Look through COPY instructions to find definition.
8469 while (DefMI->isCopy()) {
8470 Register CopyVReg = DefMI->getOperand(1).getReg();
8471 if (!MRI->hasOneNonDBGUse(CopyVReg))
8472 return false;
8473 if (!MRI->hasOneDef(CopyVReg))
8474 return false;
8475 DefMI = MRI->getVRegDef(CopyVReg);
8476 }
8477
8478 switch (DefMI->getOpcode()) {
8479 default:
8480 return false;
8481 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8482 case AArch64::ANDWri:
8483 case AArch64::ANDXri: {
8484 if (IsTestAndBranch)
8485 return false;
8486 if (DefMI->getParent() != MBB)
8487 return false;
8488 if (!MRI->hasOneNonDBGUse(VReg))
8489 return false;
8490
8491 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8493 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8494 if (!isPowerOf2_64(Mask))
8495 return false;
8496
8498 Register NewReg = MO.getReg();
8499 if (!NewReg.isVirtual())
8500 return false;
8501
8502 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8503
8504 MachineBasicBlock &RefToMBB = *MBB;
8505 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8506 DebugLoc DL = MI.getDebugLoc();
8507 unsigned Imm = Log2_64(Mask);
8508 unsigned Opc = (Imm < 32)
8509 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8510 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8511 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8512 .addReg(NewReg)
8513 .addImm(Imm)
8514 .addMBB(TBB);
8515    // Register lives on to the new TBZ/TBNZ now.
8516 MO.setIsKill(false);
8517
8518    // For bit positions smaller than 32, we need to use the 32-bit (W)
8519    // variant in all cases, since the 64-bit variant cannot encode such
8520    // immediates.
8521    // Therefore, if the input register is 64-bit, we need to take its
8522    // 32-bit sub-register.
8523 if (!Is32Bit && Imm < 32)
8524 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8525 MI.eraseFromParent();
8526 return true;
8527 }
8528 // Look for CSINC
8529 case AArch64::CSINCWr:
8530 case AArch64::CSINCXr: {
8531 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8532 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8533 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8534 DefMI->getOperand(2).getReg() == AArch64::XZR))
8535 return false;
8536
8537 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8538 true) != -1)
8539 return false;
8540
8542 // Convert only when the condition code is not modified between
8543 // the CSINC and the branch. The CC may be used by other
8544 // instructions in between.
8546 return false;
8547 MachineBasicBlock &RefToMBB = *MBB;
8548 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8549 DebugLoc DL = MI.getDebugLoc();
8550 if (IsNegativeBranch)
8552 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8553 MI.eraseFromParent();
8554 return true;
8555 }
8556 }
8557}
8558
8559std::pair<unsigned, unsigned>
8561 const unsigned Mask = AArch64II::MO_FRAGMENT;
8562 return std::make_pair(TF & Mask, TF & ~Mask);
8563}
8564
8567 using namespace AArch64II;
8568
8569 static const std::pair<unsigned, const char *> TargetFlags[] = {
8570 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8571 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8572 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8573 {MO_HI12, "aarch64-hi12"}};
8574 return ArrayRef(TargetFlags);
8575}
8576
8579 using namespace AArch64II;
8580
8581 static const std::pair<unsigned, const char *> TargetFlags[] = {
8582 {MO_COFFSTUB, "aarch64-coffstub"},
8583 {MO_GOT, "aarch64-got"},
8584 {MO_NC, "aarch64-nc"},
8585 {MO_S, "aarch64-s"},
8586 {MO_TLS, "aarch64-tls"},
8587 {MO_DLLIMPORT, "aarch64-dllimport"},
8588 {MO_PREL, "aarch64-prel"},
8589 {MO_TAGGED, "aarch64-tagged"},
8590 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8591 };
8592 return ArrayRef(TargetFlags);
8593}
8594
8597 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8598 {{MOSuppressPair, "aarch64-suppress-pair"},
8599 {MOStridedAccess, "aarch64-strided-access"}};
8600 return ArrayRef(TargetFlags);
8601}
8602
8603/// Constants defining how certain sequences should be outlined.
8604/// This encompasses how an outlined function should be called, and what kind of
8605/// frame should be emitted for that outlined function.
8606///
8607/// \p MachineOutlinerDefault implies that the function should be called with
8608/// a save and restore of LR to the stack.
8609///
8610/// That is,
8611///
8612/// I1 Save LR OUTLINED_FUNCTION:
8613/// I2 --> BL OUTLINED_FUNCTION I1
8614/// I3 Restore LR I2
8615/// I3
8616/// RET
8617///
8618/// * Call construction overhead: 3 (save + BL + restore)
8619/// * Frame construction overhead: 1 (ret)
8620/// * Requires stack fixups? Yes
8621///
8622/// \p MachineOutlinerTailCall implies that the function is being created from
8623/// a sequence of instructions ending in a return.
8624///
8625/// That is,
8626///
8627/// I1 OUTLINED_FUNCTION:
8628/// I2 --> B OUTLINED_FUNCTION I1
8629/// RET I2
8630/// RET
8631///
8632/// * Call construction overhead: 1 (B)
8633/// * Frame construction overhead: 0 (Return included in sequence)
8634/// * Requires stack fixups? No
8635///
8636/// \p MachineOutlinerNoLRSave implies that the function should be called using
8637/// a BL instruction, but doesn't require LR to be saved and restored. This
8638/// happens when LR is known to be dead.
8639///
8640/// That is,
8641///
8642/// I1 OUTLINED_FUNCTION:
8643/// I2 --> BL OUTLINED_FUNCTION I1
8644/// I3 I2
8645/// I3
8646/// RET
8647///
8648/// * Call construction overhead: 1 (BL)
8649/// * Frame construction overhead: 1 (RET)
8650/// * Requires stack fixups? No
8651///
8652/// \p MachineOutlinerThunk implies that the function is being created from
8653/// a sequence of instructions ending in a call. The outlined function is
8654/// called with a BL instruction, and the outlined function tail-calls the
8655/// original call destination.
8656///
8657/// That is,
8658///
8659/// I1 OUTLINED_FUNCTION:
8660/// I2 --> BL OUTLINED_FUNCTION I1
8661/// BL f I2
8662/// B f
8663/// * Call construction overhead: 1 (BL)
8664/// * Frame construction overhead: 0
8665/// * Requires stack fixups? No
8666///
8667/// \p MachineOutlinerRegSave implies that the function should be called with a
8668/// save and restore of LR to an available register. This allows us to avoid
8669/// stack fixups. Note that this outlining variant is compatible with the
8670/// NoLRSave case.
8671///
8672/// That is,
8673///
8674/// I1 Save LR OUTLINED_FUNCTION:
8675/// I2 --> BL OUTLINED_FUNCTION I1
8676/// I3 Restore LR I2
8677/// I3
8678/// RET
8679///
8680/// * Call construction overhead: 3 (save + BL + restore)
8681/// * Frame construction overhead: 1 (ret)
8682/// * Requires stack fixups? No
8683enum MachineOutlinerClass {
8684  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8685  MachineOutlinerTailCall, /// Only emit a branch.
8686  MachineOutlinerNoLRSave, /// Emit a call and return.
8687  MachineOutlinerThunk, /// Emit a call and tail-call.
8688  MachineOutlinerRegSave /// Same as default, but save to a register.
8689};
8690
8691enum MachineOutlinerMBBFlags {
8692  LRUnavailableSomewhere = 0x2,
8693  HasCalls = 0x4,
8694  UnsafeRegsDead = 0x8
8695};
8696
8697Register
8698AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8699  MachineFunction *MF = C.getMF();
8700  const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
8701  const AArch64RegisterInfo *ARI =
8702 static_cast<const AArch64RegisterInfo *>(&TRI);
8703 // Check if there is an available register across the sequence that we can
8704 // use.
8705 for (unsigned Reg : AArch64::GPR64RegClass) {
8706 if (!ARI->isReservedReg(*MF, Reg) &&
8707 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8708 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8709 Reg != AArch64::X17 && // Ditto for X17.
8710 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8711 C.isAvailableInsideSeq(Reg, TRI))
8712 return Reg;
8713 }
8714 return Register();
8715}
8716
8717static bool
8719 const outliner::Candidate &b) {
8720 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8721 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8722
8723 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8724 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8725}
8726
8727static bool
8729 const outliner::Candidate &b) {
8730 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8731 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8732
8733 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8734}
8735
8737 const outliner::Candidate &b) {
8738 const AArch64Subtarget &SubtargetA =
8740 const AArch64Subtarget &SubtargetB =
8741 b.getMF()->getSubtarget<AArch64Subtarget>();
8742 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8743}
8744
8745std::optional<std::unique_ptr<outliner::OutlinedFunction>>
8747 const MachineModuleInfo &MMI,
8748 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
8749 unsigned MinRepeats) const {
8750 unsigned SequenceSize = 0;
8751 for (auto &MI : RepeatedSequenceLocs[0])
8752 SequenceSize += getInstSizeInBytes(MI);
8753
8754 unsigned NumBytesToCreateFrame = 0;
8755
8756 // We only allow outlining for functions having exactly matching return
8757 // address signing attributes, i.e., all share the same value for the
8758 // attribute "sign-return-address" and all share the same type of key they
8759 // are signed with.
8760  // Additionally, we require that either all functions support v8.3a
8761  // features or none do. Otherwise an outlined function could get signed
8762 // using dedicated v8.3 instructions and a call from a function that doesn't
8763 // support v8.3 instructions would therefore be invalid.
8764 if (std::adjacent_find(
8765 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8766 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8767 // Return true if a and b are non-equal w.r.t. return address
8768 // signing or support of v8.3a features
8769 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8770 outliningCandidatesSigningKeyConsensus(a, b) &&
8771 outliningCandidatesV8_3OpsConsensus(a, b)) {
8772 return false;
8773 }
8774 return true;
8775 }) != RepeatedSequenceLocs.end()) {
8776 return std::nullopt;
8777 }
8778
8779 // Since at this point all candidates agree on their return address signing
8780 // picking just one is fine. If the candidate functions potentially sign their
8781 // return addresses, the outlined function should do the same. Note that in
8782  // the case of "sign-return-address"="non-leaf" this is an assumption: it is
8783  // not guaranteed that the outlined function will have to sign its return
8784  // address, but that decision is made later, after the decision to outline
8785  // has already been made.
8786 // The same holds for the number of additional instructions we need: On
8787 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8788 // necessary. However, at this point we don't know if the outlined function
8789 // will have a RET instruction so we assume the worst.
8790 const TargetRegisterInfo &TRI = getRegisterInfo();
8791 // Performing a tail call may require extra checks when PAuth is enabled.
8792 // If PAuth is disabled, set it to zero for uniformity.
8793 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8794 if (RepeatedSequenceLocs[0]
8795 .getMF()
8796 ->getInfo<AArch64FunctionInfo>()
8797 ->shouldSignReturnAddress(true)) {
8798    // One PAC and one AUT instruction.
8799 NumBytesToCreateFrame += 8;
8800
8801 // PAuth is enabled - set extra tail call cost, if any.
8802 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
8803 *RepeatedSequenceLocs[0].getMF());
8804 NumBytesToCheckLRInTCEpilogue =
8806 // Checking the authenticated LR value may significantly impact
8807 // SequenceSize, so account for it for more precise results.
8808 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8809 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8810
8811 // We have to check if sp modifying instructions would get outlined.
8812  // If so, we only allow outlining if sp is unchanged overall: matching
8813  // sub and add instructions are okay to outline, but all other sp
8814  // modifications are not.
8815 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8816 int SPValue = 0;
8817 for (auto &MI : C) {
8818 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8819 switch (MI.getOpcode()) {
8820 case AArch64::ADDXri:
8821 case AArch64::ADDWri:
8822 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8823 assert(MI.getOperand(2).isImm() &&
8824 "Expected operand to be immediate");
8825 assert(MI.getOperand(1).isReg() &&
8826 "Expected operand to be a register");
8827 // Check if the add just increments sp. If so, we search for
8828 // matching sub instructions that decrement sp. If not, the
8829 // modification is illegal
8830 if (MI.getOperand(1).getReg() == AArch64::SP)
8831 SPValue += MI.getOperand(2).getImm();
8832 else
8833 return true;
8834 break;
8835 case AArch64::SUBXri:
8836 case AArch64::SUBWri:
8837 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8838 assert(MI.getOperand(2).isImm() &&
8839 "Expected operand to be immediate");
8840 assert(MI.getOperand(1).isReg() &&
8841 "Expected operand to be a register");
8842 // Check if the sub just decrements sp. If so, we search for
8843 // matching add instructions that increment sp. If not, the
8844 // modification is illegal
8845 if (MI.getOperand(1).getReg() == AArch64::SP)
8846 SPValue -= MI.getOperand(2).getImm();
8847 else
8848 return true;
8849 break;
8850 default:
8851 return true;
8852 }
8853 }
8854 }
8855 if (SPValue)
8856 return true;
8857 return false;
8858 };
8859 // Remove candidates with illegal stack modifying instructions
8860 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8861
8862 // If the sequence doesn't have enough candidates left, then we're done.
8863 if (RepeatedSequenceLocs.size() < MinRepeats)
8864 return std::nullopt;
8865 }
8866
8867 // Properties about candidate MBBs that hold for all of them.
8868 unsigned FlagsSetInAll = 0xF;
8869
8870 // Compute liveness information for each candidate, and set FlagsSetInAll.
8871 for (outliner::Candidate &C : RepeatedSequenceLocs)
8872 FlagsSetInAll &= C.Flags;
8873
8874 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8875
8876 // Helper lambda which sets call information for every candidate.
8877 auto SetCandidateCallInfo =
8878 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8879 for (outliner::Candidate &C : RepeatedSequenceLocs)
8880 C.setCallInfo(CallID, NumBytesForCall);
8881 };
8882
8883 unsigned FrameID = MachineOutlinerDefault;
8884 NumBytesToCreateFrame += 4;
8885
8886 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8887 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8888 });
8889
8890 // We check to see if CFI Instructions are present, and if they are
8891 // we find the number of CFI Instructions in the candidates.
8892 unsigned CFICount = 0;
8893 for (auto &I : RepeatedSequenceLocs[0]) {
8894 if (I.isCFIInstruction())
8895 CFICount++;
8896 }
8897
8898 // We compare the number of found CFI Instructions to the number of CFI
8899 // instructions in the parent function for each candidate. We must check this
8900 // since if we outline one of the CFI instructions in a function, we have to
8901 // outline them all for correctness. If we do not, the address offsets will be
8902 // incorrect between the two sections of the program.
8903 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8904 std::vector<MCCFIInstruction> CFIInstructions =
8905 C.getMF()->getFrameInstructions();
8906
8907 if (CFICount > 0 && CFICount != CFIInstructions.size())
8908 return std::nullopt;
8909 }
8910
8911  // Returns true if an instruction is safe to fix up, false otherwise.
8912 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8913 if (MI.isCall())
8914 return true;
8915
8916 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8917 !MI.readsRegister(AArch64::SP, &TRI))
8918 return true;
8919
8920 // Any modification of SP will break our code to save/restore LR.
8921 // FIXME: We could handle some instructions which add a constant
8922 // offset to SP, with a bit more work.
8923 if (MI.modifiesRegister(AArch64::SP, &TRI))
8924 return false;
8925
8926 // At this point, we have a stack instruction that we might need to
8927 // fix up. We'll handle it if it's a load or store.
8928 if (MI.mayLoadOrStore()) {
8929 const MachineOperand *Base; // Filled with the base operand of MI.
8930 int64_t Offset; // Filled with the offset of MI.
8931 bool OffsetIsScalable;
8932
8933 // Does it allow us to offset the base operand and is the base the
8934 // register SP?
8935 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8936 !Base->isReg() || Base->getReg() != AArch64::SP)
8937 return false;
8938
8939      // Fix-up code below assumes byte offsets.
8940 if (OffsetIsScalable)
8941 return false;
8942
8943 // Find the minimum/maximum offset for this instruction and check
8944 // if fixing it up would be in range.
8945 int64_t MinOffset,
8946 MaxOffset; // Unscaled offsets for the instruction.
8947 // The scale to multiply the offsets by.
8948 TypeSize Scale(0U, false), DummyWidth(0U, false);
8949 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8950
8951 Offset += 16; // Update the offset to what it would be if we outlined.
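      // For example (illustrative): a load from [sp, #8] in the candidate
      // would address [sp, #24] once the outlined function has spilled LR,
      // so the adjusted offset must still be encodable for this opcode.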
8952 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8953 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8954 return false;
8955
8956 // It's in range, so we can outline it.
8957 return true;
8958 }
8959
8960 // FIXME: Add handling for instructions like "add x0, sp, #8".
8961
8962 // We can't fix it up, so don't outline it.
8963 return false;
8964 };
8965
8966 // True if it's possible to fix up each stack instruction in this sequence.
8967 // Important for frames/call variants that modify the stack.
8968 bool AllStackInstrsSafe =
8969 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
8970
8971 // If the last instruction in any candidate is a terminator, then we should
8972 // tail call all of the candidates.
8973 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8974 FrameID = MachineOutlinerTailCall;
8975 NumBytesToCreateFrame = 0;
8976 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8977 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8978 }
8979
8980 else if (LastInstrOpcode == AArch64::BL ||
8981 ((LastInstrOpcode == AArch64::BLR ||
8982 LastInstrOpcode == AArch64::BLRNoIP) &&
8983 !HasBTI)) {
8984 // FIXME: Do we need to check if the code after this uses the value of LR?
8985 FrameID = MachineOutlinerThunk;
8986 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8987 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8988 }
8989
8990 else {
8991 // We need to decide how to emit calls + frames. We can always emit the same
8992 // frame if we don't need to save to the stack. If we have to save to the
8993 // stack, then we need a different frame.
8994 unsigned NumBytesNoStackCalls = 0;
8995 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8996
8997 // Check if we have to save LR.
8998 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8999 bool LRAvailable =
9000 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
9001 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9002 : true;
9003 // If we have a noreturn caller, then we're going to be conservative and
9004 // say that we have to save LR. If we don't have a ret at the end of the
9005 // block, then we can't reason about liveness accurately.
9006 //
9007 // FIXME: We can probably do better than always disabling this in
9008 // noreturn functions by fixing up the liveness info.
9009 bool IsNoReturn =
9010 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9011
9012 // Is LR available? If so, we don't need a save.
9013 if (LRAvailable && !IsNoReturn) {
9014 NumBytesNoStackCalls += 4;
9015 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9016 CandidatesWithoutStackFixups.push_back(C);
9017 }
9018
9019 // Is an unused register available? If so, we won't modify the stack, so
9020 // we can outline with the same frame type as those that don't save LR.
9021 else if (findRegisterToSaveLRTo(C)) {
9022 NumBytesNoStackCalls += 12;
9023 C.setCallInfo(MachineOutlinerRegSave, 12);
9024 CandidatesWithoutStackFixups.push_back(C);
9025 }
9026
9027 // Is SP used in the sequence at all? If not, we don't have to modify
9028 // the stack, so we are guaranteed to get the same frame.
9029 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9030 NumBytesNoStackCalls += 12;
9031 C.setCallInfo(MachineOutlinerDefault, 12);
9032 CandidatesWithoutStackFixups.push_back(C);
9033 }
9034
9035 // If we outline this, we need to modify the stack. Pretend we don't
9036 // outline this by saving all of its bytes.
9037 else {
9038 NumBytesNoStackCalls += SequenceSize;
9039 }
9040 }
9041
9042 // If there are no places where we have to save LR, then note that we
9043 // don't have to update the stack. Otherwise, give every candidate the
9044 // default call type, as long as it's safe to do so.
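    // For example (illustrative): with three candidates that all qualify for
    // MachineOutlinerNoLRSave, NumBytesNoStackCalls is 3 * 4 = 12, which is
    // not greater than 3 * 12, so the cheaper no-stack-fixup variants win.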
9045 if (!AllStackInstrsSafe ||
9046 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9047 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9048 FrameID = MachineOutlinerNoLRSave;
9049 if (RepeatedSequenceLocs.size() < MinRepeats)
9050 return std::nullopt;
9051 } else {
9052 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9053
9054 // Bugzilla ID: 46767
9055 // TODO: Check if fixing up the stack more than once is safe so we can
9056 // outline these.
9057 //
9058 // An outline resulting in a caller that requires stack fixups at the
9059 // callsite to a callee that also requires stack fixups can happen when
9060 // there are no available registers at the candidate callsite for a
9061 // candidate that itself also has calls.
9062 //
9063 // In other words if function_containing_sequence in the following pseudo
9064 // assembly requires that we save LR at the point of the call, but there
9065 // are no available registers: in this case we save using SP and as a
9066 // result the SP offsets require stack fixups by multiples of 16.
9067 //
9068 // function_containing_sequence:
9069 // ...
9070 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9071 // call OUTLINED_FUNCTION_N
9072 // restore LR from SP
9073 // ...
9074 //
9075 // OUTLINED_FUNCTION_N:
9076 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9077 // ...
9078 // bl foo
9079 // restore LR from SP
9080 // ret
9081 //
9082 // Because the code to handle more than one stack fixup does not
9083 // currently have the proper checks for legality, these cases will assert
9084 // in the AArch64 MachineOutliner. This is because the code to do this
9085 // needs more hardening, testing, better checks that generated code is
9086 // legal, etc., and because it is only verified to handle a single pass of
9087 // stack fixup.
9088 //
9089 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9090 // these cases until they are known to be handled. Bugzilla 46767 is
9091 // referenced in comments at the assert site.
9092 //
9093 // To avoid asserting (or generating non-legal code on noassert builds)
9094 // we remove all candidates which would need more than one stack fixup by
9095 // pruning the cases where the candidate has calls while also having no
9096 // available LR and having no available general purpose registers to copy
9097 // LR to (i.e. one extra stack save/restore).
9098 //
9099 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9100 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9101 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9102 return (llvm::any_of(C, IsCall)) &&
9103 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9104 !findRegisterToSaveLRTo(C));
9105 });
9106 }
9107 }
9108
9109 // If we dropped all of the candidates, bail out here.
9110 if (RepeatedSequenceLocs.size() < MinRepeats)
9111 return std::nullopt;
9112 }
9113
9114 // Does every candidate's MBB contain a call? If so, then we might have a call
9115 // in the range.
9116 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9117 // Check if the range contains a call. These require a save + restore of the
9118 // link register.
9119 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9120 bool ModStackToSaveLR = false;
9121 if (any_of(drop_end(FirstCand),
9122 [](const MachineInstr &MI) { return MI.isCall(); }))
9123 ModStackToSaveLR = true;
9124
9125 // Handle the last instruction separately. If this is a tail call, then the
9126 // last instruction is a call. We don't want to save + restore in this case.
9127 // However, the last instruction could be a call even when it is not
9128 // valid to tail call this sequence. We should consider this case as
9129 // well.
9130 else if (FrameID != MachineOutlinerThunk &&
9131 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9132 ModStackToSaveLR = true;
9133
9134 if (ModStackToSaveLR) {
9135 // We can't fix up the stack. Bail out.
9136 if (!AllStackInstrsSafe)
9137 return std::nullopt;
9138
9139 // Save + restore LR.
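// (4 bytes for the STRXpre spill of LR plus 4 bytes for the matching
// LDRXpost reload emitted by buildOutlinedFrame.)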
9140 NumBytesToCreateFrame += 8;
9141 }
9142 }
9143
9144 // If we have CFI instructions, we can only outline if the outlined section
9145 // can be a tail call
9146 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9147 return std::nullopt;
9148
9149 return std::make_unique<outliner::OutlinedFunction>(
9150 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9151}
9152
9153 void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9154 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9155 // If a bunch of candidates reach this point they must agree on their return
9156 // address signing. It is therefore enough to just consider the signing
9157 // behaviour of one of them.
9158 const auto &CFn = Candidates.front().getMF()->getFunction();
9159
9160 if (CFn.hasFnAttribute("ptrauth-returns"))
9161 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9162 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9163 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9164 // Since all candidates belong to the same module, just copy the
9165 // function-level attributes of an arbitrary function.
9166 if (CFn.hasFnAttribute("sign-return-address"))
9167 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9168 if (CFn.hasFnAttribute("sign-return-address-key"))
9169 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9170
9171 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9172}
9173
9174 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9175 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9176 const Function &F = MF.getFunction();
9177
9178 // Can F be deduplicated by the linker? If it can, don't outline from it.
9179 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9180 return false;
9181
9182 // Don't outline from functions with section markings; the program could
9183 // expect that all the code is in the named section.
9184 // FIXME: Allow outlining from multiple functions with the same section
9185 // marking.
9186 if (F.hasSection())
9187 return false;
9188
9189 // Outlining from functions with redzones is unsafe since the outliner may
9190 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9191 // outline from it.
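// (hasRedZone() is unset until frame lowering has decided, so value_or(true)
// conservatively treats "unknown" the same as "has a red zone".)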
9193 if (!AFI || AFI->hasRedZone().value_or(true))
9194 return false;
9195
9196 // FIXME: Determine whether it is safe to outline from functions which contain
9197 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9198 // outlined together and ensure it is safe to outline with async unwind info,
9199 // required for saving & restoring VG around calls.
9200 if (AFI->hasStreamingModeChanges())
9201 return false;
9202
9203 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9205 return false;
9206
9207 // It's safe to outline from MF.
9208 return true;
9209}
9210
9213 unsigned &Flags) const {
9215 "Must track liveness!");
9217 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9218 Ranges;
9219 // According to the AArch64 Procedure Call Standard, the following are
9220 // undefined on entry/exit from a function call:
9221 //
9222 // * Registers x16, x17, (and thus w16, w17)
9223 // * Condition codes (and thus the NZCV register)
9224 //
9225 // If any of these registers are used inside or live across an outlined
9226 // function, then they may be modified later, either by the compiler or
9227 // some other tool (like the linker).
9228 //
9229 // To avoid outlining in these situations, partition each block into ranges
9230 // where these registers are dead. We will only outline from those ranges.
9232 auto AreAllUnsafeRegsDead = [&LRU]() {
9233 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
9234 LRU.available(AArch64::NZCV);
9235 };
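// (Liveness is tracked per register unit, so querying W16/W17 also covers
// X16/X17, which share the same units.)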
9236
9237 // We need to know if LR is live across an outlining boundary later on in
9238 // order to decide how we'll create the outlined call, frame, etc.
9239 //
9240 // It's pretty expensive to check this for *every candidate* within a block.
9241 // That's some potentially n^2 behaviour, since in the worst case, we'd need
9242 // to compute liveness from the end of the block for O(n) candidates within
9243 // the block.
9244 //
9245 // So, to improve the average case, let's keep track of liveness from the end
9246 // of the block to the beginning of *every outlinable range*. If we know that
9247 // LR is available in every range we could outline from, then we know that
9248 // we don't need to check liveness for any candidate within that range.
9249 bool LRAvailableEverywhere = true;
9250 // Compute liveness bottom-up.
9251 LRU.addLiveOuts(MBB);
9252 // Update flags that require info about the entire MBB.
9253 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
9254 if (MI.isCall() && !MI.isTerminator())
9255 Flags |= MachineOutlinerMBBFlags::HasCalls;
9256 };
9257 // Range: [RangeBegin, RangeEnd)
9258 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
9259 unsigned RangeLen;
9260 auto CreateNewRangeStartingAt =
9261 [&RangeBegin, &RangeEnd,
9262 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
9263 RangeBegin = NewBegin;
9264 RangeEnd = std::next(RangeBegin);
9265 RangeLen = 0;
9266 };
9267 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
9268 // At least one unsafe register is not dead. We do not want to outline at
9269 // this point. If it is long enough to outline from, save the range
9270 // [RangeBegin, RangeEnd).
9271 if (RangeLen > 1)
9272 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
9273 };
9274 // Find the first point where all unsafe registers are dead.
9275 // FIND: <safe instr> <-- end of first potential range
9276 // SKIP: <unsafe def>
9277 // SKIP: ... everything between ...
9278 // SKIP: <unsafe use>
9279 auto FirstPossibleEndPt = MBB.instr_rbegin();
9280 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
9281 LRU.stepBackward(*FirstPossibleEndPt);
9282 // Update flags that impact how we outline across the entire block,
9283 // regardless of safety.
9284 UpdateWholeMBBFlags(*FirstPossibleEndPt);
9285 if (AreAllUnsafeRegsDead())
9286 break;
9287 }
9288 // If we exhausted the entire block, we have no safe ranges to outline.
9289 if (FirstPossibleEndPt == MBB.instr_rend())
9290 return Ranges;
9291 // Current range.
9292 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
9293 // StartPt points to the first place where all unsafe registers
9294 // are dead (if there is any such point). Begin partitioning the MBB into
9295 // ranges.
9296 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
9297 LRU.stepBackward(MI);
9298 UpdateWholeMBBFlags(MI);
9299 if (!AreAllUnsafeRegsDead()) {
9300 SaveRangeIfNonEmpty();
9301 CreateNewRangeStartingAt(MI.getIterator());
9302 continue;
9303 }
9304 LRAvailableEverywhere &= LRU.available(AArch64::LR);
9305 RangeBegin = MI.getIterator();
9306 ++RangeLen;
9307 }
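// (Because the walk is bottom-up, every additional safe instruction extends
// the current range backwards; RangeBegin is re-anchored on each iteration so
// it always points at the earliest instruction known to be safe.)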
9308 // Above loop misses the last (or only) range. If we are still safe, then
9309 // let's save the range.
9310 if (AreAllUnsafeRegsDead())
9311 SaveRangeIfNonEmpty();
9312 if (Ranges.empty())
9313 return Ranges;
9314 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
9315 // the order.
9316 std::reverse(Ranges.begin(), Ranges.end());
9317 // If there is at least one outlinable range where LR is unavailable
9318 // somewhere, remember that.
9319 if (!LRAvailableEverywhere)
9320 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
9321 return Ranges;
9322}
9323
9327 unsigned Flags) const {
9328 MachineInstr &MI = *MIT;
9329 MachineBasicBlock *MBB = MI.getParent();
9330 MachineFunction *MF = MBB->getParent();
9331 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
9332
9333 // Don't outline anything used for return address signing. The outlined
9334 // function will get signed later if needed.
9335 switch (MI.getOpcode()) {
9336 case AArch64::PACM:
9337 case AArch64::PACIASP:
9338 case AArch64::PACIBSP:
9339 case AArch64::PACIASPPC:
9340 case AArch64::PACIBSPPC:
9341 case AArch64::AUTIASP:
9342 case AArch64::AUTIBSP:
9343 case AArch64::AUTIASPPCi:
9344 case AArch64::AUTIASPPCr:
9345 case AArch64::AUTIBSPPCi:
9346 case AArch64::AUTIBSPPCr:
9347 case AArch64::RETAA:
9348 case AArch64::RETAB:
9349 case AArch64::RETAASPPCi:
9350 case AArch64::RETAASPPCr:
9351 case AArch64::RETABSPPCi:
9352 case AArch64::RETABSPPCr:
9353 case AArch64::EMITBKEY:
9354 case AArch64::PAUTH_PROLOGUE:
9355 case AArch64::PAUTH_EPILOGUE:
9356 return outliner::InstrType::Illegal;
9357 }
9358
9359 // Don't outline LOHs.
9360 if (FuncInfo->getLOHRelated().count(&MI))
9361 return outliner::InstrType::Illegal;
9362
9363 // We can only outline these if we will tail call the outlined function, or
9364 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
9365 // in a tail call.
9366 //
9367 // FIXME: If the proper fixups for the offset are implemented, this should be
9368 // possible.
9369 if (MI.isCFIInstruction())
9370 return outliner::InstrType::Legal;
9371
9372 // Is this a terminator for a basic block?
9373 if (MI.isTerminator())
9374 // TargetInstrInfo::getOutliningType has already filtered out anything
9375 // that would break this, so we can allow it here.
9376 return outliner::InstrType::Legal;
9377
9378 // Make sure none of the operands are un-outlinable.
9379 for (const MachineOperand &MOP : MI.operands()) {
9380 // A check preventing CFI indices was here before, but only CFI
9381 // instructions should have those.
9382 assert(!MOP.isCFIIndex());
9383
9384 // If it uses LR or W30 explicitly, then don't touch it.
9385 if (MOP.isReg() && !MOP.isImplicit() &&
9386 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
9387 return outliner::InstrType::Illegal;
9388 }
9389
9390 // Special cases for instructions that can always be outlined, but will fail
9391 // the later tests. e.g. ADRPs, which are PC-relative use LR, but can always
9392 // be outlined because they don't require a *specific* value to be in LR.
9393 if (MI.getOpcode() == AArch64::ADRP)
9394 return outliner::InstrType::Legal;
9395
9396 // If MI is a call we might be able to outline it. We don't want to outline
9397 // any calls that rely on the position of items on the stack. When we outline
9398 // something containing a call, we have to emit a save and restore of LR in
9399 // the outlined function. Currently, this always happens by saving LR to the
9400 // stack. Thus, if we outline, say, half the parameters for a function call
9401 // plus the call, then we'll break the callee's expectations for the layout
9402 // of the stack.
9403 //
9404 // FIXME: Allow calls to functions which construct a stack frame, as long
9405 // as they don't access arguments on the stack.
9406 // FIXME: Figure out some way to analyze functions defined in other modules.
9407 // We should be able to compute the memory usage based on the IR calling
9408 // convention, even if we can't see the definition.
9409 if (MI.isCall()) {
9410 // Get the function associated with the call. Look at each operand and find
9411 // the one that represents the callee and get its name.
9412 const Function *Callee = nullptr;
9413 for (const MachineOperand &MOP : MI.operands()) {
9414 if (MOP.isGlobal()) {
9415 Callee = dyn_cast<Function>(MOP.getGlobal());
9416 break;
9417 }
9418 }
9419
9420 // Never outline calls to mcount. There isn't any rule that would require
9421 // this, but the Linux kernel's "ftrace" feature depends on it.
9422 if (Callee && Callee->getName() == "\01_mcount")
9423 return outliner::InstrType::Illegal;
9424
9425 // If we don't know anything about the callee, assume it depends on the
9426 // stack layout of the caller. In that case, it's only legal to outline
9427 // as a tail-call. Explicitly list the call instructions we know about so we
9428 // don't get unexpected results with call pseudo-instructions.
9429 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
9430 if (MI.getOpcode() == AArch64::BLR ||
9431 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
9432 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
9433
9434 if (!Callee)
9435 return UnknownCallOutlineType;
9436
9437 // We have a function we have information about. Check if it's something we
9438 // can safely outline.
9439 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
9440
9441 // We don't know what's going on with the callee at all. Don't touch it.
9442 if (!CalleeMF)
9443 return UnknownCallOutlineType;
9444
9445 // Check if we know anything about the callee saves on the function. If we
9446 // don't, then don't touch it, since that implies that we haven't
9447 // computed anything about its stack frame yet.
9448 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
9449 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
9450 MFI.getNumObjects() > 0)
9451 return UnknownCallOutlineType;
9452
9453 // At this point, we can say that CalleeMF ought to not pass anything on the
9454 // stack. Therefore, we can outline it.
9455 return outliner::InstrType::Legal;
9456 }
9457
9458 // Don't touch the link register or W30.
9459 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
9460 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
9461 return outliner::InstrType::Illegal;
9462
9463 // Don't outline BTI instructions, because that will prevent the outlining
9464 // site from being indirectly callable.
9465 if (hasBTISemantics(MI))
9466 return outliner::InstrType::Illegal;
9467
9468 return outliner::InstrType::Legal;
9469}
9470
9471void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9472 for (MachineInstr &MI : MBB) {
9473 const MachineOperand *Base;
9474 TypeSize Width(0, false);
9475 int64_t Offset;
9476 bool OffsetIsScalable;
9477
9478 // Is this a load or store with an immediate offset with SP as the base?
9479 if (!MI.mayLoadOrStore() ||
9480 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
9481 &RI) ||
9482 (Base->isReg() && Base->getReg() != AArch64::SP))
9483 continue;
9484
9485 // It is, so we have to fix it up.
9486 TypeSize Scale(0U, false);
9487 int64_t Dummy1, Dummy2;
9488
9490 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9491 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
9492 assert(Scale != 0 && "Unexpected opcode!");
9493 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9494
9495 // We've pushed the return address to the stack, so add 16 to the offset.
9496 // This is safe, since we already checked if it would overflow when we
9497 // checked if this instruction was legal to outline.
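// For example, an LDRXui of [sp, #8] (scale 8, scaled immediate 1) becomes
// (8 + 16) / 8 = 3, i.e. [sp, #24], after LR has been spilled.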
9498 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9499 StackOffsetOperand.setImm(NewImm);
9500 }
9501}
9502
9503 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9504 const AArch64InstrInfo *TII,
9505 bool ShouldSignReturnAddr) {
9506 if (!ShouldSignReturnAddr)
9507 return;
9508
9509 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9512 TII->get(AArch64::PAUTH_EPILOGUE))
9514}
9515
9518 const outliner::OutlinedFunction &OF) const {
9519
9521
9523 FI->setOutliningStyle("Tail Call");
9525 // For thunk outlining, rewrite the last instruction from a call to a
9526 // tail-call.
9527 MachineInstr *Call = &*--MBB.instr_end();
9528 unsigned TailOpcode;
9529 if (Call->getOpcode() == AArch64::BL) {
9530 TailOpcode = AArch64::TCRETURNdi;
9531 } else {
9532 assert(Call->getOpcode() == AArch64::BLR ||
9533 Call->getOpcode() == AArch64::BLRNoIP);
9534 TailOpcode = AArch64::TCRETURNriALL;
9535 }
9536 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9537 .add(Call->getOperand(0))
9538 .addImm(0);
9539 MBB.insert(MBB.end(), TC);
9540 Call->eraseFromParent();
9541
9542 FI->setOutliningStyle("Thunk");
9543 }
9544
9545 bool IsLeafFunction = true;
9546
9547 // Is there a call in the outlined range?
9548 auto IsNonTailCall = [](const MachineInstr &MI) {
9549 return MI.isCall() && !MI.isReturn();
9550 };
9551
9552 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9553 // Fix up the instructions in the range, since we're going to modify the
9554 // stack.
9555
9556 // Bugzilla ID: 46767
9557 // TODO: Check if fixing up twice is safe so we can outline these.
9559 "Can only fix up stack references once");
9560 fixupPostOutline(MBB);
9561
9562 IsLeafFunction = false;
9563
9564 // LR has to be a live in so that we can save it.
9565 if (!MBB.isLiveIn(AArch64::LR))
9566 MBB.addLiveIn(AArch64::LR);
9567
9570
9573 Et = std::prev(MBB.end());
9574
9575 // Insert a save before the outlined region
9576 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9577 .addReg(AArch64::SP, RegState::Define)
9578 .addReg(AArch64::LR)
9579 .addReg(AArch64::SP)
9580 .addImm(-16);
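// (This is "str x30, [sp, #-16]!", which keeps SP 16-byte aligned while LR
// is spilled.)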
9581 It = MBB.insert(It, STRXpre);
9582
9584 const TargetSubtargetInfo &STI = MF.getSubtarget();
9585 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9586 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9587
9588 // Add a CFI saying the stack was moved 16 B down.
9589 int64_t StackPosEntry =
9591 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9592 .addCFIIndex(StackPosEntry)
9594
9595 // Add a CFI saying that the LR that we want to find is now 16 B higher
9596 // than before.
9597 int64_t LRPosEntry = MF.addFrameInst(
9598 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9599 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9600 .addCFIIndex(LRPosEntry)
9602 }
9603
9604 // Insert a restore before the terminator for the function.
9605 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9606 .addReg(AArch64::SP, RegState::Define)
9607 .addReg(AArch64::LR, RegState::Define)
9608 .addReg(AArch64::SP)
9609 .addImm(16);
9610 Et = MBB.insert(Et, LDRXpost);
9611 }
9612
9613 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9614
9615 // If this is a tail call outlined function, then there's already a return.
9618 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9619 return;
9620 }
9621
9622 // It's not a tail call, so we have to insert the return ourselves.
9623
9624 // LR has to be a live in so that we can return to it.
9625 if (!MBB.isLiveIn(AArch64::LR))
9626 MBB.addLiveIn(AArch64::LR);
9627
9628 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9629 .addReg(AArch64::LR);
9630 MBB.insert(MBB.end(), ret);
9631
9632 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9633
9634 FI->setOutliningStyle("Function");
9635
9636 // Did we have to modify the stack by saving the link register?
9638 return;
9639
9640 // We modified the stack.
9641 // Walk over the basic block and fix up all the stack accesses.
9642 fixupPostOutline(MBB);
9643}
9644
9648
9649 // Are we tail calling?
9650 if (C.CallConstructionID == MachineOutlinerTailCall) {
9651 // If yes, then we can just branch to the label.
9652 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9653 .addGlobalAddress(M.getNamedValue(MF.getName()))
9654 .addImm(0));
9655 return It;
9656 }
9657
9658 // Are we saving the link register?
9659 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9660 C.CallConstructionID == MachineOutlinerThunk) {
9661 // No, so just insert the call.
9662 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9663 .addGlobalAddress(M.getNamedValue(MF.getName())));
9664 return It;
9665 }
9666
9667 // We want to return the spot where we inserted the call.
9669
9670 // Instructions for saving and restoring LR around the call instruction we're
9671 // going to insert.
9672 MachineInstr *Save;
9673 MachineInstr *Restore;
9674 // Can we save to a register?
9675 if (C.CallConstructionID == MachineOutlinerRegSave) {
9676 // FIXME: This logic should be sunk into a target-specific interface so that
9677 // we don't have to recompute the register.
9678 Register Reg = findRegisterToSaveLRTo(C);
9679 assert(Reg && "No callee-saved register available?");
9680
9681 // LR has to be a live in so that we can save it.
9682 if (!MBB.isLiveIn(AArch64::LR))
9683 MBB.addLiveIn(AArch64::LR);
9684
9685 // Save and restore LR from Reg.
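// (ORRXrs with XZR as the first source is the canonical "mov Xd, Xm" alias,
// so Save is "mov Reg, x30" and Restore is "mov x30, Reg".)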
9686 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9687 .addReg(AArch64::XZR)
9688 .addReg(AArch64::LR)
9689 .addImm(0);
9690 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9691 .addReg(AArch64::XZR)
9692 .addReg(Reg)
9693 .addImm(0);
9694 } else {
9695 // We have the default case. Save and restore from SP.
9696 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9697 .addReg(AArch64::SP, RegState::Define)
9698 .addReg(AArch64::LR)
9699 .addReg(AArch64::SP)
9700 .addImm(-16);
9701 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9702 .addReg(AArch64::SP, RegState::Define)
9703 .addReg(AArch64::LR, RegState::Define)
9704 .addReg(AArch64::SP)
9705 .addImm(16);
9706 }
9707
9708 It = MBB.insert(It, Save);
9709 It++;
9710
9711 // Insert the call.
9712 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9713 .addGlobalAddress(M.getNamedValue(MF.getName())));
9714 CallPt = It;
9715 It++;
9716
9717 It = MBB.insert(It, Restore);
9718 return CallPt;
9719}
9720
9722 MachineFunction &MF) const {
9723 return MF.getFunction().hasMinSize();
9724}
9725
9728 DebugLoc &DL,
9729 bool AllowSideEffects) const {
9730 const MachineFunction &MF = *MBB.getParent();
9732 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9733
9734 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9735 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9736 } else if (STI.isSVEorStreamingSVEAvailable()) {
9737 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9738 .addImm(0)
9739 .addImm(0);
9740 } else if (STI.isNeonAvailable()) {
9741 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9742 .addImm(0);
9743 } else {
9744 // This is a streaming-compatible function without SVE. We don't have full
9745 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
9746 // Since `movi v..` would be illegal, use `fmov d..` instead.
9747 assert(STI.hasNEON() && "Expected to have NEON.");
9748 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
9749 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
9750 }
9751}
9752
9753std::optional<DestSourcePair>
9755
9756 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9757 // and zero immediate operands are used as an alias for the mov instruction.
9758 if (((MI.getOpcode() == AArch64::ORRWrs &&
9759 MI.getOperand(1).getReg() == AArch64::WZR &&
9760 MI.getOperand(3).getImm() == 0x0) ||
9761 (MI.getOpcode() == AArch64::ORRWrr &&
9762 MI.getOperand(1).getReg() == AArch64::WZR)) &&
9763 // Check that the w->w move is not a zero-extending w->x mov.
9764 (!MI.getOperand(0).getReg().isVirtual() ||
9765 MI.getOperand(0).getSubReg() == 0) &&
9766 (!MI.getOperand(0).getReg().isPhysical() ||
9767 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9768 AArch64::X0,
9769 /*TRI=*/nullptr) == -1))
9770 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9771
9772 if (MI.getOpcode() == AArch64::ORRXrs &&
9773 MI.getOperand(1).getReg() == AArch64::XZR &&
9774 MI.getOperand(3).getImm() == 0x0)
9775 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9776
9777 return std::nullopt;
9778}
9779
9780std::optional<DestSourcePair>
9782 if ((MI.getOpcode() == AArch64::ORRWrs &&
9783 MI.getOperand(1).getReg() == AArch64::WZR &&
9784 MI.getOperand(3).getImm() == 0x0) ||
9785 (MI.getOpcode() == AArch64::ORRWrr &&
9786 MI.getOperand(1).getReg() == AArch64::WZR))
9787 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9788 return std::nullopt;
9789}
9790
9791std::optional<RegImmPair>
9793 int Sign = 1;
9794 int64_t Offset = 0;
9795
9796 // TODO: Handle cases where Reg is a super- or sub-register of the
9797 // destination register.
9798 const MachineOperand &Op0 = MI.getOperand(0);
9799 if (!Op0.isReg() || Reg != Op0.getReg())
9800 return std::nullopt;
9801
9802 switch (MI.getOpcode()) {
9803 default:
9804 return std::nullopt;
9805 case AArch64::SUBWri:
9806 case AArch64::SUBXri:
9807 case AArch64::SUBSWri:
9808 case AArch64::SUBSXri:
9809 Sign *= -1;
9810 [[fallthrough]];
9811 case AArch64::ADDSWri:
9812 case AArch64::ADDSXri:
9813 case AArch64::ADDWri:
9814 case AArch64::ADDXri: {
9815 // TODO: Third operand can be global address (usually some string).
9816 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9817 !MI.getOperand(2).isImm())
9818 return std::nullopt;
9819 int Shift = MI.getOperand(3).getImm();
9820 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9821 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9822 }
9823 }
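// For example, "SUBXri x0, x1, #1, lsl #12" queried with Reg == x0 yields
// {x1, -4096}.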
9824 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9825}
9826
9827/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9828/// the destination register then, if possible, describe the value in terms of
9829/// the source register.
9830static std::optional<ParamLoadedValue>
9832 const TargetInstrInfo *TII,
9833 const TargetRegisterInfo *TRI) {
9834 auto DestSrc = TII->isCopyLikeInstr(MI);
9835 if (!DestSrc)
9836 return std::nullopt;
9837
9838 Register DestReg = DestSrc->Destination->getReg();
9839 Register SrcReg = DestSrc->Source->getReg();
9840
9841 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9842
9843 // If the described register is the destination, just return the source.
9844 if (DestReg == DescribedReg)
9845 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9846
9847 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
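// For example, describing x0 across "mov w0, w1" (ORRWrs): the write to w0
// zero-extends into x0, so x0 holds the zero-extended value of w1.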
9848 if (MI.getOpcode() == AArch64::ORRWrs &&
9849 TRI->isSuperRegister(DestReg, DescribedReg))
9850 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9851
9852 // We may need to describe the lower part of a ORRXrs move.
9853 if (MI.getOpcode() == AArch64::ORRXrs &&
9854 TRI->isSubRegister(DestReg, DescribedReg)) {
9855 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9856 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9857 }
9858
9859 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9860 "Unhandled ORR[XW]rs copy case");
9861
9862 return std::nullopt;
9863}
9864
9866 // Functions cannot be split to different sections on AArch64 if they have
9867 // a red zone. This is because relaxing a cross-section branch may require
9868 // incrementing the stack pointer to spill a register, which would overwrite
9869 // the red zone.
9870 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9871 return false;
9872
9874}
9875
9877 const MachineBasicBlock &MBB) const {
9878 // Asm Goto blocks can contain conditional branches to goto labels, which can
9879 // get moved out of range of the branch instruction.
9880 auto isAsmGoto = [](const MachineInstr &MI) {
9881 return MI.getOpcode() == AArch64::INLINEASM_BR;
9882 };
9883 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9884 return false;
9885
9886 // Because jump tables are label-relative instead of table-relative, they all
9887 // must be in the same section or relocation fixup handling will fail.
9888
9889 // Check if MBB is a jump table target
9891 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9892 return llvm::is_contained(JTE.MBBs, &MBB);
9893 };
9894 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9895 return false;
9896
9897 // Check if MBB contains a jump table lookup
9898 for (const MachineInstr &MI : MBB) {
9899 switch (MI.getOpcode()) {
9900 case TargetOpcode::G_BRJT:
9901 case AArch64::JumpTableDest32:
9902 case AArch64::JumpTableDest16:
9903 case AArch64::JumpTableDest8:
9904 return false;
9905 default:
9906 continue;
9907 }
9908 }
9909
9910 // MBB isn't a special case, so it's safe to be split to the cold section.
9911 return true;
9912}
9913
9914std::optional<ParamLoadedValue>
9916 Register Reg) const {
9917 const MachineFunction *MF = MI.getMF();
9919 switch (MI.getOpcode()) {
9920 case AArch64::MOVZWi:
9921 case AArch64::MOVZXi: {
9922 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9923 // 64-bit parameters, so we need to consider super-registers.
9924 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9925 return std::nullopt;
9926
9927 if (!MI.getOperand(1).isImm())
9928 return std::nullopt;
9929 int64_t Immediate = MI.getOperand(1).getImm();
9930 int Shift = MI.getOperand(2).getImm();
9931 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9932 nullptr);
9933 }
9934 case AArch64::ORRWrs:
9935 case AArch64::ORRXrs:
9936 return describeORRLoadedValue(MI, Reg, this, TRI);
9937 }
9938
9940}
9941
9943 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9944 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9945 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9946 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9947
9948 // Anyexts are nops.
9949 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9950 return true;
9951
9952 Register DefReg = ExtMI.getOperand(0).getReg();
9953 if (!MRI.hasOneNonDBGUse(DefReg))
9954 return false;
9955
9956 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9957 // addressing mode.
9958 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9959 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9960}
9961
9963 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9964}
9965
9966bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9967 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9968}
9969
9970bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9971 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9972}
9973
9974unsigned int
9976 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9977}
9978
9979bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9980 unsigned Scale) const {
9981 if (Offset && Scale)
9982 return false;
9983
9984 // Check Reg + Imm
9985 if (!Scale) {
9986 // 9-bit signed offset
9987 if (isInt<9>(Offset))
9988 return true;
9989
9990 // 12-bit unsigned offset
9991 unsigned Shift = Log2_64(NumBytes);
9992 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9993 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9994 (Offset >> Shift) << Shift == Offset)
9995 return true;
9996 return false;
9997 }
9998
9999 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10000 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10001}
10002
10004 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10005 return AArch64::BLRNoIP;
10006 else
10007 return AArch64::BLR;
10008}
10009
10012 Register TargetReg, bool FrameSetup) const {
10013 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10014
10016 MachineFunction &MF = *MBB.getParent();
10017 const AArch64InstrInfo *TII =
10018 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10019 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10021
10022 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10023 MachineBasicBlock *LoopTestMBB =
10025 MF.insert(MBBInsertPoint, LoopTestMBB);
10026 MachineBasicBlock *LoopBodyMBB =
10028 MF.insert(MBBInsertPoint, LoopBodyMBB);
10030 MF.insert(MBBInsertPoint, ExitMBB);
10031 MachineInstr::MIFlag Flags =
10033
10034 // LoopTest:
10035 // SUB SP, SP, #ProbeSize
10036 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10037 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10038
10039 // CMP SP, TargetReg
10040 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10041 AArch64::XZR)
10042 .addReg(AArch64::SP)
10043 .addReg(TargetReg)
10045 .setMIFlags(Flags);
10046
10047 // B.<Cond> LoopExit
10048 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10050 .addMBB(ExitMBB)
10051 .setMIFlags(Flags);
10052
10053 // STR XZR, [SP]
10054 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10055 .addReg(AArch64::XZR)
10056 .addReg(AArch64::SP)
10057 .addImm(0)
10058 .setMIFlags(Flags);
10059
10060 // B loop
10061 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10062 .addMBB(LoopTestMBB)
10063 .setMIFlags(Flags);
10064
10065 // LoopExit:
10066 // MOV SP, TargetReg
10067 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10068 .addReg(TargetReg)
10069 .addImm(0)
10071 .setMIFlags(Flags);
10072
10073 // LDR XZR, [SP]
10074 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10075 .addReg(AArch64::XZR, RegState::Define)
10076 .addReg(AArch64::SP)
10077 .addImm(0)
10078 .setMIFlags(Flags);
10079
10080 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10082
10083 LoopTestMBB->addSuccessor(ExitMBB);
10084 LoopTestMBB->addSuccessor(LoopBodyMBB);
10085 LoopBodyMBB->addSuccessor(LoopTestMBB);
10086 MBB.addSuccessor(LoopTestMBB);
10087
10088 // Update liveins.
10089 if (MF.getRegInfo().reservedRegsFrozen())
10090 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10091
10092 return ExitMBB->begin();
10093}
10094
10095namespace {
10096class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10097 MachineFunction *MF;
10098 const TargetInstrInfo *TII;
10099 const TargetRegisterInfo *TRI;
10101
10102 /// The block of the loop
10103 MachineBasicBlock *LoopBB;
10104 /// The conditional branch of the loop
10105 MachineInstr *CondBranch;
10106 /// The compare instruction for loop control
10107 MachineInstr *Comp;
10108 /// The number of the operand of the loop counter value in Comp
10109 unsigned CompCounterOprNum;
10110 /// The instruction that updates the loop counter value
10111 MachineInstr *Update;
10112 /// The number of the operand of the loop counter value in Update
10113 unsigned UpdateCounterOprNum;
10114 /// The initial value of the loop counter
10115 Register Init;
10116 /// True iff Update is a predecessor of Comp
10117 bool IsUpdatePriorComp;
10118
10119 /// The normalized condition used by createTripCountGreaterCondition()
10121
10122public:
10123 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10124 MachineInstr *Comp, unsigned CompCounterOprNum,
10125 MachineInstr *Update, unsigned UpdateCounterOprNum,
10126 Register Init, bool IsUpdatePriorComp,
10128 : MF(Comp->getParent()->getParent()),
10129 TII(MF->getSubtarget().getInstrInfo()),
10130 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10131 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10132 CompCounterOprNum(CompCounterOprNum), Update(Update),
10133 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10134 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10135
10136 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10137 // Make the instructions for loop control be placed in stage 0.
10138 // The predecessors of Comp are considered by the caller.
10139 return MI == Comp;
10140 }
10141
10142 std::optional<bool> createTripCountGreaterCondition(
10143 int TC, MachineBasicBlock &MBB,
10144 SmallVectorImpl<MachineOperand> &CondParam) override {
10145 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10146 // Cond is normalized for such use.
10147 // The predecessors of the branch are assumed to have already been inserted.
10148 CondParam = Cond;
10149 return {};
10150 }
10151
10152 void createRemainingIterationsGreaterCondition(
10154 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10155
10156 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10157
10158 void adjustTripCount(int TripCountAdjust) override {}
10159
10160 bool isMVEExpanderSupported() override { return true; }
10161};
10162} // namespace
10163
10164/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
10165/// is replaced by ReplaceReg. The output register is newly created.
10166/// The other operands are unchanged from MI.
10167static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10168 Register ReplaceReg, MachineBasicBlock &MBB,
10169 MachineBasicBlock::iterator InsertTo) {
10172 const TargetRegisterInfo *TRI =
10175 Register Result = 0;
10176 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10177 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10178 Result = MRI.createVirtualRegister(
10179 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10180 NewMI->getOperand(I).setReg(Result);
10181 } else if (I == ReplaceOprNum) {
10182 MRI.constrainRegClass(
10183 ReplaceReg,
10184 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
10185 NewMI->getOperand(I).setReg(ReplaceReg);
10186 }
10187 }
10188 MBB.insert(InsertTo, NewMI);
10189 return Result;
10190}
10191
10192void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10195 // Create and accumulate conditions for next TC iterations.
10196 // Example:
10197 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10198 // # iteration of the kernel
10199 //
10200 // # insert the following instructions
10201 // cond = CSINCXr 0, 0, C, implicit $nzcv
10202 // counter = ADDXri counter, 1 # clone from this->Update
10203 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10204 // cond = CSINCXr cond, cond, C, implicit $nzcv
10205 // ... (repeat TC times)
10206 // SUBSXri cond, 0, implicit-def $nzcv
10207
10208 assert(CondBranch->getOpcode() == AArch64::Bcc);
10209 // CondCode to exit the loop
10211 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10212 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10214
10215 // Accumulate conditions to exit the loop
10216 Register AccCond = AArch64::XZR;
10217
10218 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10219 auto AccumulateCond = [&](Register CurCond,
10221 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10222 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10223 .addReg(NewCond, RegState::Define)
10224 .addReg(CurCond)
10225 .addReg(CurCond)
10227 return NewCond;
10228 };
10229
10230 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10231 // Update and Comp for I == 0 already exist in MBB
10232 // (MBB is an unrolled kernel)
10233 Register Counter;
10234 for (int I = 0; I <= TC; ++I) {
10235 Register NextCounter;
10236 if (I != 0)
10237 NextCounter =
10238 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10239
10240 AccCond = AccumulateCond(AccCond, CC);
10241
10242 if (I != TC) {
10243 if (I == 0) {
10244 if (Update != Comp && IsUpdatePriorComp) {
10245 Counter =
10246 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10247 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
10248 MBB.end());
10249 } else {
10250 // We can use the already calculated value.
10251 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
10252 }
10253 } else if (Update != Comp) {
10254 NextCounter =
10255 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10256 }
10257 }
10258 Counter = NextCounter;
10259 }
10260 } else {
10261 Register Counter;
10262 if (LastStage0Insts.empty()) {
10263 // Use the initial counter value (to test whether the trip count is
10264 // sufficient for the pipelined code to be executed).
10265 Counter = Init;
10266 if (IsUpdatePriorComp)
10267 Counter =
10268 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10269 } else {
10270 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10271 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10272 }
10273
10274 for (int I = 0; I <= TC; ++I) {
10275 Register NextCounter;
10276 NextCounter =
10277 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10278 AccCond = AccumulateCond(AccCond, CC);
10279 if (I != TC && Update != Comp)
10280 NextCounter =
10281 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10282 Counter = NextCounter;
10283 }
10284 }
10285
10286 // If AccCond == 0, the remainder is greater than TC.
10287 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
10288 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
10289 .addReg(AccCond)
10290 .addImm(0)
10291 .addImm(0);
10292 Cond.clear();
10294}
10295
10296static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
10297 Register &RegMBB, Register &RegOther) {
10298 assert(Phi.getNumOperands() == 5);
10299 if (Phi.getOperand(2).getMBB() == MBB) {
10300 RegMBB = Phi.getOperand(1).getReg();
10301 RegOther = Phi.getOperand(3).getReg();
10302 } else {
10303 assert(Phi.getOperand(4).getMBB() == MBB);
10304 RegMBB = Phi.getOperand(3).getReg();
10305 RegOther = Phi.getOperand(1).getReg();
10306 }
10307}
10308
10309static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
10310 if (!Reg.isVirtual())
10311 return false;
10312 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
10313 return MRI.getVRegDef(Reg)->getParent() != BB;
10314}
10315
10316/// If Reg is an induction variable, return true and set some parameters
10317static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
10318 MachineInstr *&UpdateInst,
10319 unsigned &UpdateCounterOprNum, Register &InitReg,
10320 bool &IsUpdatePriorComp) {
10321 // Example:
10322 //
10323 // Preheader:
10324 // InitReg = ...
10325 // LoopBB:
10326 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
10327 // Reg = COPY Reg0 ; COPY is ignored.
10328 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
10329 // ; Reg is the value calculated in the previous
10330 // ; iteration, so IsUpdatePriorComp == false.
10331
10332 if (LoopBB->pred_size() != 2)
10333 return false;
10334 if (!Reg.isVirtual())
10335 return false;
10336 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
10337 UpdateInst = nullptr;
10338 UpdateCounterOprNum = 0;
10339 InitReg = 0;
10340 IsUpdatePriorComp = true;
10341 Register CurReg = Reg;
10342 while (true) {
10343 MachineInstr *Def = MRI.getVRegDef(CurReg);
10344 if (Def->getParent() != LoopBB)
10345 return false;
10346 if (Def->isCopy()) {
10347 // Ignore copy instructions unless they contain subregisters
10348 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
10349 return false;
10350 CurReg = Def->getOperand(1).getReg();
10351 } else if (Def->isPHI()) {
10352 if (InitReg != 0)
10353 return false;
10354 if (!UpdateInst)
10355 IsUpdatePriorComp = false;
10356 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
10357 } else {
10358 if (UpdateInst)
10359 return false;
10360 switch (Def->getOpcode()) {
10361 case AArch64::ADDSXri:
10362 case AArch64::ADDSWri:
10363 case AArch64::SUBSXri:
10364 case AArch64::SUBSWri:
10365 case AArch64::ADDXri:
10366 case AArch64::ADDWri:
10367 case AArch64::SUBXri:
10368 case AArch64::SUBWri:
10369 UpdateInst = Def;
10370 UpdateCounterOprNum = 1;
10371 break;
10372 case AArch64::ADDSXrr:
10373 case AArch64::ADDSWrr:
10374 case AArch64::SUBSXrr:
10375 case AArch64::SUBSWrr:
10376 case AArch64::ADDXrr:
10377 case AArch64::ADDWrr:
10378 case AArch64::SUBXrr:
10379 case AArch64::SUBWrr:
10380 UpdateInst = Def;
10381 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
10382 UpdateCounterOprNum = 1;
10383 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
10384 UpdateCounterOprNum = 2;
10385 else
10386 return false;
10387 break;
10388 default:
10389 return false;
10390 }
10391 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
10392 }
10393
10394 if (!CurReg.isVirtual())
10395 return false;
10396 if (Reg == CurReg)
10397 break;
10398 }
10399
10400 if (!UpdateInst)
10401 return false;
10402
10403 return true;
10404}
10405
10406std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
10408 // Accept loops that meet the following conditions
10409 // * The conditional branch is BCC
10410 // * The compare instruction is ADDS/SUBS/WHILEXX
10411 // * One operand of the compare is an induction variable and the other is a
10412 // loop invariant value
10413 // * The induction variable is incremented/decremented by a single instruction
10414 // * Does not contain CALL or instructions which have unmodeled side effects
10415
10416 for (MachineInstr &MI : *LoopBB)
10417 if (MI.isCall() || MI.hasUnmodeledSideEffects())
10418 // This instruction may use NZCV, which interferes with the instruction to
10419 // be inserted for loop control.
10420 return nullptr;
10421
10422 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
10424 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
10425 return nullptr;
10426
10427 // Infinite loops are not supported
10428 if (TBB == LoopBB && FBB == LoopBB)
10429 return nullptr;
10430
10431 // Must be conditional branch
10432 if (TBB != LoopBB && FBB == nullptr)
10433 return nullptr;
10434
10435 assert((TBB == LoopBB || FBB == LoopBB) &&
10436 "The Loop must be a single-basic-block loop");
10437
10438 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
10440
10441 if (CondBranch->getOpcode() != AArch64::Bcc)
10442 return nullptr;
10443
10444 // Normalization for createTripCountGreaterCondition()
10445 if (TBB == LoopBB)
10447
10448 MachineInstr *Comp = nullptr;
10449 unsigned CompCounterOprNum = 0;
10450 for (MachineInstr &MI : reverse(*LoopBB)) {
10451 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
10452 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
10453 // operands is a loop invariant value
10454
10455 switch (MI.getOpcode()) {
10456 case AArch64::SUBSXri:
10457 case AArch64::SUBSWri:
10458 case AArch64::ADDSXri:
10459 case AArch64::ADDSWri:
10460 Comp = &MI;
10461 CompCounterOprNum = 1;
10462 break;
10463 case AArch64::ADDSWrr:
10464 case AArch64::ADDSXrr:
10465 case AArch64::SUBSWrr:
10466 case AArch64::SUBSXrr:
10467 Comp = &MI;
10468 break;
10469 default:
10470 if (isWhileOpcode(MI.getOpcode())) {
10471 Comp = &MI;
10472 break;
10473 }
10474 return nullptr;
10475 }
10476
10477 if (CompCounterOprNum == 0) {
10478 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
10479 CompCounterOprNum = 2;
10480 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
10481 CompCounterOprNum = 1;
10482 else
10483 return nullptr;
10484 }
10485 break;
10486 }
10487 }
10488 if (!Comp)
10489 return nullptr;
10490
10491 MachineInstr *Update = nullptr;
10492 Register Init;
10493 bool IsUpdatePriorComp;
10494 unsigned UpdateCounterOprNum;
10495 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
10496 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
10497 return nullptr;
10498
10499 return std::make_unique<AArch64PipelinerLoopInfo>(
10500 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
10501 Init, IsUpdatePriorComp, Cond);
10502}
10503
10504/// verifyInstruction - Perform target specific instruction verification.
10505bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
10506 StringRef &ErrInfo) const {
10507
10508 // Verify that immediate offsets on load/store instructions are within range.
10509 // Stack objects with an FI operand are excluded as they can be fixed up
10510 // during PEI.
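// (For example, an LDRXui immediate must lie in [0, 4095]; getMemOpInfo
// reports that range and the scale used to turn it into a byte offset.)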
10511 TypeSize Scale(0U, false), Width(0U, false);
10512 int64_t MinOffset, MaxOffset;
10513 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
10514 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
10515 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
10516 int64_t Imm = MI.getOperand(ImmIdx).getImm();
10517 if (Imm < MinOffset || Imm > MaxOffset) {
10518 ErrInfo = "Unexpected immediate on load/store instruction";
10519 return false;
10520 }
10521 }
10522 }
10523 return true;
10524}
10525
10526#define GET_INSTRINFO_HELPERS
10527#define GET_INSTRMAP_INFO
10528#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Only emit a branch.
@ MachineOutlinerRegSave
Same as default, but save LR to a register.
@ MachineOutlinerNoLRSave
Emit a call and return.
@ MachineOutlinerThunk
Emit a call and tail-call.
@ MachineOutlinerDefault
Emit a save, restore, call, and return.
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instruction is an SEH instruction used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< std::unique_ptr< outliner::OutlinedFunction > > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs, unsigned MinRepeats) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if its results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
AArch64PAuth::AuthCheckMethod getAuthenticatedLRCheckMethod(const MachineFunction &MF) const
Choose a method of checking LR before performing a tail call.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:171
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:719
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:716
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:661
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:575
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:617
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:590
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:687
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:185
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:85
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1549
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same as getFirstTerminator, but it ignores bundles and returns an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:71
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:577
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:349
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:958
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:399
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:580
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:574
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:790
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:501
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:587
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:394
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:121
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:452
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:683
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
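The shift helpers above (getShifterImm, getShiftValue, getShiftType) share one packing scheme: the shift kind sits in the bits above a 6-bit shift amount, so a single integer operand carries both. A standalone sketch under that assumption, using illustrative names and the 000=lsl, 001=lsr, ... ordering from the brief above; the authoritative encoding lives in AArch64AddressingModes.h.
#include <cassert>

// Illustrative packing of a shift kind above a 6-bit shift amount.
enum ShiftKind { LSL = 0, LSR = 1, ASR = 2, ROR = 3 };

constexpr unsigned packShifterImm(ShiftKind Kind, unsigned Amount) {
  return (static_cast<unsigned>(Kind) << 6) | (Amount & 0x3f);
}
constexpr unsigned shiftValue(unsigned Imm) { return Imm & 0x3f; }
constexpr ShiftKind shiftType(unsigned Imm) {
  return static_cast<ShiftKind>(Imm >> 6);
}

int main() {
  unsigned Enc = packShifterImm(LSR, 12);
  assert(shiftValue(Enc) == 12 && shiftType(Enc) == LSR);
  return 0;
}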
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
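As a rough illustration of what this expansion produces, here is a standalone sketch of the baseline MOVZ/MOVK strategy it can always fall back to: materialize the constant 16 bits at a time, skipping zero chunks. The real expandMOVImm also considers ORR-encodable bitmasks and negated (MOVN/MOVK) forms; printMovSequence is an illustrative name, and the output is assembly-flavoured text rather than MCInsts.
#include <cstdint>
#include <cstdio>

// Print a movz/movk sequence that materializes Imm into x0 (sketch only).
void printMovSequence(uint64_t Imm) {
  bool First = true;
  for (unsigned Shift = 0; Shift < 64; Shift += 16) {
    unsigned Chunk = static_cast<unsigned>((Imm >> Shift) & 0xffff);
    if (Chunk == 0)
      continue; // movz zeroes the untouched 16-bit chunks
    std::printf("%s x0, #0x%x, lsl #%u\n", First ? "movz" : "movk", Chunk, Shift);
    First = false;
  }
  if (First)
    std::printf("movz x0, #0\n"); // Imm == 0 still needs one instruction
}

int main() {
  printMovSequence(0x12340000ABCD0000ULL); // movz ... lsl #16, movk ... lsl #48
  return 0;
}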
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
InstrType
Represents how an instruction should be mapped by the outliner.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1739
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:297
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:347
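A standalone sketch of the two bit tricks behind these helpers, for nonzero inputs only (the -1-on-zero behaviour of Log2_64 noted above is not reproduced; isPow2 and floorLog2 are illustrative names):
#include <cassert>
#include <cstdint>

// A power of two has exactly one set bit; floor(log2) is that bit's index.
constexpr bool isPow2(uint64_t V) { return V != 0 && (V & (V - 1)) == 0; }

unsigned floorLog2(uint64_t V) {
  unsigned L = 0;
  while (V >>= 1) // the LLVM helpers use count-leading-zeros instead of a loop
    ++L;
  return L;
}

int main() {
  assert(isPow2(4096) && floorLog2(4096) == 12);
  assert(!isPow2(6) && floorLog2(6) == 2);
  return 0;
}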
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1746
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:420
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:261
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
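A standalone sketch of the SLEB128 scheme this helper implements, writing to a byte vector instead of a raw_ostream and omitting the PadTo handling: 7 payload bits per byte, least-significant group first, stopping once the rest of the value is pure sign extension of what has already been written.
#include <cstdint>
#include <vector>

// Signed LEB128 encoding sketch (no padding, byte vector output).
std::vector<uint8_t> sleb128(int64_t Value) {
  std::vector<uint8_t> Out;
  bool More = true;
  while (More) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7; // arithmetic shift preserves the sign on common compilers
    More = !((Value == 0 && (Byte & 0x40) == 0) ||
             (Value == -1 && (Byte & 0x40) != 0));
    if (More)
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  }
  return Out;
}

int main() {
  std::vector<uint8_t> V = sleb128(-2); // encodes to the single byte 0x7e
  return (V.size() == 1 && V[0] == 0x7e) ? 0 : 1;
}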
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2099
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1903
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
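The unsigned variant is the same 7-bits-per-byte scheme without the sign test: the high bit marks continuation and the loop stops when no value bits remain. A standalone sketch (byte vector output, no padding; uleb128 is an illustrative name):
#include <cstdint>
#include <vector>

// Unsigned LEB128 encoding sketch (no padding, byte vector output).
std::vector<uint8_t> uleb128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  std::vector<uint8_t> V = uleb128(624485); // the classic 0xE5 0x8E 0x26 example
  return (V.size() == 3 && V[0] == 0xE5 && V[1] == 0x8E && V[2] == 0x26) ? 0 : 1;
}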
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:582
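A standalone sketch of the shift-up/arithmetic-shift-down idiom this helper is built on, assuming 0 < B <= 64, two's-complement conversion, and an arithmetic right shift for signed types (guaranteed from C++20 and universal in practice); signExtend64 here is an illustrative reimplementation, not the LLVM function.
#include <cassert>
#include <cstdint>

// Move the B-bit field to the top of the register, then shift it back so its
// sign bit fills the upper bits.
constexpr int64_t signExtend64(uint64_t X, unsigned B) {
  return static_cast<int64_t>(X << (64 - B)) >> (64 - B);
}

int main() {
  assert(signExtend64(0x80, 8) == -128); // bit 7 set: negative
  assert(signExtend64(0x7f, 8) == 127);  // bit 7 clear: value unchanged
  return 0;
}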
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.