AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/Module.h"
44#include "llvm/MC/MCAsmInfo.h"
45#include "llvm/MC/MCInst.h"
47#include "llvm/MC/MCInstrDesc.h"
52#include "llvm/Support/LEB128.h"
56#include <cassert>
57#include <cstdint>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define GET_INSTRINFO_CTOR_DTOR
64#include "AArch64GenInstrInfo.inc"
65
66static cl::opt<unsigned> TBZDisplacementBits(
67    "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
68 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
69
70static cl::opt<unsigned> CBZDisplacementBits(
71    "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
72 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
73
74static cl::opt<unsigned>
75    BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
76 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
77
78static cl::opt<unsigned>
79    BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
80 cl::desc("Restrict range of B instructions (DEBUG)"));
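// These cl::opt knobs artificially shrink the assumed branch ranges so the
// branch-relaxation logic below can be exercised on small inputs. A minimal
// sketch of how they are typically driven from an llc-based test (the test
// file and values are illustrative, not taken from this source):
//
//   ; RUN: llc -mtriple=aarch64 -aarch64-tbz-offset-bits=4 \
//   ; RUN:     -aarch64-cbz-offset-bits=4 -aarch64-bcc-offset-bits=4 %s -o -
//
// With the ranges clamped this way, even short functions force TB[N]Z,
// CB[N]Z and B.cc terminators to be relaxed into inverted-branch + B pairs.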
81
82AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
83    : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
84 AArch64::CATCHRET),
85 RI(STI.getTargetTriple()), Subtarget(STI) {}
86
87/// GetInstSize - Return the number of bytes of code the specified
88/// instruction may be. This returns the maximum number of bytes.
89unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
90  const MachineBasicBlock &MBB = *MI.getParent();
91 const MachineFunction *MF = MBB.getParent();
92 const Function &F = MF->getFunction();
93 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
94
95 {
96 auto Op = MI.getOpcode();
97 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
98 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
99 }
100
101 // Meta-instructions emit no code.
102 if (MI.isMetaInstruction())
103 return 0;
104
105 // FIXME: We currently only handle pseudoinstructions that don't get expanded
106 // before the assembly printer.
107 unsigned NumBytes = 0;
108 const MCInstrDesc &Desc = MI.getDesc();
109
110  // The size should preferably be set in
111  // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
112  // Specific cases below handle instructions of variable size.
113 switch (Desc.getOpcode()) {
114 default:
115 if (Desc.getSize())
116 return Desc.getSize();
117
118 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
119 // with fixed constant size but not specified in .td file) is a normal
120 // 4-byte insn.
121 NumBytes = 4;
122 break;
123 case TargetOpcode::STACKMAP:
124 // The upper bound for a stackmap intrinsic is the full length of its shadow
125 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
126 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
127 break;
128 case TargetOpcode::PATCHPOINT:
129 // The size of the patchpoint intrinsic is the number of bytes requested
130 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
131 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
132 break;
133 case TargetOpcode::STATEPOINT:
134 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
135 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
136 // No patch bytes means a normal call inst is emitted
137 if (NumBytes == 0)
138 NumBytes = 4;
139 break;
140 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
141 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
142 // instructions are expanded to the specified number of NOPs. Otherwise,
143 // they are expanded to 36-byte XRay sleds.
144 NumBytes =
145 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
146 break;
147 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
148 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
149 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
150 NumBytes = 36;
151 break;
152 case TargetOpcode::PATCHABLE_EVENT_CALL:
153 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
154 NumBytes = 24;
155 break;
156
157 case AArch64::SPACE:
158 NumBytes = MI.getOperand(1).getImm();
159 break;
160 case TargetOpcode::BUNDLE:
161 NumBytes = getInstBundleLength(MI);
162 break;
163 }
164
165 return NumBytes;
166}
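// Worked example for the PATCHABLE_FUNCTION_ENTER case above (the attribute
// value is illustrative): a function compiled with
// "patchable-function-entry"="2" reserves 2 * 4 = 8 bytes of NOPs, while a
// function instrumented for XRay without that attribute falls back to the
// default of 9, i.e. 9 * 4 = 36 bytes, matching the 36-byte sled size used
// for the exit and typed-event cases.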
167
168unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
169 unsigned Size = 0;
170  MachineBasicBlock::const_instr_iterator I = MI.getIterator();
171  MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
172 while (++I != E && I->isInsideBundle()) {
173 assert(!I->isBundle() && "No nested bundle!");
174    Size += getInstSizeInBytes(*I);
175  }
176 return Size;
177}
178
179static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
180                            SmallVectorImpl<MachineOperand> &Cond) {
181  // Block ends with fall-through condbranch.
182 switch (LastInst->getOpcode()) {
183 default:
184 llvm_unreachable("Unknown branch instruction?");
185 case AArch64::Bcc:
186 Target = LastInst->getOperand(1).getMBB();
187 Cond.push_back(LastInst->getOperand(0));
188 break;
189 case AArch64::CBZW:
190 case AArch64::CBZX:
191 case AArch64::CBNZW:
192 case AArch64::CBNZX:
193 Target = LastInst->getOperand(1).getMBB();
194 Cond.push_back(MachineOperand::CreateImm(-1));
195 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
196 Cond.push_back(LastInst->getOperand(0));
197 break;
198 case AArch64::TBZW:
199 case AArch64::TBZX:
200 case AArch64::TBNZW:
201 case AArch64::TBNZX:
202 Target = LastInst->getOperand(2).getMBB();
203 Cond.push_back(MachineOperand::CreateImm(-1));
204 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
205 Cond.push_back(LastInst->getOperand(0));
206 Cond.push_back(LastInst->getOperand(1));
207 }
208}
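// The Cond vector built above uses a small ad-hoc encoding that the rest of
// this file (reverseBranchCondition, instantiateCondBranch, insertSelect)
// relies on. Illustrative shapes, derived directly from the cases above:
//
//   b.lt %bb.1          -> Cond = { imm(AArch64CC::LT) }
//   cbnz w3, %bb.1      -> Cond = { imm(-1), imm(AArch64::CBNZW), reg(w3) }
//   tbz  x5, #42, %bb.1 -> Cond = { imm(-1), imm(AArch64::TBZX), reg(x5), imm(42) }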
209
210static unsigned getBranchDisplacementBits(unsigned Opc) {
211 switch (Opc) {
212 default:
213 llvm_unreachable("unexpected opcode!");
214 case AArch64::B:
215 return BDisplacementBits;
216 case AArch64::TBNZW:
217 case AArch64::TBZW:
218 case AArch64::TBNZX:
219 case AArch64::TBZX:
220 return TBZDisplacementBits;
221 case AArch64::CBNZW:
222 case AArch64::CBZW:
223 case AArch64::CBNZX:
224 case AArch64::CBZX:
225 return CBZDisplacementBits;
226 case AArch64::Bcc:
227 return BCCDisplacementBits;
228 }
229}
230
231bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
232                                             int64_t BrOffset) const {
233 unsigned Bits = getBranchDisplacementBits(BranchOp);
234  assert(Bits >= 3 && "max branch displacement must be enough to jump "
235                      "over conditional branch expansion");
236 return isIntN(Bits, BrOffset / 4);
237}
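// The displacement is checked in units of 4-byte instructions, so an N-bit
// signed field covers roughly +/- 2^(N+1) bytes. For the untampered default
// values of the options above:
//   TB[N]Z : 14 bits -> +/- 32 KiB
//   CB[N]Z : 19 bits -> +/- 1 MiB
//   B.cc   : 19 bits -> +/- 1 MiB
//   B      : 26 bits -> +/- 128 MiB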
238
239MachineBasicBlock *
240AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
241  switch (MI.getOpcode()) {
242 default:
243 llvm_unreachable("unexpected opcode!");
244 case AArch64::B:
245 return MI.getOperand(0).getMBB();
246 case AArch64::TBZW:
247 case AArch64::TBNZW:
248 case AArch64::TBZX:
249 case AArch64::TBNZX:
250 return MI.getOperand(2).getMBB();
251 case AArch64::CBZW:
252 case AArch64::CBNZW:
253 case AArch64::CBZX:
254 case AArch64::CBNZX:
255 case AArch64::Bcc:
256 return MI.getOperand(1).getMBB();
257 }
258}
259
260void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
261                                            MachineBasicBlock &NewDestBB,
262 MachineBasicBlock &RestoreBB,
263 const DebugLoc &DL,
264 int64_t BrOffset,
265 RegScavenger *RS) const {
266 assert(RS && "RegScavenger required for long branching");
267 assert(MBB.empty() &&
268 "new block should be inserted for expanding unconditional branch");
269 assert(MBB.pred_size() == 1);
270 assert(RestoreBB.empty() &&
271 "restore block should be inserted for restoring clobbered registers");
272
273 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
274 // Offsets outside of the signed 33-bit range are not supported for ADRP +
275 // ADD.
276 if (!isInt<33>(BrOffset))
277      report_fatal_error(
278          "Branch offsets outside of the signed 33-bit range not supported");
279
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
281 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
282 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
283 .addReg(Reg)
284 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
285 .addImm(0);
286 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
287 };
288
289  RS->enterBasicBlockEnd(MBB);
290  // If X16 is unused, we can rely on the linker to insert a range extension
291 // thunk if NewDestBB is out of range of a single B instruction.
292 constexpr Register Reg = AArch64::X16;
293 if (!RS->isRegUsed(Reg)) {
294 insertUnconditionalBranch(MBB, &NewDestBB, DL);
295 RS->setRegUsed(Reg);
296 return;
297 }
298
299 // If there's a free register and it's worth inflating the code size,
300 // manually insert the indirect branch.
301 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
302 if (Scavenged != AArch64::NoRegister &&
303      MBB.getSectionID() == MBBSectionID::ColdSectionID) {
304    buildIndirectBranch(Scavenged, NewDestBB);
305 RS->setRegUsed(Scavenged);
306 return;
307 }
308
309 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
310 // with red zones.
311  AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
312  if (!AFI || AFI->hasRedZone().value_or(true))
313    report_fatal_error(
314        "Unable to insert indirect branch inside function that has red zone");
315
316 // Otherwise, spill X16 and defer range extension to the linker.
317 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
318 .addReg(AArch64::SP, RegState::Define)
319 .addReg(Reg)
320 .addReg(AArch64::SP)
321 .addImm(-16);
322
323 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
324
325 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
326 .addReg(AArch64::SP, RegState::Define)
327      .addReg(Reg, RegState::Define)
328      .addReg(AArch64::SP)
329 .addImm(16);
330}
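// For reference, the indirect-branch shape built by buildIndirectBranch above
// is, in assembly (register and label names are illustrative):
//
//   adrp xN, dest            ; page of the target block
//   add  xN, xN, :lo12:dest  ; low 12 bits of the target address
//   br   xN
//
// which is why the offset must fit in the signed 33-bit ADRP+ADD range.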
331
332// Branch analysis.
333bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
334                                     MachineBasicBlock *&TBB,
335                                     MachineBasicBlock *&FBB,
336                                     SmallVectorImpl<MachineOperand> &Cond,
337                                     bool AllowModify) const {
338 // If the block has no terminators, it just falls into the block after it.
339  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
340  if (I == MBB.end())
341 return false;
342
343 // Skip over SpeculationBarrierEndBB terminators
344 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
345 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
346 --I;
347 }
348
349 if (!isUnpredicatedTerminator(*I))
350 return false;
351
352 // Get the last instruction in the block.
353 MachineInstr *LastInst = &*I;
354
355 // If there is only one terminator instruction, process it.
356 unsigned LastOpc = LastInst->getOpcode();
357 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
358 if (isUncondBranchOpcode(LastOpc)) {
359 TBB = LastInst->getOperand(0).getMBB();
360 return false;
361 }
362 if (isCondBranchOpcode(LastOpc)) {
363 // Block ends with fall-through condbranch.
364 parseCondBranch(LastInst, TBB, Cond);
365 return false;
366 }
367 return true; // Can't handle indirect branch.
368 }
369
370 // Get the instruction before it if it is a terminator.
371 MachineInstr *SecondLastInst = &*I;
372 unsigned SecondLastOpc = SecondLastInst->getOpcode();
373
374 // If AllowModify is true and the block ends with two or more unconditional
375 // branches, delete all but the first unconditional branch.
376 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
377 while (isUncondBranchOpcode(SecondLastOpc)) {
378 LastInst->eraseFromParent();
379 LastInst = SecondLastInst;
380 LastOpc = LastInst->getOpcode();
381 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
382        // Return now that the only terminator is an unconditional branch.
383 TBB = LastInst->getOperand(0).getMBB();
384 return false;
385 }
386 SecondLastInst = &*I;
387 SecondLastOpc = SecondLastInst->getOpcode();
388 }
389 }
390
391  // If we're allowed to modify and the block ends in an unconditional branch
392 // which could simply fallthrough, remove the branch. (Note: This case only
393 // matters when we can't understand the whole sequence, otherwise it's also
394 // handled by BranchFolding.cpp.)
395 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
396      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
397    LastInst->eraseFromParent();
398 LastInst = SecondLastInst;
399 LastOpc = LastInst->getOpcode();
400 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
401 assert(!isUncondBranchOpcode(LastOpc) &&
402 "unreachable unconditional branches removed above");
403
404 if (isCondBranchOpcode(LastOpc)) {
405 // Block ends with fall-through condbranch.
406 parseCondBranch(LastInst, TBB, Cond);
407 return false;
408 }
409 return true; // Can't handle indirect branch.
410 }
411 SecondLastInst = &*I;
412 SecondLastOpc = SecondLastInst->getOpcode();
413 }
414
415 // If there are three terminators, we don't know what sort of block this is.
416 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
417 return true;
418
419 // If the block ends with a B and a Bcc, handle it.
420 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
421 parseCondBranch(SecondLastInst, TBB, Cond);
422 FBB = LastInst->getOperand(0).getMBB();
423 return false;
424 }
425
426 // If the block ends with two unconditional branches, handle it. The second
427 // one is not executed, so remove it.
428 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
429 TBB = SecondLastInst->getOperand(0).getMBB();
430 I = LastInst;
431 if (AllowModify)
432 I->eraseFromParent();
433 return false;
434 }
435
436 // ...likewise if it ends with an indirect branch followed by an unconditional
437 // branch.
438 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
439 I = LastInst;
440 if (AllowModify)
441 I->eraseFromParent();
442 return true;
443 }
444
445 // Otherwise, can't handle this.
446 return true;
447}
448
449bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
450                                              MachineBranchPredicate &MBP,
451 bool AllowModify) const {
452 // For the moment, handle only a block which ends with a cb(n)zx followed by
453 // a fallthrough. Why this? Because it is a common form.
454 // TODO: Should we handle b.cc?
455
456  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
457  if (I == MBB.end())
458 return true;
459
460 // Skip over SpeculationBarrierEndBB terminators
461 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
462 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
463 --I;
464 }
465
466 if (!isUnpredicatedTerminator(*I))
467 return true;
468
469 // Get the last instruction in the block.
470 MachineInstr *LastInst = &*I;
471 unsigned LastOpc = LastInst->getOpcode();
472 if (!isCondBranchOpcode(LastOpc))
473 return true;
474
475 switch (LastOpc) {
476 default:
477 return true;
478 case AArch64::CBZW:
479 case AArch64::CBZX:
480 case AArch64::CBNZW:
481 case AArch64::CBNZX:
482 break;
483 };
484
485 MBP.TrueDest = LastInst->getOperand(1).getMBB();
486 assert(MBP.TrueDest && "expected!");
487 MBP.FalseDest = MBB.getNextNode();
488
489 MBP.ConditionDef = nullptr;
490 MBP.SingleUseCondition = false;
491
492 MBP.LHS = LastInst->getOperand(0);
493 MBP.RHS = MachineOperand::CreateImm(0);
494 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
495 : MachineBranchPredicate::PRED_EQ;
496 return false;
497}
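// Example of the predicate produced above (block names are illustrative): for
// a block ending in
//
//   cbnz x0, %bb.2
//   ; fallthrough to %bb.1
//
// the result is LHS = x0, RHS = #0, Predicate = PRED_NE, TrueDest = %bb.2 and
// FalseDest = %bb.1 (the next block in layout order).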
498
499bool AArch64InstrInfo::reverseBranchCondition(
500    SmallVectorImpl<MachineOperand> &Cond) const {
501  if (Cond[0].getImm() != -1) {
502 // Regular Bcc
503    AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
504    Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
505  } else {
506 // Folded compare-and-branch
507 switch (Cond[1].getImm()) {
508 default:
509 llvm_unreachable("Unknown conditional branch!");
510 case AArch64::CBZW:
511 Cond[1].setImm(AArch64::CBNZW);
512 break;
513 case AArch64::CBNZW:
514 Cond[1].setImm(AArch64::CBZW);
515 break;
516 case AArch64::CBZX:
517 Cond[1].setImm(AArch64::CBNZX);
518 break;
519 case AArch64::CBNZX:
520 Cond[1].setImm(AArch64::CBZX);
521 break;
522 case AArch64::TBZW:
523 Cond[1].setImm(AArch64::TBNZW);
524 break;
525 case AArch64::TBNZW:
526 Cond[1].setImm(AArch64::TBZW);
527 break;
528 case AArch64::TBZX:
529 Cond[1].setImm(AArch64::TBNZX);
530 break;
531 case AArch64::TBNZX:
532 Cond[1].setImm(AArch64::TBZX);
533 break;
534 }
535 }
536
537 return false;
538}
539
540unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
541                                        int *BytesRemoved) const {
542  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
543  if (I == MBB.end())
544 return 0;
545
546 if (!isUncondBranchOpcode(I->getOpcode()) &&
547 !isCondBranchOpcode(I->getOpcode()))
548 return 0;
549
550 // Remove the branch.
551 I->eraseFromParent();
552
553 I = MBB.end();
554
555 if (I == MBB.begin()) {
556 if (BytesRemoved)
557 *BytesRemoved = 4;
558 return 1;
559 }
560 --I;
561 if (!isCondBranchOpcode(I->getOpcode())) {
562 if (BytesRemoved)
563 *BytesRemoved = 4;
564 return 1;
565 }
566
567 // Remove the branch.
568 I->eraseFromParent();
569 if (BytesRemoved)
570 *BytesRemoved = 8;
571
572 return 2;
573}
574
575void AArch64InstrInfo::instantiateCondBranch(
576    MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
577    ArrayRef<MachineOperand> Cond) const {
578  if (Cond[0].getImm() != -1) {
579 // Regular Bcc
580 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
581 } else {
582 // Folded compare-and-branch
583 // Note that we use addOperand instead of addReg to keep the flags.
584 const MachineInstrBuilder MIB =
585 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
586 if (Cond.size() > 3)
587 MIB.addImm(Cond[3].getImm());
588 MIB.addMBB(TBB);
589 }
590}
591
592unsigned AArch64InstrInfo::insertBranch(
593    MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
594    ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
595 // Shouldn't be a fall through.
596 assert(TBB && "insertBranch must not be told to insert a fallthrough");
597
598 if (!FBB) {
599 if (Cond.empty()) // Unconditional branch?
600 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
601 else
602 instantiateCondBranch(MBB, DL, TBB, Cond);
603
604 if (BytesAdded)
605 *BytesAdded = 4;
606
607 return 1;
608 }
609
610 // Two-way conditional branch.
611 instantiateCondBranch(MBB, DL, TBB, Cond);
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
613
614 if (BytesAdded)
615 *BytesAdded = 8;
616
617 return 2;
618}
619
620// Find the original register that VReg is copied from.
621static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
622 while (Register::isVirtualRegister(VReg)) {
623 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
624 if (!DefMI->isFullCopy())
625 return VReg;
626 VReg = DefMI->getOperand(1).getReg();
627 }
628 return VReg;
629}
630
631// Determine if VReg is defined by an instruction that can be folded into a
632// csel instruction. If so, return the folded opcode, and the replacement
633// register.
634static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
635 unsigned *NewVReg = nullptr) {
636 VReg = removeCopies(MRI, VReg);
637  if (!Register::isVirtualRegister(VReg))
638    return 0;
639
640 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
641 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
642 unsigned Opc = 0;
643 unsigned SrcOpNum = 0;
644 switch (DefMI->getOpcode()) {
645 case AArch64::ADDSXri:
646 case AArch64::ADDSWri:
647 // if NZCV is used, do not fold.
648 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
649 true) == -1)
650 return 0;
651 // fall-through to ADDXri and ADDWri.
652 [[fallthrough]];
653 case AArch64::ADDXri:
654 case AArch64::ADDWri:
655 // add x, 1 -> csinc.
656 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
657 DefMI->getOperand(3).getImm() != 0)
658 return 0;
659 SrcOpNum = 1;
660 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
661 break;
662
663 case AArch64::ORNXrr:
664 case AArch64::ORNWrr: {
665 // not x -> csinv, represented as orn dst, xzr, src.
666 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
667 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
668 return 0;
669 SrcOpNum = 2;
670 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
671 break;
672 }
673
674 case AArch64::SUBSXrr:
675 case AArch64::SUBSWrr:
676 // if NZCV is used, do not fold.
677 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
678 true) == -1)
679 return 0;
680 // fall-through to SUBXrr and SUBWrr.
681 [[fallthrough]];
682 case AArch64::SUBXrr:
683 case AArch64::SUBWrr: {
684 // neg x -> csneg, represented as sub dst, xzr, src.
685 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
686 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
687 return 0;
688 SrcOpNum = 2;
689 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
690 break;
691 }
692 default:
693 return 0;
694 }
695 assert(Opc && SrcOpNum && "Missing parameters");
696
697 if (NewVReg)
698 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
699 return Opc;
700}
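// The folds recognized above correspond to the usual csel aliases, e.g.
// (virtual register names are illustrative):
//
//   %t = ADDWri %a, 1, 0   ; %t = %a + 1  -> CSINCWr dst, true, %a, cc
//   %t = ORNWrr wzr, %a    ; %t = ~%a     -> CSINVWr dst, true, %a, cc
//   %t = SUBWrr wzr, %a    ; %t = -%a     -> CSNEGWr dst, true, %a, cc
//
// In each case the returned NewVReg is the original %a operand, which becomes
// the false operand of the folded conditional-select in insertSelect below.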
701
702bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
703                                       ArrayRef<MachineOperand> Cond,
704                                       Register DstReg, Register TrueReg,
705 Register FalseReg, int &CondCycles,
706 int &TrueCycles,
707 int &FalseCycles) const {
708 // Check register classes.
709  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
710  const TargetRegisterClass *RC =
711 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
712 if (!RC)
713 return false;
714
715 // Also need to check the dest regclass, in case we're trying to optimize
716 // something like:
717 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
718 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
719 return false;
720
721 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
722 unsigned ExtraCondLat = Cond.size() != 1;
723
724 // GPRs are handled by csel.
725 // FIXME: Fold in x+1, -x, and ~x when applicable.
726 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
727 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
728 // Single-cycle csel, csinc, csinv, and csneg.
729 CondCycles = 1 + ExtraCondLat;
730 TrueCycles = FalseCycles = 1;
731 if (canFoldIntoCSel(MRI, TrueReg))
732 TrueCycles = 0;
733 else if (canFoldIntoCSel(MRI, FalseReg))
734 FalseCycles = 0;
735 return true;
736 }
737
738 // Scalar floating point is handled by fcsel.
739 // FIXME: Form fabs, fmin, and fmax when applicable.
740 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
741 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
742 CondCycles = 5 + ExtraCondLat;
743 TrueCycles = FalseCycles = 2;
744 return true;
745 }
746
747 // Can't do vectors.
748 return false;
749}
750
751void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
752                                    MachineBasicBlock::iterator I,
753                                    const DebugLoc &DL, Register DstReg,
754                                    ArrayRef<MachineOperand> Cond,
755                                    Register TrueReg, Register FalseReg) const {
756  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
757
758 // Parse the condition code, see parseCondBranch() above.
759  AArch64CC::CondCode CC;
760  switch (Cond.size()) {
761 default:
762 llvm_unreachable("Unknown condition opcode in Cond");
763 case 1: // b.cc
764 CC = AArch64CC::CondCode(Cond[0].getImm());
765 break;
766 case 3: { // cbz/cbnz
767 // We must insert a compare against 0.
768 bool Is64Bit;
769 switch (Cond[1].getImm()) {
770 default:
771 llvm_unreachable("Unknown branch opcode in Cond");
772 case AArch64::CBZW:
773 Is64Bit = false;
774      CC = AArch64CC::EQ;
775      break;
776 case AArch64::CBZX:
777 Is64Bit = true;
778      CC = AArch64CC::EQ;
779      break;
780 case AArch64::CBNZW:
781 Is64Bit = false;
782      CC = AArch64CC::NE;
783      break;
784 case AArch64::CBNZX:
785 Is64Bit = true;
786      CC = AArch64CC::NE;
787      break;
788 }
789 Register SrcReg = Cond[2].getReg();
790 if (Is64Bit) {
791 // cmp reg, #0 is actually subs xzr, reg, #0.
792 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
793 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
794 .addReg(SrcReg)
795 .addImm(0)
796 .addImm(0);
797 } else {
798 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
799 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
800 .addReg(SrcReg)
801 .addImm(0)
802 .addImm(0);
803 }
804 break;
805 }
806 case 4: { // tbz/tbnz
807 // We must insert a tst instruction.
808 switch (Cond[1].getImm()) {
809 default:
810 llvm_unreachable("Unknown branch opcode in Cond");
811 case AArch64::TBZW:
812 case AArch64::TBZX:
813      CC = AArch64CC::EQ;
814      break;
815 case AArch64::TBNZW:
816 case AArch64::TBNZX:
817      CC = AArch64CC::NE;
818      break;
819 }
820 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
821 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
822 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
823 .addReg(Cond[2].getReg())
824 .addImm(
825 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
826 else
827 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
828 .addReg(Cond[2].getReg())
829 .addImm(
830 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
831 break;
832 }
833 }
834
835 unsigned Opc = 0;
836 const TargetRegisterClass *RC = nullptr;
837 bool TryFold = false;
838 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
839 RC = &AArch64::GPR64RegClass;
840 Opc = AArch64::CSELXr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
843 RC = &AArch64::GPR32RegClass;
844 Opc = AArch64::CSELWr;
845 TryFold = true;
846 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
847 RC = &AArch64::FPR64RegClass;
848 Opc = AArch64::FCSELDrrr;
849 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
850 RC = &AArch64::FPR32RegClass;
851 Opc = AArch64::FCSELSrrr;
852 }
853 assert(RC && "Unsupported regclass");
854
855 // Try folding simple instructions into the csel.
856 if (TryFold) {
857 unsigned NewVReg = 0;
858 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
859 if (FoldedOpc) {
860      // The folded opcodes csinc, csinv and csneg apply the operation to
861 // FalseReg, so we need to invert the condition.
862      CC = AArch64CC::getInvertedCondCode(CC);
863      TrueReg = FalseReg;
864 } else
865 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
866
867 // Fold the operation. Leave any dead instructions for DCE to clean up.
868 if (FoldedOpc) {
869 FalseReg = NewVReg;
870 Opc = FoldedOpc;
871      // This extends the live range of NewVReg.
872 MRI.clearKillFlags(NewVReg);
873 }
874 }
875
876  // Pull all virtual registers into the appropriate class.
877 MRI.constrainRegClass(TrueReg, RC);
878 MRI.constrainRegClass(FalseReg, RC);
879
880 // Insert the csel.
881 BuildMI(MBB, I, DL, get(Opc), DstReg)
882 .addReg(TrueReg)
883 .addReg(FalseReg)
884 .addImm(CC);
885}
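// Putting the pieces together, a select on a tbz-style condition such as
// Cond = { -1, TBZW, %w0, 3 } is lowered above to (registers illustrative):
//
//   ands wzr, w0, #(1 << 3)    ; materialize the bit test in NZCV
//   csel w1, wTrue, wFalse, eq ; TBZW selects the true value when the bit is 0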
886
887// Return true if Imm can be loaded into a register by a "cheap" sequence of
888// instructions. For now, "cheap" means at most two instructions.
889static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
890 if (BitSize == 32)
891 return true;
892
893 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
894 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
895  SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
896  AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
897
898 return Is.size() <= 2;
899}
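// Illustrative 64-bit cases for the check above: 0x0000ffff0000ffff is a
// repeating bit pattern and expands to a single ORR with a logical immediate,
// so it is "cheap"; 0x123456789abcdef0 needs a MOVZ plus three MOVKs (four
// instructions), so it is not.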
900
901// FIXME: this implementation should be micro-architecture dependent, so a
902// micro-architecture target hook should be introduced here in future.
903bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
904  if (Subtarget.hasExynosCheapAsMoveHandling()) {
905 if (isExynosCheapAsMove(MI))
906 return true;
907 return MI.isAsCheapAsAMove();
908 }
909
910 switch (MI.getOpcode()) {
911 default:
912 return MI.isAsCheapAsAMove();
913
914 case AArch64::ADDWrs:
915 case AArch64::ADDXrs:
916 case AArch64::SUBWrs:
917 case AArch64::SUBXrs:
918 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
919
920 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
921 // ORRXri, it is as cheap as MOV.
922 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
923 case AArch64::MOVi32imm:
924 return isCheapImmediate(MI, 32);
925 case AArch64::MOVi64imm:
926 return isCheapImmediate(MI, 64);
927 }
928}
929
930bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
931  switch (MI.getOpcode()) {
932 default:
933 return false;
934
935 case AArch64::ADDWrs:
936 case AArch64::ADDXrs:
937 case AArch64::ADDSWrs:
938 case AArch64::ADDSXrs: {
939 unsigned Imm = MI.getOperand(3).getImm();
940 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
941 if (ShiftVal == 0)
942 return true;
943 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
944 }
945
946 case AArch64::ADDWrx:
947 case AArch64::ADDXrx:
948 case AArch64::ADDXrx64:
949 case AArch64::ADDSWrx:
950 case AArch64::ADDSXrx:
951 case AArch64::ADDSXrx64: {
952 unsigned Imm = MI.getOperand(3).getImm();
953 switch (AArch64_AM::getArithExtendType(Imm)) {
954 default:
955 return false;
956 case AArch64_AM::UXTB:
957 case AArch64_AM::UXTH:
958 case AArch64_AM::UXTW:
959 case AArch64_AM::UXTX:
960 return AArch64_AM::getArithShiftValue(Imm) <= 4;
961 }
962 }
963
964 case AArch64::SUBWrs:
965 case AArch64::SUBSWrs: {
966 unsigned Imm = MI.getOperand(3).getImm();
967 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
968 return ShiftVal == 0 ||
969 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
970 }
971
972 case AArch64::SUBXrs:
973 case AArch64::SUBSXrs: {
974 unsigned Imm = MI.getOperand(3).getImm();
975 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
976 return ShiftVal == 0 ||
977 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
978 }
979
980 case AArch64::SUBWrx:
981 case AArch64::SUBXrx:
982 case AArch64::SUBXrx64:
983 case AArch64::SUBSWrx:
984 case AArch64::SUBSXrx:
985 case AArch64::SUBSXrx64: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 switch (AArch64_AM::getArithExtendType(Imm)) {
988 default:
989 return false;
990 case AArch64_AM::UXTB:
991 case AArch64_AM::UXTH:
992 case AArch64_AM::UXTW:
993 case AArch64_AM::UXTX:
994 return AArch64_AM::getArithShiftValue(Imm) == 0;
995 }
996 }
997
998 case AArch64::LDRBBroW:
999 case AArch64::LDRBBroX:
1000 case AArch64::LDRBroW:
1001 case AArch64::LDRBroX:
1002 case AArch64::LDRDroW:
1003 case AArch64::LDRDroX:
1004 case AArch64::LDRHHroW:
1005 case AArch64::LDRHHroX:
1006 case AArch64::LDRHroW:
1007 case AArch64::LDRHroX:
1008 case AArch64::LDRQroW:
1009 case AArch64::LDRQroX:
1010 case AArch64::LDRSBWroW:
1011 case AArch64::LDRSBWroX:
1012 case AArch64::LDRSBXroW:
1013 case AArch64::LDRSBXroX:
1014 case AArch64::LDRSHWroW:
1015 case AArch64::LDRSHWroX:
1016 case AArch64::LDRSHXroW:
1017 case AArch64::LDRSHXroX:
1018 case AArch64::LDRSWroW:
1019 case AArch64::LDRSWroX:
1020 case AArch64::LDRSroW:
1021 case AArch64::LDRSroX:
1022 case AArch64::LDRWroW:
1023 case AArch64::LDRWroX:
1024 case AArch64::LDRXroW:
1025 case AArch64::LDRXroX:
1026 case AArch64::PRFMroW:
1027 case AArch64::PRFMroX:
1028 case AArch64::STRBBroW:
1029 case AArch64::STRBBroX:
1030 case AArch64::STRBroW:
1031 case AArch64::STRBroX:
1032 case AArch64::STRDroW:
1033 case AArch64::STRDroX:
1034 case AArch64::STRHHroW:
1035 case AArch64::STRHHroX:
1036 case AArch64::STRHroW:
1037 case AArch64::STRHroX:
1038 case AArch64::STRQroW:
1039 case AArch64::STRQroX:
1040 case AArch64::STRSroW:
1041 case AArch64::STRSroX:
1042 case AArch64::STRWroW:
1043 case AArch64::STRWroX:
1044 case AArch64::STRXroW:
1045 case AArch64::STRXroX: {
1046 unsigned IsSigned = MI.getOperand(3).getImm();
1047 return !IsSigned;
1048 }
1049 }
1050}
1051
1052bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1053  unsigned Opc = MI.getOpcode();
1054 switch (Opc) {
1055 default:
1056 return false;
1057 case AArch64::SEH_StackAlloc:
1058 case AArch64::SEH_SaveFPLR:
1059 case AArch64::SEH_SaveFPLR_X:
1060 case AArch64::SEH_SaveReg:
1061 case AArch64::SEH_SaveReg_X:
1062 case AArch64::SEH_SaveRegP:
1063 case AArch64::SEH_SaveRegP_X:
1064 case AArch64::SEH_SaveFReg:
1065 case AArch64::SEH_SaveFReg_X:
1066 case AArch64::SEH_SaveFRegP:
1067 case AArch64::SEH_SaveFRegP_X:
1068 case AArch64::SEH_SetFP:
1069 case AArch64::SEH_AddFP:
1070 case AArch64::SEH_Nop:
1071 case AArch64::SEH_PrologEnd:
1072 case AArch64::SEH_EpilogStart:
1073 case AArch64::SEH_EpilogEnd:
1074 case AArch64::SEH_PACSignLR:
1075 case AArch64::SEH_SaveAnyRegQP:
1076 case AArch64::SEH_SaveAnyRegQPX:
1077 return true;
1078 }
1079}
1080
1081bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1082                                             Register &SrcReg, Register &DstReg,
1083                                             unsigned &SubIdx) const {
1084 switch (MI.getOpcode()) {
1085 default:
1086 return false;
1087 case AArch64::SBFMXri: // aka sxtw
1088 case AArch64::UBFMXri: // aka uxtw
1089 // Check for the 32 -> 64 bit extension case, these instructions can do
1090 // much more.
1091 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1092 return false;
1093 // This is a signed or unsigned 32 -> 64 bit extension.
1094 SrcReg = MI.getOperand(1).getReg();
1095 DstReg = MI.getOperand(0).getReg();
1096 SubIdx = AArch64::sub_32;
1097 return true;
1098 }
1099}
1100
1101bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1102    const MachineInstr &MIa, const MachineInstr &MIb) const {
1103  const TargetRegisterInfo *TRI = &getRegisterInfo();
1104  const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1105 int64_t OffsetA = 0, OffsetB = 0;
1106 TypeSize WidthA(0, false), WidthB(0, false);
1107 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1108
1109 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1110 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1111
1112  if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1113      MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1114    return false;
1115
1116 // Retrieve the base, offset from the base and width. Width
1117 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1118  // the bases are identical, and the offset of the lower memory access plus
1119  // the width doesn't overlap the offset of the higher memory access,
1120  // then the memory accesses are different.
1121 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1122 // are assumed to have the same scale (vscale).
1123 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1124 WidthA, TRI) &&
1125 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1126 WidthB, TRI)) {
1127 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1128 OffsetAIsScalable == OffsetBIsScalable) {
1129 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1130 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1131 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1132 if (LowWidth.isScalable() == OffsetAIsScalable &&
1133 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1134 return true;
1135 }
1136 }
1137 return false;
1138}
1139
1140bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1141                                            const MachineBasicBlock *MBB,
1142                                            const MachineFunction &MF) const {
1143  if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1144    return true;
1145
1146 // Do not move an instruction that can be recognized as a branch target.
1147 if (hasBTISemantics(MI))
1148 return true;
1149
1150 switch (MI.getOpcode()) {
1151 case AArch64::HINT:
1152 // CSDB hints are scheduling barriers.
1153 if (MI.getOperand(0).getImm() == 0x14)
1154 return true;
1155 break;
1156 case AArch64::DSB:
1157 case AArch64::ISB:
1158 // DSB and ISB also are scheduling barriers.
1159 return true;
1160 case AArch64::MSRpstatesvcrImm1:
1161 // SMSTART and SMSTOP are also scheduling barriers.
1162 return true;
1163 default:;
1164 }
1165 if (isSEHInstruction(MI))
1166 return true;
1167 auto Next = std::next(MI.getIterator());
1168 return Next != MBB->end() && Next->isCFIInstruction();
1169}
1170
1171/// analyzeCompare - For a comparison instruction, return the source registers
1172/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1173/// Return true if the comparison instruction can be analyzed.
1174bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1175                                      Register &SrcReg2, int64_t &CmpMask,
1176 int64_t &CmpValue) const {
1177 // The first operand can be a frame index where we'd normally expect a
1178 // register.
1179 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1180 if (!MI.getOperand(1).isReg())
1181 return false;
1182
1183 switch (MI.getOpcode()) {
1184 default:
1185 break;
1186 case AArch64::PTEST_PP:
1187 case AArch64::PTEST_PP_ANY:
1188 SrcReg = MI.getOperand(0).getReg();
1189 SrcReg2 = MI.getOperand(1).getReg();
1190 // Not sure about the mask and value for now...
1191 CmpMask = ~0;
1192 CmpValue = 0;
1193 return true;
1194 case AArch64::SUBSWrr:
1195 case AArch64::SUBSWrs:
1196 case AArch64::SUBSWrx:
1197 case AArch64::SUBSXrr:
1198 case AArch64::SUBSXrs:
1199 case AArch64::SUBSXrx:
1200 case AArch64::ADDSWrr:
1201 case AArch64::ADDSWrs:
1202 case AArch64::ADDSWrx:
1203 case AArch64::ADDSXrr:
1204 case AArch64::ADDSXrs:
1205 case AArch64::ADDSXrx:
1206 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1207 SrcReg = MI.getOperand(1).getReg();
1208 SrcReg2 = MI.getOperand(2).getReg();
1209 CmpMask = ~0;
1210 CmpValue = 0;
1211 return true;
1212 case AArch64::SUBSWri:
1213 case AArch64::ADDSWri:
1214 case AArch64::SUBSXri:
1215 case AArch64::ADDSXri:
1216 SrcReg = MI.getOperand(1).getReg();
1217 SrcReg2 = 0;
1218 CmpMask = ~0;
1219 CmpValue = MI.getOperand(2).getImm();
1220 return true;
1221 case AArch64::ANDSWri:
1222 case AArch64::ANDSXri:
1223    // ANDS does not use the same encoding scheme as the other xxxS
1224 // instructions.
1225 SrcReg = MI.getOperand(1).getReg();
1226 SrcReg2 = 0;
1227 CmpMask = ~0;
1228    CmpValue = AArch64_AM::decodeLogicalImmediate(
1229        MI.getOperand(2).getImm(),
1230 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1231 return true;
1232 }
1233
1234 return false;
1235}
1236
1237static bool UpdateOperandRegClass(MachineInstr &Instr) {
1238  MachineBasicBlock *MBB = Instr.getParent();
1239 assert(MBB && "Can't get MachineBasicBlock here");
1240 MachineFunction *MF = MBB->getParent();
1241 assert(MF && "Can't get MachineFunction here");
1242  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1243  const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1244  MachineRegisterInfo *MRI = &MF->getRegInfo();
1245
1246 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1247 ++OpIdx) {
1248 MachineOperand &MO = Instr.getOperand(OpIdx);
1249 const TargetRegisterClass *OpRegCstraints =
1250 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1251
1252 // If there's no constraint, there's nothing to do.
1253 if (!OpRegCstraints)
1254 continue;
1255 // If the operand is a frame index, there's nothing to do here.
1256 // A frame index operand will resolve correctly during PEI.
1257 if (MO.isFI())
1258 continue;
1259
1260 assert(MO.isReg() &&
1261 "Operand has register constraints without being a register!");
1262
1263 Register Reg = MO.getReg();
1264 if (Reg.isPhysical()) {
1265 if (!OpRegCstraints->contains(Reg))
1266 return false;
1267 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1268 !MRI->constrainRegClass(Reg, OpRegCstraints))
1269 return false;
1270 }
1271
1272 return true;
1273}
1274
1275/// Return the opcode that does not set flags when possible - otherwise
1276/// return the original opcode. The caller is responsible to do the actual
1277/// substitution and legality checking.
1278unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1279  // Don't convert all compare instructions, because for some the zero register
1280 // encoding becomes the sp register.
1281 bool MIDefinesZeroReg = false;
1282 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1283 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1284 MIDefinesZeroReg = true;
1285
1286 switch (MI.getOpcode()) {
1287 default:
1288 return MI.getOpcode();
1289 case AArch64::ADDSWrr:
1290 return AArch64::ADDWrr;
1291 case AArch64::ADDSWri:
1292 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1293 case AArch64::ADDSWrs:
1294 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1295 case AArch64::ADDSWrx:
1296 return AArch64::ADDWrx;
1297 case AArch64::ADDSXrr:
1298 return AArch64::ADDXrr;
1299 case AArch64::ADDSXri:
1300 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1301 case AArch64::ADDSXrs:
1302 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1303 case AArch64::ADDSXrx:
1304 return AArch64::ADDXrx;
1305 case AArch64::SUBSWrr:
1306 return AArch64::SUBWrr;
1307 case AArch64::SUBSWri:
1308 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1309 case AArch64::SUBSWrs:
1310 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1311 case AArch64::SUBSWrx:
1312 return AArch64::SUBWrx;
1313 case AArch64::SUBSXrr:
1314 return AArch64::SUBXrr;
1315 case AArch64::SUBSXri:
1316 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1317 case AArch64::SUBSXrs:
1318 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1319 case AArch64::SUBSXrx:
1320 return AArch64::SUBXrx;
1321 }
1322}
1323
1324enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1325
1326/// True when condition flags are accessed (either by writing or reading)
1327/// on the instruction trace starting at From and ending at To.
1328///
1329/// Note: If From and To are from different blocks it's assumed CC are accessed
1330/// on the path.
1331static bool areCFlagsAccessedBetweenInstrs(
1332    MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1333    const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1334 // Early exit if To is at the beginning of the BB.
1335 if (To == To->getParent()->begin())
1336 return true;
1337
1338 // Check whether the instructions are in the same basic block
1339 // If not, assume the condition flags might get modified somewhere.
1340 if (To->getParent() != From->getParent())
1341 return true;
1342
1343 // From must be above To.
1344 assert(std::any_of(
1345 ++To.getReverse(), To->getParent()->rend(),
1346 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1347
1348 // We iterate backward starting at \p To until we hit \p From.
1349 for (const MachineInstr &Instr :
1350 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1351 if (((AccessToCheck & AK_Write) &&
1352 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1353 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1354 return true;
1355 }
1356 return false;
1357}
1358
1359std::optional<unsigned>
1360AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1361 MachineInstr *Pred,
1362 const MachineRegisterInfo *MRI) const {
1363 unsigned MaskOpcode = Mask->getOpcode();
1364 unsigned PredOpcode = Pred->getOpcode();
1365 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1366 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1367
1368 if (PredIsWhileLike) {
1369 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1370    // instruction and the condition is "any" since WHILEcc does an implicit
1371 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1372 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1373 return PredOpcode;
1374
1375 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1376 // redundant since WHILE performs an implicit PTEST with an all active
1377 // mask.
1378 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1379 getElementSizeForOpcode(MaskOpcode) ==
1380 getElementSizeForOpcode(PredOpcode))
1381 return PredOpcode;
1382
1383 return {};
1384 }
1385
1386 if (PredIsPTestLike) {
1387 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1388 // instruction that sets the flags as PTEST would and the condition is
1389 // "any" since PG is always a subset of the governing predicate of the
1390 // ptest-like instruction.
1391 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1392 return PredOpcode;
1393
1394    // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1395    // element size matches and either the PTEST_LIKE instruction uses
1396 // the same all active mask or the condition is "any".
1397 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1398 getElementSizeForOpcode(MaskOpcode) ==
1399 getElementSizeForOpcode(PredOpcode)) {
1400 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1401 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1402 return PredOpcode;
1403 }
1404
1405 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1406 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1407 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1408 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1409 // performed by the compare could consider fewer lanes for these element
1410 // sizes.
1411 //
1412 // For example, consider
1413 //
1414 // ptrue p0.b ; P0=1111-1111-1111-1111
1415 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1416 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1417 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1418 // ; ^ last active
1419 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1420 // ; ^ last active
1421 //
1422 // where the compare generates a canonical all active 32-bit predicate
1423 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1424 // active flag, whereas the PTEST instruction with the same mask doesn't.
1425 // For PTEST_ANY this doesn't apply as the flags in this case would be
1426 // identical regardless of element size.
1427 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1428 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1429 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1430 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1431 return PredOpcode;
1432
1433 return {};
1434 }
1435
1436 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1437 // opcode so the PTEST becomes redundant.
1438 switch (PredOpcode) {
1439 case AArch64::AND_PPzPP:
1440 case AArch64::BIC_PPzPP:
1441 case AArch64::EOR_PPzPP:
1442 case AArch64::NAND_PPzPP:
1443 case AArch64::NOR_PPzPP:
1444 case AArch64::ORN_PPzPP:
1445 case AArch64::ORR_PPzPP:
1446 case AArch64::BRKA_PPzP:
1447 case AArch64::BRKPA_PPzPP:
1448 case AArch64::BRKB_PPzP:
1449 case AArch64::BRKPB_PPzPP:
1450 case AArch64::RDFFR_PPz: {
1451 // Check to see if our mask is the same. If not the resulting flag bits
1452 // may be different and we can't remove the ptest.
1453 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1454 if (Mask != PredMask)
1455 return {};
1456 break;
1457 }
1458 case AArch64::BRKN_PPzP: {
1459 // BRKN uses an all active implicit mask to set flags unlike the other
1460 // flag-setting instructions.
1461 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1462 if ((MaskOpcode != AArch64::PTRUE_B) ||
1463 (Mask->getOperand(1).getImm() != 31))
1464 return {};
1465 break;
1466 }
1467 case AArch64::PTRUE_B:
1468 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1469 break;
1470 default:
1471 // Bail out if we don't recognize the input
1472 return {};
1473 }
1474
1475 return convertToFlagSettingOpc(PredOpcode);
1476}
1477
1478/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1479/// operation which could set the flags in an identical manner
1480bool AArch64InstrInfo::optimizePTestInstr(
1481 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1482 const MachineRegisterInfo *MRI) const {
1483 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1484 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1485 unsigned PredOpcode = Pred->getOpcode();
1486 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1487 if (!NewOp)
1488 return false;
1489
1491
1492 // If another instruction between Pred and PTest accesses flags, don't remove
1493 // the ptest or update the earlier instruction to modify them.
1494 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1495 return false;
1496
1497 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1498 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1499 // operand to be replaced with an equivalent instruction that also sets the
1500 // flags.
1501 PTest->eraseFromParent();
1502 if (*NewOp != PredOpcode) {
1503 Pred->setDesc(get(*NewOp));
1504 bool succeeded = UpdateOperandRegClass(*Pred);
1505 (void)succeeded;
1506 assert(succeeded && "Operands have incompatible register classes!");
1507 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1508 }
1509
1510 // Ensure that the flags def is live.
1511 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1512 unsigned i = 0, e = Pred->getNumOperands();
1513 for (; i != e; ++i) {
1514 MachineOperand &MO = Pred->getOperand(i);
1515 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1516 MO.setIsDead(false);
1517 break;
1518 }
1519 }
1520 }
1521 return true;
1522}
1523
1524/// Try to optimize a compare instruction. A compare instruction is an
1525/// instruction which produces AArch64::NZCV. It is truly a compare
1526/// instruction only when there are no uses of its destination
1527/// register.
1528///
1529/// The following steps are tried in order:
1530/// 1. Convert CmpInstr into an unconditional version.
1531/// 2. Remove CmpInstr if there is an instruction above it that produces a
1532///    needed condition code, or an instruction which can be converted into
1533///    such an instruction.
1534/// Only comparison with zero is supported.
1535bool AArch64InstrInfo::optimizeCompareInstr(
1536    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1537 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1538 assert(CmpInstr.getParent());
1539 assert(MRI);
1540
1541 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1542 int DeadNZCVIdx =
1543 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1544 if (DeadNZCVIdx != -1) {
1545 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1546 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1547 CmpInstr.eraseFromParent();
1548 return true;
1549 }
1550 unsigned Opc = CmpInstr.getOpcode();
1551 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1552 if (NewOpc == Opc)
1553 return false;
1554 const MCInstrDesc &MCID = get(NewOpc);
1555 CmpInstr.setDesc(MCID);
1556 CmpInstr.removeOperand(DeadNZCVIdx);
1557 bool succeeded = UpdateOperandRegClass(CmpInstr);
1558 (void)succeeded;
1559 assert(succeeded && "Some operands reg class are incompatible!");
1560 return true;
1561 }
1562
1563 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1564 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1565 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1566
1567 if (SrcReg2 != 0)
1568 return false;
1569
1570 // CmpInstr is a Compare instruction if destination register is not used.
1571 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1572 return false;
1573
1574 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1575 return true;
1576 return (CmpValue == 0 || CmpValue == 1) &&
1577 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1578}
1579
1580/// Get opcode of S version of Instr.
1581/// If Instr is S version its opcode is returned.
1582/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1583/// or we are not interested in it.
1584static unsigned sForm(MachineInstr &Instr) {
1585 switch (Instr.getOpcode()) {
1586 default:
1587 return AArch64::INSTRUCTION_LIST_END;
1588
1589 case AArch64::ADDSWrr:
1590 case AArch64::ADDSWri:
1591 case AArch64::ADDSXrr:
1592 case AArch64::ADDSXri:
1593 case AArch64::SUBSWrr:
1594 case AArch64::SUBSWri:
1595 case AArch64::SUBSXrr:
1596 case AArch64::SUBSXri:
1597 return Instr.getOpcode();
1598
1599 case AArch64::ADDWrr:
1600 return AArch64::ADDSWrr;
1601 case AArch64::ADDWri:
1602 return AArch64::ADDSWri;
1603 case AArch64::ADDXrr:
1604 return AArch64::ADDSXrr;
1605 case AArch64::ADDXri:
1606 return AArch64::ADDSXri;
1607 case AArch64::ADCWr:
1608 return AArch64::ADCSWr;
1609 case AArch64::ADCXr:
1610 return AArch64::ADCSXr;
1611 case AArch64::SUBWrr:
1612 return AArch64::SUBSWrr;
1613 case AArch64::SUBWri:
1614 return AArch64::SUBSWri;
1615 case AArch64::SUBXrr:
1616 return AArch64::SUBSXrr;
1617 case AArch64::SUBXri:
1618 return AArch64::SUBSXri;
1619 case AArch64::SBCWr:
1620 return AArch64::SBCSWr;
1621 case AArch64::SBCXr:
1622 return AArch64::SBCSXr;
1623 case AArch64::ANDWri:
1624 return AArch64::ANDSWri;
1625 case AArch64::ANDXri:
1626 return AArch64::ANDSXri;
1627 }
1628}
1629
1630/// Check if AArch64::NZCV should be alive in successors of MBB.
1631static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1632  for (auto *BB : MBB->successors())
1633 if (BB->isLiveIn(AArch64::NZCV))
1634 return true;
1635 return false;
1636}
1637
1638/// \returns The condition code operand index for \p Instr if it is a branch
1639/// or select and -1 otherwise.
1640static int
1641findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1642  switch (Instr.getOpcode()) {
1643 default:
1644 return -1;
1645
1646 case AArch64::Bcc: {
1647 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1648 assert(Idx >= 2);
1649 return Idx - 2;
1650 }
1651
1652 case AArch64::CSINVWr:
1653 case AArch64::CSINVXr:
1654 case AArch64::CSINCWr:
1655 case AArch64::CSINCXr:
1656 case AArch64::CSELWr:
1657 case AArch64::CSELXr:
1658 case AArch64::CSNEGWr:
1659 case AArch64::CSNEGXr:
1660 case AArch64::FCSELSrrr:
1661 case AArch64::FCSELDrrr: {
1662 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1663 assert(Idx >= 1);
1664 return Idx - 1;
1665 }
1666 }
1667}
1668
1669/// Find a condition code used by the instruction.
1670/// Returns AArch64CC::Invalid if either the instruction does not use condition
1671/// codes or we don't optimize CmpInstr in the presence of such instructions.
1672static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1673  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1674  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1675 Instr.getOperand(CCIdx).getImm())
1676                    : AArch64CC::Invalid;
1677}
1678
1680static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1681  UsedNZCV UsedFlags;
1682 switch (CC) {
1683 default:
1684 break;
1685
1686 case AArch64CC::EQ: // Z set
1687 case AArch64CC::NE: // Z clear
1688 UsedFlags.Z = true;
1689 break;
1690
1691 case AArch64CC::HI: // Z clear and C set
1692 case AArch64CC::LS: // Z set or C clear
1693 UsedFlags.Z = true;
1694 [[fallthrough]];
1695 case AArch64CC::HS: // C set
1696 case AArch64CC::LO: // C clear
1697 UsedFlags.C = true;
1698 break;
1699
1700 case AArch64CC::MI: // N set
1701 case AArch64CC::PL: // N clear
1702 UsedFlags.N = true;
1703 break;
1704
1705 case AArch64CC::VS: // V set
1706 case AArch64CC::VC: // V clear
1707 UsedFlags.V = true;
1708 break;
1709
1710 case AArch64CC::GT: // Z clear, N and V the same
1711 case AArch64CC::LE: // Z set, N and V differ
1712 UsedFlags.Z = true;
1713 [[fallthrough]];
1714 case AArch64CC::GE: // N and V the same
1715 case AArch64CC::LT: // N and V differ
1716 UsedFlags.N = true;
1717 UsedFlags.V = true;
1718 break;
1719 }
1720 return UsedFlags;
1721}
1722
1723/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1724/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1725/// \returns std::nullopt otherwise.
1726///
1727/// Collect instructions using those flags in \p CCUseInstrs if provided.
1728std::optional<UsedNZCV>
1729llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1730                       const TargetRegisterInfo &TRI,
1731 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1732 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1733 if (MI.getParent() != CmpParent)
1734 return std::nullopt;
1735
1736 if (areCFlagsAliveInSuccessors(CmpParent))
1737 return std::nullopt;
1738
1739 UsedNZCV NZCVUsedAfterCmp;
1740  for (MachineInstr &Instr : instructionsWithoutDebug(
1741           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1742 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1743      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1744      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1745 return std::nullopt;
1746 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1747 if (CCUseInstrs)
1748 CCUseInstrs->push_back(&Instr);
1749 }
1750 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1751 break;
1752 }
1753 return NZCVUsedAfterCmp;
1754}
1755
1756static bool isADDSRegImm(unsigned Opcode) {
1757 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1758}
1759
1760static bool isSUBSRegImm(unsigned Opcode) {
1761 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1762}
1763
1764/// Check if CmpInstr can be substituted by MI.
1765///
1766/// CmpInstr can be substituted:
1767/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1768/// - and, MI and CmpInstr are from the same MachineBB
1769/// - and, condition flags are not alive in successors of the CmpInstr parent
1770/// - and, if MI opcode is the S form there must be no defs of flags between
1771/// MI and CmpInstr
1772/// or if MI opcode is not the S form there must be neither defs of flags
1773/// nor uses of flags between MI and CmpInstr.
1774/// - and, if C/V flags are not used after CmpInstr
1775/// or if N flag is used but MI produces poison value if signed overflow
1776/// occurs.
1777static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1778                                       const TargetRegisterInfo &TRI) {
1779 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1780 // that may or may not set flags.
1781 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1782
1783 const unsigned CmpOpcode = CmpInstr.getOpcode();
1784 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1785 return false;
1786
1787 assert((CmpInstr.getOperand(2).isImm() &&
1788 CmpInstr.getOperand(2).getImm() == 0) &&
1789 "Caller guarantees that CmpInstr compares with constant 0");
1790
1791 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1792 if (!NZVCUsed || NZVCUsed->C)
1793 return false;
1794
1795 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1796 // '%vreg = add ...' or '%vreg = sub ...'.
1797 // Condition flag V is used to indicate signed overflow.
1798 // 1) MI and CmpInstr set N and V to the same value.
1799 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1800 // signed overflow occurs, so CmpInstr could still be simplified away.
1801 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1802 return false;
1803
1804 AccessKind AccessToCheck = AK_Write;
1805 if (sForm(MI) != MI.getOpcode())
1806 AccessToCheck = AK_All;
1807 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1808}
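// Typical pattern this check enables (see substituteCmpToZero below); the
// virtual registers and block name are illustrative:
//
//   %v   = ADDWrr %a, %b
//   ...                      ; nothing here may touch NZCV
//   %cmp = SUBSWri %v, 0, 0  ; cmp %v, #0 (result unused)
//   Bcc eq, %bb.X
//
// The compare is removed by rewriting the add as ADDSWrr. That is legal here
// because b.eq only reads Z; the C flag must never be consumed after the
// compare, and V only when the add carries the no-signed-wrap flag.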
1809
1810/// Substitute an instruction comparing to zero with another instruction
1811/// which produces needed condition flags.
1812///
1813/// Return true on success.
1814bool AArch64InstrInfo::substituteCmpToZero(
1815 MachineInstr &CmpInstr, unsigned SrcReg,
1816 const MachineRegisterInfo &MRI) const {
1817 // Get the unique definition of SrcReg.
1818 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1819 if (!MI)
1820 return false;
1821
1822  const TargetRegisterInfo &TRI = getRegisterInfo();
1823
1824 unsigned NewOpc = sForm(*MI);
1825 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1826 return false;
1827
1828 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1829 return false;
1830
1831 // Update the instruction to set NZCV.
1832 MI->setDesc(get(NewOpc));
1833 CmpInstr.eraseFromParent();
1834  bool succeeded = UpdateOperandRegClass(*MI);
1835  (void)succeeded;
1836 assert(succeeded && "Some operands reg class are incompatible!");
1837 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1838 return true;
1839}
1840
1841/// \returns True if \p CmpInstr can be removed.
1842///
1843/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1844/// codes used in \p CCUseInstrs must be inverted.
1845 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1846 int CmpValue, const TargetRegisterInfo &TRI,
1847 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1848 bool &IsInvertCC) {
1849 assert((CmpValue == 0 || CmpValue == 1) &&
1850 "Only comparisons to 0 or 1 considered for removal!");
1851
1852 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1853 unsigned MIOpc = MI.getOpcode();
1854 if (MIOpc == AArch64::CSINCWr) {
1855 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1856 MI.getOperand(2).getReg() != AArch64::WZR)
1857 return false;
1858 } else if (MIOpc == AArch64::CSINCXr) {
1859 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1860 MI.getOperand(2).getReg() != AArch64::XZR)
1861 return false;
1862 } else {
1863 return false;
1864 }
1865 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1866 if (MICC == AArch64CC::Invalid)
1867 return false;
1868
1869 // NZCV needs to be defined
1870 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1871 return false;
1872
1873 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1874 const unsigned CmpOpcode = CmpInstr.getOpcode();
1875 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1876 if (CmpValue && !IsSubsRegImm)
1877 return false;
1878 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1879 return false;
1880
1881 // MI conditions allowed: eq, ne, mi, pl
1882 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1883 if (MIUsedNZCV.C || MIUsedNZCV.V)
1884 return false;
1885
1886 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1887 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1888 // Condition flags are not used in CmpInstr basic block successors and only
1889 // Z or N flags are allowed to be used after CmpInstr within its basic block
1890 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1891 return false;
1892 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1893 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1894 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1895 return false;
1896 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1897 if (MIUsedNZCV.N && !CmpValue)
1898 return false;
1899
1900 // There must be no defs of flags between MI and CmpInstr
1901 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1902 return false;
1903
1904 // Condition code is inverted in the following cases:
1905 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1906 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1907 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1908 (!CmpValue && MICC == AArch64CC::NE);
1909 return true;
1910}
1911
1912/// Remove comparison in csinc-cmp sequence
1913///
1914/// Examples:
1915/// 1. \code
1916/// csinc w9, wzr, wzr, ne
1917/// cmp w9, #0
1918/// b.eq
1919/// \endcode
1920/// to
1921/// \code
1922/// csinc w9, wzr, wzr, ne
1923/// b.ne
1924/// \endcode
1925///
1926/// 2. \code
1927/// csinc x2, xzr, xzr, mi
1928/// cmp x2, #1
1929/// b.pl
1930/// \endcode
1931/// to
1932/// \code
1933/// csinc x2, xzr, xzr, mi
1934/// b.pl
1935/// \endcode
1936///
1937/// \param CmpInstr comparison instruction
1938/// \return True when comparison removed
1939bool AArch64InstrInfo::removeCmpToZeroOrOne(
1940 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1941 const MachineRegisterInfo &MRI) const {
1942 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1943 if (!MI)
1944 return false;
1945 const TargetRegisterInfo &TRI = getRegisterInfo();
1946 SmallVector<MachineInstr *, 4> CCUseInstrs;
1947 bool IsInvertCC = false;
1948 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1949 IsInvertCC))
1950 return false;
1951 // Make transformation
1952 CmpInstr.eraseFromParent();
1953 if (IsInvertCC) {
1954 // Invert condition codes in CmpInstr CC users
1955 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1956 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1957 assert(Idx >= 0 && "Unexpected instruction using CC.");
1958 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1959 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1960 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1961 CCOperand.setImm(CCUse);
1962 }
1963 }
1964 return true;
1965}
1966
1967 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1968 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1969 MI.getOpcode() != AArch64::CATCHRET)
1970 return false;
1971
1972 MachineBasicBlock &MBB = *MI.getParent();
1973 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1974 auto TRI = Subtarget.getRegisterInfo();
1975 DebugLoc DL = MI.getDebugLoc();
1976
1977 if (MI.getOpcode() == AArch64::CATCHRET) {
1978 // Skip to the first instruction before the epilog.
1979 const TargetInstrInfo *TII =
1980 MBB.getParent()->getSubtarget().getInstrInfo();
1981 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1982 MachineBasicBlock::iterator MBBI = MI.getIterator();
1983 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1984 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1985 FirstEpilogSEH != MBB.begin())
1986 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1987 if (FirstEpilogSEH != MBB.begin())
1988 FirstEpilogSEH = std::next(FirstEpilogSEH);
1989 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1990 .addReg(AArch64::X0, RegState::Define)
1991 .addMBB(TargetMBB);
1992 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1993 .addReg(AArch64::X0, RegState::Define)
1994 .addReg(AArch64::X0)
1995 .addMBB(TargetMBB)
1996 .addImm(0);
1997 return true;
1998 }
1999
2000 Register Reg = MI.getOperand(0).getReg();
2001 Module &M = *MBB.getParent()->getFunction().getParent();
2002 if (M.getStackProtectorGuard() == "sysreg") {
2003 const AArch64SysReg::SysReg *SrcReg =
2004 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2005 if (!SrcReg)
2006 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2007
2008 // mrs xN, sysreg
2009 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2010 .addDef(Reg)
2011 .addImm(SrcReg->Encoding);
2012 int Offset = M.getStackProtectorGuardOffset();
2013 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2014 // ldr xN, [xN, #offset]
2015 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2016 .addDef(Reg)
2017 .addUse(Reg, RegState::Kill)
2018 .addImm(Offset / 8);
2019 } else if (Offset >= -256 && Offset <= 255) {
2020 // ldur xN, [xN, #offset]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(Offset);
2025 } else if (Offset >= -4095 && Offset <= 4095) {
2026 if (Offset > 0) {
2027 // add xN, xN, #offset
2028 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset)
2032 .addImm(0);
2033 } else {
2034 // sub xN, xN, #offset
2035 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2036 .addDef(Reg)
2037 .addUse(Reg, RegState::Kill)
2038 .addImm(-Offset)
2039 .addImm(0);
2040 }
2041 // ldr xN, [xN]
2042 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2043 .addDef(Reg)
2044 .addUse(Reg, RegState::Kill)
2045 .addImm(0);
2046 } else {
2047 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2048 // than 32760.
2049 // It might be nice to use AArch64::MOVi32imm here, which would get
2050 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2051 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2052 // AArch64FrameLowering might help us find such a scratch register
2053 // though. If we failed to find a scratch register, we could emit a
2054 // stream of add instructions to build up the immediate. Or, we could try
2055 // to insert a AArch64::MOVi32imm before register allocation so that we
2056 // didn't need to scavenge for a scratch register.
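      // For illustration only, with a hypothetical scratch register xS the
      // out-of-range offset could be materialized roughly as
      //   movz xS, #(Offset & 0xffff)
      //   movk xS, #((Offset >> 16) & 0xffff), lsl #16
      //   ldr  xN, [xN, xS]
      // but no such register is available at this point.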
2057 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2058 }
2059 MBB.erase(MI);
2060 return true;
2061 }
2062
2063 const GlobalValue *GV =
2064 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2065 const TargetMachine &TM = MBB.getParent()->getTarget();
2066 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2067 const unsigned char MO_NC = AArch64II::MO_NC;
2068
2069 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2070 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2071 .addGlobalAddress(GV, 0, OpFlags);
2072 if (Subtarget.isTargetILP32()) {
2073 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2074 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2075 .addDef(Reg32, RegState::Dead)
2076 .addUse(Reg, RegState::Kill)
2077 .addImm(0)
2078 .addMemOperand(*MI.memoperands_begin())
2079 .addDef(Reg, RegState::Implicit);
2080 } else {
2081 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2082 .addReg(Reg, RegState::Kill)
2083 .addImm(0)
2084 .addMemOperand(*MI.memoperands_begin());
2085 }
2086 } else if (TM.getCodeModel() == CodeModel::Large) {
2087 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2088 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2089 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2090 .addImm(0);
2091 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2092 .addReg(Reg, RegState::Kill)
2093 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2094 .addImm(16);
2095 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2096 .addReg(Reg, RegState::Kill)
2097 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2098 .addImm(32);
2099 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2100 .addReg(Reg, RegState::Kill)
2101 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2102 .addImm(48);
2103 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2104 .addReg(Reg, RegState::Kill)
2105 .addImm(0)
2106 .addMemOperand(*MI.memoperands_begin());
2107 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2108 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2109 .addGlobalAddress(GV, 0, OpFlags);
2110 } else {
2111 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2112 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2113 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2114 if (Subtarget.isTargetILP32()) {
2115 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2117 .addDef(Reg32, RegState::Dead)
2118 .addUse(Reg, RegState::Kill)
2119 .addGlobalAddress(GV, 0, LoFlags)
2120 .addMemOperand(*MI.memoperands_begin())
2121 .addDef(Reg, RegState::Implicit);
2122 } else {
2123 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2124 .addReg(Reg, RegState::Kill)
2125 .addGlobalAddress(GV, 0, LoFlags)
2126 .addMemOperand(*MI.memoperands_begin());
2127 }
2128 }
2129
2130 MBB.erase(MI);
2131
2132 return true;
2133}
2134
2135// Return true if this instruction simply sets its single destination register
2136// to zero. This is equivalent to a register rename of the zero-register.
2137 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2138 switch (MI.getOpcode()) {
2139 default:
2140 break;
2141 case AArch64::MOVZWi:
2142 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2143 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2144 assert(MI.getDesc().getNumOperands() == 3 &&
2145 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2146 return true;
2147 }
2148 break;
2149 case AArch64::ANDWri: // and Rd, Rzr, #imm
2150 return MI.getOperand(1).getReg() == AArch64::WZR;
2151 case AArch64::ANDXri:
2152 return MI.getOperand(1).getReg() == AArch64::XZR;
2153 case TargetOpcode::COPY:
2154 return MI.getOperand(1).getReg() == AArch64::WZR;
2155 }
2156 return false;
2157}
2158
2159// Return true if this instruction simply renames a general register without
2160// modifying bits.
2161 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2162 switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 case TargetOpcode::COPY: {
2166 // GPR32 copies will be lowered to ORRXrs
2167 Register DstReg = MI.getOperand(0).getReg();
2168 return (AArch64::GPR32RegClass.contains(DstReg) ||
2169 AArch64::GPR64RegClass.contains(DstReg));
2170 }
2171 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2172 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2173 assert(MI.getDesc().getNumOperands() == 4 &&
2174 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2175 return true;
2176 }
2177 break;
2178 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2179 if (MI.getOperand(2).getImm() == 0) {
2180 assert(MI.getDesc().getNumOperands() == 4 &&
2181 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189// Return true if this instruction simply renames a floating-point register
2190// without modifying bits.
2191 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2192 switch (MI.getOpcode()) {
2193 default:
2194 break;
2195 case TargetOpcode::COPY: {
2196 Register DstReg = MI.getOperand(0).getReg();
2197 return AArch64::FPR128RegClass.contains(DstReg);
2198 }
2199 case AArch64::ORRv16i8:
2200 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2201 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2202 "invalid ORRv16i8 operands");
2203 return true;
2204 }
2205 break;
2206 }
2207 return false;
2208}
2209
2210 Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2211 int &FrameIndex) const {
2212 switch (MI.getOpcode()) {
2213 default:
2214 break;
2215 case AArch64::LDRWui:
2216 case AArch64::LDRXui:
2217 case AArch64::LDRBui:
2218 case AArch64::LDRHui:
2219 case AArch64::LDRSui:
2220 case AArch64::LDRDui:
2221 case AArch64::LDRQui:
2222 case AArch64::LDR_PXI:
2223 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2224 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2225 FrameIndex = MI.getOperand(1).getIndex();
2226 return MI.getOperand(0).getReg();
2227 }
2228 break;
2229 }
2230
2231 return 0;
2232}
2233
2234 Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2235 int &FrameIndex) const {
2236 switch (MI.getOpcode()) {
2237 default:
2238 break;
2239 case AArch64::STRWui:
2240 case AArch64::STRXui:
2241 case AArch64::STRBui:
2242 case AArch64::STRHui:
2243 case AArch64::STRSui:
2244 case AArch64::STRDui:
2245 case AArch64::STRQui:
2246 case AArch64::STR_PXI:
2247 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2248 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2249 FrameIndex = MI.getOperand(1).getIndex();
2250 return MI.getOperand(0).getReg();
2251 }
2252 break;
2253 }
2254 return 0;
2255}
2256
2257/// Check all MachineMemOperands for a hint to suppress pairing.
2258 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2259 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2260 return MMO->getFlags() & MOSuppressPair;
2261 });
2262}
2263
2264/// Set a flag on the first MachineMemOperand to suppress pairing.
2265 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2266 if (MI.memoperands_empty())
2267 return;
2268 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2269}
2270
2271/// Check all MachineMemOperands for a hint that the load/store is strided.
2272 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2273 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2274 return MMO->getFlags() & MOStridedAccess;
2275 });
2276}
2277
2278 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2279 switch (Opc) {
2280 default:
2281 return false;
2282 case AArch64::STURSi:
2283 case AArch64::STRSpre:
2284 case AArch64::STURDi:
2285 case AArch64::STRDpre:
2286 case AArch64::STURQi:
2287 case AArch64::STRQpre:
2288 case AArch64::STURBBi:
2289 case AArch64::STURHHi:
2290 case AArch64::STURWi:
2291 case AArch64::STRWpre:
2292 case AArch64::STURXi:
2293 case AArch64::STRXpre:
2294 case AArch64::LDURSi:
2295 case AArch64::LDRSpre:
2296 case AArch64::LDURDi:
2297 case AArch64::LDRDpre:
2298 case AArch64::LDURQi:
2299 case AArch64::LDRQpre:
2300 case AArch64::LDURWi:
2301 case AArch64::LDRWpre:
2302 case AArch64::LDURXi:
2303 case AArch64::LDRXpre:
2304 case AArch64::LDRSWpre:
2305 case AArch64::LDURSWi:
2306 case AArch64::LDURHHi:
2307 case AArch64::LDURBBi:
2308 case AArch64::LDURSBWi:
2309 case AArch64::LDURSHWi:
2310 return true;
2311 }
2312}
2313
2314std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2315 switch (Opc) {
2316 default: return {};
2317 case AArch64::PRFMui: return AArch64::PRFUMi;
2318 case AArch64::LDRXui: return AArch64::LDURXi;
2319 case AArch64::LDRWui: return AArch64::LDURWi;
2320 case AArch64::LDRBui: return AArch64::LDURBi;
2321 case AArch64::LDRHui: return AArch64::LDURHi;
2322 case AArch64::LDRSui: return AArch64::LDURSi;
2323 case AArch64::LDRDui: return AArch64::LDURDi;
2324 case AArch64::LDRQui: return AArch64::LDURQi;
2325 case AArch64::LDRBBui: return AArch64::LDURBBi;
2326 case AArch64::LDRHHui: return AArch64::LDURHHi;
2327 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2328 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2329 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2330 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2331 case AArch64::LDRSWui: return AArch64::LDURSWi;
2332 case AArch64::STRXui: return AArch64::STURXi;
2333 case AArch64::STRWui: return AArch64::STURWi;
2334 case AArch64::STRBui: return AArch64::STURBi;
2335 case AArch64::STRHui: return AArch64::STURHi;
2336 case AArch64::STRSui: return AArch64::STURSi;
2337 case AArch64::STRDui: return AArch64::STURDi;
2338 case AArch64::STRQui: return AArch64::STURQi;
2339 case AArch64::STRBBui: return AArch64::STURBBi;
2340 case AArch64::STRHHui: return AArch64::STURHHi;
2341 }
2342}
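// For example, the scaled form `ldr x0, [x1, #8]` (LDRXui, unsigned 12-bit
// immediate scaled by the access size) corresponds to the unscaled form
// `ldur x0, [x1, #8]` (LDURXi, signed 9-bit byte offset in [-256, 255]).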
2343
2344 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2345 switch (Opc) {
2346 default:
2347 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2348 case AArch64::ADDG:
2349 case AArch64::LDAPURBi:
2350 case AArch64::LDAPURHi:
2351 case AArch64::LDAPURi:
2352 case AArch64::LDAPURSBWi:
2353 case AArch64::LDAPURSBXi:
2354 case AArch64::LDAPURSHWi:
2355 case AArch64::LDAPURSHXi:
2356 case AArch64::LDAPURSWi:
2357 case AArch64::LDAPURXi:
2358 case AArch64::LDR_PPXI:
2359 case AArch64::LDR_PXI:
2360 case AArch64::LDR_ZXI:
2361 case AArch64::LDR_ZZXI:
2362 case AArch64::LDR_ZZZXI:
2363 case AArch64::LDR_ZZZZXI:
2364 case AArch64::LDRBBui:
2365 case AArch64::LDRBui:
2366 case AArch64::LDRDui:
2367 case AArch64::LDRHHui:
2368 case AArch64::LDRHui:
2369 case AArch64::LDRQui:
2370 case AArch64::LDRSBWui:
2371 case AArch64::LDRSBXui:
2372 case AArch64::LDRSHWui:
2373 case AArch64::LDRSHXui:
2374 case AArch64::LDRSui:
2375 case AArch64::LDRSWui:
2376 case AArch64::LDRWui:
2377 case AArch64::LDRXui:
2378 case AArch64::LDURBBi:
2379 case AArch64::LDURBi:
2380 case AArch64::LDURDi:
2381 case AArch64::LDURHHi:
2382 case AArch64::LDURHi:
2383 case AArch64::LDURQi:
2384 case AArch64::LDURSBWi:
2385 case AArch64::LDURSBXi:
2386 case AArch64::LDURSHWi:
2387 case AArch64::LDURSHXi:
2388 case AArch64::LDURSi:
2389 case AArch64::LDURSWi:
2390 case AArch64::LDURWi:
2391 case AArch64::LDURXi:
2392 case AArch64::PRFMui:
2393 case AArch64::PRFUMi:
2394 case AArch64::ST2Gi:
2395 case AArch64::STGi:
2396 case AArch64::STLURBi:
2397 case AArch64::STLURHi:
2398 case AArch64::STLURWi:
2399 case AArch64::STLURXi:
2400 case AArch64::StoreSwiftAsyncContext:
2401 case AArch64::STR_PPXI:
2402 case AArch64::STR_PXI:
2403 case AArch64::STR_ZXI:
2404 case AArch64::STR_ZZXI:
2405 case AArch64::STR_ZZZXI:
2406 case AArch64::STR_ZZZZXI:
2407 case AArch64::STRBBui:
2408 case AArch64::STRBui:
2409 case AArch64::STRDui:
2410 case AArch64::STRHHui:
2411 case AArch64::STRHui:
2412 case AArch64::STRQui:
2413 case AArch64::STRSui:
2414 case AArch64::STRWui:
2415 case AArch64::STRXui:
2416 case AArch64::STURBBi:
2417 case AArch64::STURBi:
2418 case AArch64::STURDi:
2419 case AArch64::STURHHi:
2420 case AArch64::STURHi:
2421 case AArch64::STURQi:
2422 case AArch64::STURSi:
2423 case AArch64::STURWi:
2424 case AArch64::STURXi:
2425 case AArch64::STZ2Gi:
2426 case AArch64::STZGi:
2427 case AArch64::TAGPstack:
2428 return 2;
2429 case AArch64::LD1B_D_IMM:
2430 case AArch64::LD1B_H_IMM:
2431 case AArch64::LD1B_IMM:
2432 case AArch64::LD1B_S_IMM:
2433 case AArch64::LD1D_IMM:
2434 case AArch64::LD1H_D_IMM:
2435 case AArch64::LD1H_IMM:
2436 case AArch64::LD1H_S_IMM:
2437 case AArch64::LD1RB_D_IMM:
2438 case AArch64::LD1RB_H_IMM:
2439 case AArch64::LD1RB_IMM:
2440 case AArch64::LD1RB_S_IMM:
2441 case AArch64::LD1RD_IMM:
2442 case AArch64::LD1RH_D_IMM:
2443 case AArch64::LD1RH_IMM:
2444 case AArch64::LD1RH_S_IMM:
2445 case AArch64::LD1RSB_D_IMM:
2446 case AArch64::LD1RSB_H_IMM:
2447 case AArch64::LD1RSB_S_IMM:
2448 case AArch64::LD1RSH_D_IMM:
2449 case AArch64::LD1RSH_S_IMM:
2450 case AArch64::LD1RSW_IMM:
2451 case AArch64::LD1RW_D_IMM:
2452 case AArch64::LD1RW_IMM:
2453 case AArch64::LD1SB_D_IMM:
2454 case AArch64::LD1SB_H_IMM:
2455 case AArch64::LD1SB_S_IMM:
2456 case AArch64::LD1SH_D_IMM:
2457 case AArch64::LD1SH_S_IMM:
2458 case AArch64::LD1SW_D_IMM:
2459 case AArch64::LD1W_D_IMM:
2460 case AArch64::LD1W_IMM:
2461 case AArch64::LD2B_IMM:
2462 case AArch64::LD2D_IMM:
2463 case AArch64::LD2H_IMM:
2464 case AArch64::LD2W_IMM:
2465 case AArch64::LD3B_IMM:
2466 case AArch64::LD3D_IMM:
2467 case AArch64::LD3H_IMM:
2468 case AArch64::LD3W_IMM:
2469 case AArch64::LD4B_IMM:
2470 case AArch64::LD4D_IMM:
2471 case AArch64::LD4H_IMM:
2472 case AArch64::LD4W_IMM:
2473 case AArch64::LDG:
2474 case AArch64::LDNF1B_D_IMM:
2475 case AArch64::LDNF1B_H_IMM:
2476 case AArch64::LDNF1B_IMM:
2477 case AArch64::LDNF1B_S_IMM:
2478 case AArch64::LDNF1D_IMM:
2479 case AArch64::LDNF1H_D_IMM:
2480 case AArch64::LDNF1H_IMM:
2481 case AArch64::LDNF1H_S_IMM:
2482 case AArch64::LDNF1SB_D_IMM:
2483 case AArch64::LDNF1SB_H_IMM:
2484 case AArch64::LDNF1SB_S_IMM:
2485 case AArch64::LDNF1SH_D_IMM:
2486 case AArch64::LDNF1SH_S_IMM:
2487 case AArch64::LDNF1SW_D_IMM:
2488 case AArch64::LDNF1W_D_IMM:
2489 case AArch64::LDNF1W_IMM:
2490 case AArch64::LDNPDi:
2491 case AArch64::LDNPQi:
2492 case AArch64::LDNPSi:
2493 case AArch64::LDNPWi:
2494 case AArch64::LDNPXi:
2495 case AArch64::LDNT1B_ZRI:
2496 case AArch64::LDNT1D_ZRI:
2497 case AArch64::LDNT1H_ZRI:
2498 case AArch64::LDNT1W_ZRI:
2499 case AArch64::LDPDi:
2500 case AArch64::LDPQi:
2501 case AArch64::LDPSi:
2502 case AArch64::LDPWi:
2503 case AArch64::LDPXi:
2504 case AArch64::LDRBBpost:
2505 case AArch64::LDRBBpre:
2506 case AArch64::LDRBpost:
2507 case AArch64::LDRBpre:
2508 case AArch64::LDRDpost:
2509 case AArch64::LDRDpre:
2510 case AArch64::LDRHHpost:
2511 case AArch64::LDRHHpre:
2512 case AArch64::LDRHpost:
2513 case AArch64::LDRHpre:
2514 case AArch64::LDRQpost:
2515 case AArch64::LDRQpre:
2516 case AArch64::LDRSpost:
2517 case AArch64::LDRSpre:
2518 case AArch64::LDRWpost:
2519 case AArch64::LDRWpre:
2520 case AArch64::LDRXpost:
2521 case AArch64::LDRXpre:
2522 case AArch64::ST1B_D_IMM:
2523 case AArch64::ST1B_H_IMM:
2524 case AArch64::ST1B_IMM:
2525 case AArch64::ST1B_S_IMM:
2526 case AArch64::ST1D_IMM:
2527 case AArch64::ST1H_D_IMM:
2528 case AArch64::ST1H_IMM:
2529 case AArch64::ST1H_S_IMM:
2530 case AArch64::ST1W_D_IMM:
2531 case AArch64::ST1W_IMM:
2532 case AArch64::ST2B_IMM:
2533 case AArch64::ST2D_IMM:
2534 case AArch64::ST2H_IMM:
2535 case AArch64::ST2W_IMM:
2536 case AArch64::ST3B_IMM:
2537 case AArch64::ST3D_IMM:
2538 case AArch64::ST3H_IMM:
2539 case AArch64::ST3W_IMM:
2540 case AArch64::ST4B_IMM:
2541 case AArch64::ST4D_IMM:
2542 case AArch64::ST4H_IMM:
2543 case AArch64::ST4W_IMM:
2544 case AArch64::STGPi:
2545 case AArch64::STNPDi:
2546 case AArch64::STNPQi:
2547 case AArch64::STNPSi:
2548 case AArch64::STNPWi:
2549 case AArch64::STNPXi:
2550 case AArch64::STNT1B_ZRI:
2551 case AArch64::STNT1D_ZRI:
2552 case AArch64::STNT1H_ZRI:
2553 case AArch64::STNT1W_ZRI:
2554 case AArch64::STPDi:
2555 case AArch64::STPQi:
2556 case AArch64::STPSi:
2557 case AArch64::STPWi:
2558 case AArch64::STPXi:
2559 case AArch64::STRBBpost:
2560 case AArch64::STRBBpre:
2561 case AArch64::STRBpost:
2562 case AArch64::STRBpre:
2563 case AArch64::STRDpost:
2564 case AArch64::STRDpre:
2565 case AArch64::STRHHpost:
2566 case AArch64::STRHHpre:
2567 case AArch64::STRHpost:
2568 case AArch64::STRHpre:
2569 case AArch64::STRQpost:
2570 case AArch64::STRQpre:
2571 case AArch64::STRSpost:
2572 case AArch64::STRSpre:
2573 case AArch64::STRWpost:
2574 case AArch64::STRWpre:
2575 case AArch64::STRXpost:
2576 case AArch64::STRXpre:
2577 return 3;
2578 case AArch64::LDPDpost:
2579 case AArch64::LDPDpre:
2580 case AArch64::LDPQpost:
2581 case AArch64::LDPQpre:
2582 case AArch64::LDPSpost:
2583 case AArch64::LDPSpre:
2584 case AArch64::LDPWpost:
2585 case AArch64::LDPWpre:
2586 case AArch64::LDPXpost:
2587 case AArch64::LDPXpre:
2588 case AArch64::STPDpost:
2589 case AArch64::STPDpre:
2590 case AArch64::STPQpost:
2591 case AArch64::STPQpre:
2592 case AArch64::STPSpost:
2593 case AArch64::STPSpre:
2594 case AArch64::STPWpost:
2595 case AArch64::STPWpre:
2596 case AArch64::STPXpost:
2597 case AArch64::STPXpre:
2598 return 4;
2599 }
2600}
2601
2602 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2603 switch (MI.getOpcode()) {
2604 default:
2605 return false;
2606 // Scaled instructions.
2607 case AArch64::STRSui:
2608 case AArch64::STRDui:
2609 case AArch64::STRQui:
2610 case AArch64::STRXui:
2611 case AArch64::STRWui:
2612 case AArch64::LDRSui:
2613 case AArch64::LDRDui:
2614 case AArch64::LDRQui:
2615 case AArch64::LDRXui:
2616 case AArch64::LDRWui:
2617 case AArch64::LDRSWui:
2618 // Unscaled instructions.
2619 case AArch64::STURSi:
2620 case AArch64::STRSpre:
2621 case AArch64::STURDi:
2622 case AArch64::STRDpre:
2623 case AArch64::STURQi:
2624 case AArch64::STRQpre:
2625 case AArch64::STURWi:
2626 case AArch64::STRWpre:
2627 case AArch64::STURXi:
2628 case AArch64::STRXpre:
2629 case AArch64::LDURSi:
2630 case AArch64::LDRSpre:
2631 case AArch64::LDURDi:
2632 case AArch64::LDRDpre:
2633 case AArch64::LDURQi:
2634 case AArch64::LDRQpre:
2635 case AArch64::LDURWi:
2636 case AArch64::LDRWpre:
2637 case AArch64::LDURXi:
2638 case AArch64::LDRXpre:
2639 case AArch64::LDURSWi:
2640 case AArch64::LDRSWpre:
2641 return true;
2642 }
2643}
2644
2645 bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2646 switch (MI.getOpcode()) {
2647 default:
2648 assert((!MI.isCall() || !MI.isReturn()) &&
2649 "Unexpected instruction - was a new tail call opcode introduced?");
2650 return false;
2651 case AArch64::TCRETURNdi:
2652 case AArch64::TCRETURNri:
2653 case AArch64::TCRETURNrix16x17:
2654 case AArch64::TCRETURNrix17:
2655 case AArch64::TCRETURNrinotx16:
2656 case AArch64::TCRETURNriALL:
2657 case AArch64::AUTH_TCRETURN:
2658 case AArch64::AUTH_TCRETURN_BTI:
2659 return true;
2660 }
2661}
2662
2663 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2664 switch (Opc) {
2665 default:
2666 llvm_unreachable("Opcode has no flag setting equivalent!");
2667 // 32-bit cases:
2668 case AArch64::ADDWri:
2669 return AArch64::ADDSWri;
2670 case AArch64::ADDWrr:
2671 return AArch64::ADDSWrr;
2672 case AArch64::ADDWrs:
2673 return AArch64::ADDSWrs;
2674 case AArch64::ADDWrx:
2675 return AArch64::ADDSWrx;
2676 case AArch64::ANDWri:
2677 return AArch64::ANDSWri;
2678 case AArch64::ANDWrr:
2679 return AArch64::ANDSWrr;
2680 case AArch64::ANDWrs:
2681 return AArch64::ANDSWrs;
2682 case AArch64::BICWrr:
2683 return AArch64::BICSWrr;
2684 case AArch64::BICWrs:
2685 return AArch64::BICSWrs;
2686 case AArch64::SUBWri:
2687 return AArch64::SUBSWri;
2688 case AArch64::SUBWrr:
2689 return AArch64::SUBSWrr;
2690 case AArch64::SUBWrs:
2691 return AArch64::SUBSWrs;
2692 case AArch64::SUBWrx:
2693 return AArch64::SUBSWrx;
2694 // 64-bit cases:
2695 case AArch64::ADDXri:
2696 return AArch64::ADDSXri;
2697 case AArch64::ADDXrr:
2698 return AArch64::ADDSXrr;
2699 case AArch64::ADDXrs:
2700 return AArch64::ADDSXrs;
2701 case AArch64::ADDXrx:
2702 return AArch64::ADDSXrx;
2703 case AArch64::ANDXri:
2704 return AArch64::ANDSXri;
2705 case AArch64::ANDXrr:
2706 return AArch64::ANDSXrr;
2707 case AArch64::ANDXrs:
2708 return AArch64::ANDSXrs;
2709 case AArch64::BICXrr:
2710 return AArch64::BICSXrr;
2711 case AArch64::BICXrs:
2712 return AArch64::BICSXrs;
2713 case AArch64::SUBXri:
2714 return AArch64::SUBSXri;
2715 case AArch64::SUBXrr:
2716 return AArch64::SUBSXrr;
2717 case AArch64::SUBXrs:
2718 return AArch64::SUBSXrs;
2719 case AArch64::SUBXrx:
2720 return AArch64::SUBSXrx;
2721 // SVE instructions:
2722 case AArch64::AND_PPzPP:
2723 return AArch64::ANDS_PPzPP;
2724 case AArch64::BIC_PPzPP:
2725 return AArch64::BICS_PPzPP;
2726 case AArch64::EOR_PPzPP:
2727 return AArch64::EORS_PPzPP;
2728 case AArch64::NAND_PPzPP:
2729 return AArch64::NANDS_PPzPP;
2730 case AArch64::NOR_PPzPP:
2731 return AArch64::NORS_PPzPP;
2732 case AArch64::ORN_PPzPP:
2733 return AArch64::ORNS_PPzPP;
2734 case AArch64::ORR_PPzPP:
2735 return AArch64::ORRS_PPzPP;
2736 case AArch64::BRKA_PPzP:
2737 return AArch64::BRKAS_PPzP;
2738 case AArch64::BRKPA_PPzPP:
2739 return AArch64::BRKPAS_PPzPP;
2740 case AArch64::BRKB_PPzP:
2741 return AArch64::BRKBS_PPzP;
2742 case AArch64::BRKPB_PPzPP:
2743 return AArch64::BRKPBS_PPzPP;
2744 case AArch64::BRKN_PPzP:
2745 return AArch64::BRKNS_PPzP;
2746 case AArch64::RDFFR_PPz:
2747 return AArch64::RDFFRS_PPz;
2748 case AArch64::PTRUE_B:
2749 return AArch64::PTRUES_B;
2750 }
2751}
2752
2753// Is this a candidate for ld/st merging or pairing? For example, we don't
2754// touch volatiles or load/stores that have a hint to avoid pair formation.
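// For example, two adjacent accesses such as
//   ldr x0, [sp, #16]
//   ldr x1, [sp, #24]
// may later be combined by the load/store optimizer into
//   ldp x0, x1, [sp, #16]
// provided none of the checks below reject them as candidates.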
2755 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2756
2757 bool IsPreLdSt = isPreLdSt(MI);
2758
2759 // If this is a volatile load/store, don't mess with it.
2760 if (MI.hasOrderedMemoryRef())
2761 return false;
2762
2763 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2764 // For Pre-inc LD/ST, the operand is shifted by one.
2765 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2766 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2767 "Expected a reg or frame index operand.");
2768
2769 // For Pre-indexed addressing quadword instructions, the third operand is the
2770 // immediate value.
2771 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2772
2773 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2774 return false;
2775
2776 // Can't merge/pair if the instruction modifies the base register.
2777 // e.g., ldr x0, [x0]
2778 // This case will never occur with an FI base.
2779 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2780 // STR<S,D,Q,W,X>pre, it can be merged.
2781 // For example:
2782 // ldr q0, [x11, #32]!
2783 // ldr q1, [x11, #16]
2784 // to
2785 // ldp q0, q1, [x11, #32]!
2786 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2787 Register BaseReg = MI.getOperand(1).getReg();
2788 const TargetRegisterInfo *TRI = &getRegisterInfo();
2789 if (MI.modifiesRegister(BaseReg, TRI))
2790 return false;
2791 }
2792
2793 // Check if this load/store has a hint to avoid pair formation.
2794 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2795 if (isLdStPairSuppressed(MI))
2796 return false;
2797
2798 // Do not pair any callee-save store/reload instructions in the
2799 // prologue/epilogue if the CFI information encoded the operations as separate
2800 // instructions, as that will cause the size of the actual prologue to mismatch
2801 // with the prologue size recorded in the Windows CFI.
2802 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2803 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2804 MI.getMF()->getFunction().needsUnwindTableEntry();
2805 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2806 MI.getFlag(MachineInstr::FrameDestroy)))
2807 return false;
2808
2809 // On some CPUs quad load/store pairs are slower than two single load/stores.
2810 if (Subtarget.isPaired128Slow()) {
2811 switch (MI.getOpcode()) {
2812 default:
2813 break;
2814 case AArch64::LDURQi:
2815 case AArch64::STURQi:
2816 case AArch64::LDRQui:
2817 case AArch64::STRQui:
2818 return false;
2819 }
2820 }
2821
2822 return true;
2823}
2824
2825 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2826 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2827 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2828 const TargetRegisterInfo *TRI) const {
2829 if (!LdSt.mayLoadOrStore())
2830 return false;
2831
2832 const MachineOperand *BaseOp;
2833 TypeSize WidthN(0, false);
2834 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2835 WidthN, TRI))
2836 return false;
2837 // The maximum vscale is 16 under AArch64; return the maximal extent for the
2838 // vector.
2839 Width = LocationSize::precise(WidthN);
2840 BaseOps.push_back(BaseOp);
2841 return true;
2842}
2843
2844std::optional<ExtAddrMode>
2845 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2846 const TargetRegisterInfo *TRI) const {
2847 const MachineOperand *Base; // Filled with the base operand of MI.
2848 int64_t Offset; // Filled with the offset of MI.
2849 bool OffsetIsScalable;
2850 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2851 return std::nullopt;
2852
2853 if (!Base->isReg())
2854 return std::nullopt;
2855 ExtAddrMode AM;
2856 AM.BaseReg = Base->getReg();
2857 AM.Displacement = Offset;
2858 AM.ScaledReg = 0;
2859 AM.Scale = 0;
2860 return AM;
2861}
2862
2863 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2864 Register Reg,
2865 const MachineInstr &AddrI,
2866 ExtAddrMode &AM) const {
2867 // Filter out instructions into which we cannot fold.
2868 unsigned NumBytes;
2869 int64_t OffsetScale = 1;
2870 switch (MemI.getOpcode()) {
2871 default:
2872 return false;
2873
2874 case AArch64::LDURQi:
2875 case AArch64::STURQi:
2876 NumBytes = 16;
2877 break;
2878
2879 case AArch64::LDURDi:
2880 case AArch64::STURDi:
2881 case AArch64::LDURXi:
2882 case AArch64::STURXi:
2883 NumBytes = 8;
2884 break;
2885
2886 case AArch64::LDURWi:
2887 case AArch64::LDURSWi:
2888 case AArch64::STURWi:
2889 NumBytes = 4;
2890 break;
2891
2892 case AArch64::LDURHi:
2893 case AArch64::STURHi:
2894 case AArch64::LDURHHi:
2895 case AArch64::STURHHi:
2896 case AArch64::LDURSHXi:
2897 case AArch64::LDURSHWi:
2898 NumBytes = 2;
2899 break;
2900
2901 case AArch64::LDRBroX:
2902 case AArch64::LDRBBroX:
2903 case AArch64::LDRSBXroX:
2904 case AArch64::LDRSBWroX:
2905 case AArch64::STRBroX:
2906 case AArch64::STRBBroX:
2907 case AArch64::LDURBi:
2908 case AArch64::LDURBBi:
2909 case AArch64::LDURSBXi:
2910 case AArch64::LDURSBWi:
2911 case AArch64::STURBi:
2912 case AArch64::STURBBi:
2913 case AArch64::LDRBui:
2914 case AArch64::LDRBBui:
2915 case AArch64::LDRSBXui:
2916 case AArch64::LDRSBWui:
2917 case AArch64::STRBui:
2918 case AArch64::STRBBui:
2919 NumBytes = 1;
2920 break;
2921
2922 case AArch64::LDRQroX:
2923 case AArch64::STRQroX:
2924 case AArch64::LDRQui:
2925 case AArch64::STRQui:
2926 NumBytes = 16;
2927 OffsetScale = 16;
2928 break;
2929
2930 case AArch64::LDRDroX:
2931 case AArch64::STRDroX:
2932 case AArch64::LDRXroX:
2933 case AArch64::STRXroX:
2934 case AArch64::LDRDui:
2935 case AArch64::STRDui:
2936 case AArch64::LDRXui:
2937 case AArch64::STRXui:
2938 NumBytes = 8;
2939 OffsetScale = 8;
2940 break;
2941
2942 case AArch64::LDRWroX:
2943 case AArch64::LDRSWroX:
2944 case AArch64::STRWroX:
2945 case AArch64::LDRWui:
2946 case AArch64::LDRSWui:
2947 case AArch64::STRWui:
2948 NumBytes = 4;
2949 OffsetScale = 4;
2950 break;
2951
2952 case AArch64::LDRHroX:
2953 case AArch64::STRHroX:
2954 case AArch64::LDRHHroX:
2955 case AArch64::STRHHroX:
2956 case AArch64::LDRSHXroX:
2957 case AArch64::LDRSHWroX:
2958 case AArch64::LDRHui:
2959 case AArch64::STRHui:
2960 case AArch64::LDRHHui:
2961 case AArch64::STRHHui:
2962 case AArch64::LDRSHXui:
2963 case AArch64::LDRSHWui:
2964 NumBytes = 2;
2965 OffsetScale = 2;
2966 break;
2967 }
2968
2969 // Check the fold operand is not the loaded/stored value.
2970 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2971 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2972 return false;
2973
2974 // Handle memory instructions with a [Reg, Reg] addressing mode.
2975 if (MemI.getOperand(2).isReg()) {
2976 // Bail if the addressing mode already includes extension of the offset
2977 // register.
2978 if (MemI.getOperand(3).getImm())
2979 return false;
2980
2981 // Check if we actually have a scaled offset.
2982 if (MemI.getOperand(4).getImm() == 0)
2983 OffsetScale = 1;
2984
2985 // If the address instruction is folded into the base register, then the
2986 // addressing mode must not have a scale. Then we can swap the base and the
2987 // scaled registers.
2988 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2989 return false;
2990
2991 switch (AddrI.getOpcode()) {
2992 default:
2993 return false;
2994
2995 case AArch64::SBFMXri:
2996 // sxtw Xa, Wm
2997 // ldr Xd, [Xn, Xa, lsl #N]
2998 // ->
2999 // ldr Xd, [Xn, Wm, sxtw #N]
3000 if (AddrI.getOperand(2).getImm() != 0 ||
3001 AddrI.getOperand(3).getImm() != 31)
3002 return false;
3003
3004 AM.BaseReg = MemI.getOperand(1).getReg();
3005 if (AM.BaseReg == Reg)
3006 AM.BaseReg = MemI.getOperand(2).getReg();
3007 AM.ScaledReg = AddrI.getOperand(1).getReg();
3008 AM.Scale = OffsetScale;
3009 AM.Displacement = 0;
3010 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3011 return true;
3012
3013 case TargetOpcode::SUBREG_TO_REG: {
3014 // mov Wa, Wm
3015 // ldr Xd, [Xn, Xa, lsl #N]
3016 // ->
3017 // ldr Xd, [Xn, Wm, uxtw #N]
3018
3019 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3020 if (AddrI.getOperand(1).getImm() != 0 ||
3021 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3022 return false;
3023
3024 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3025 Register OffsetReg = AddrI.getOperand(2).getReg();
3026 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3027 return false;
3028
3029 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3030 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3031 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3032 DefMI.getOperand(3).getImm() != 0)
3033 return false;
3034
3035 AM.BaseReg = MemI.getOperand(1).getReg();
3036 if (AM.BaseReg == Reg)
3037 AM.BaseReg = MemI.getOperand(2).getReg();
3038 AM.ScaledReg = DefMI.getOperand(2).getReg();
3039 AM.Scale = OffsetScale;
3040 AM.Displacement = 0;
3041 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3042 return true;
3043 }
3044 }
3045 }
3046
3047 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3048
3049 // Check we are not breaking a potential conversion to an LDP.
3050 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3051 int64_t NewOffset) -> bool {
3052 int64_t MinOffset, MaxOffset;
3053 switch (NumBytes) {
3054 default:
3055 return true;
3056 case 4:
3057 MinOffset = -256;
3058 MaxOffset = 252;
3059 break;
3060 case 8:
3061 MinOffset = -512;
3062 MaxOffset = 504;
3063 break;
3064 case 16:
3065 MinOffset = -1024;
3066 MaxOffset = 1008;
3067 break;
3068 }
3069 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3070 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3071 };
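  // The ranges above follow from LDP's signed 7-bit immediate, which is scaled
  // by the access size: e.g. for 8-byte accesses the reachable offsets are
  // -64 * 8 = -512 through 63 * 8 = 504.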
3072 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3073 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3074 int64_t NewOffset = OldOffset + Disp;
3075 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3076 return false;
3077 // If the old offset would fit into an LDP, but the new offset wouldn't,
3078 // bail out.
3079 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3080 return false;
3081 AM.BaseReg = AddrI.getOperand(1).getReg();
3082 AM.ScaledReg = 0;
3083 AM.Scale = 0;
3084 AM.Displacement = NewOffset;
3085 AM.Form = ExtAddrMode::Formula::Basic;
3086 return true;
3087 };
3088
3089 auto canFoldAddRegIntoAddrMode =
3090 [&](int64_t Scale,
3091 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3092 if (MemI.getOperand(2).getImm() != 0)
3093 return false;
3094 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3095 return false;
3096 AM.BaseReg = AddrI.getOperand(1).getReg();
3097 AM.ScaledReg = AddrI.getOperand(2).getReg();
3098 AM.Scale = Scale;
3099 AM.Displacement = 0;
3100 AM.Form = Form;
3101 return true;
3102 };
3103
3104 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3105 unsigned Opcode = MemI.getOpcode();
3106 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3107 Subtarget.isSTRQroSlow();
3108 };
3109
3110 int64_t Disp = 0;
3111 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3112 switch (AddrI.getOpcode()) {
3113 default:
3114 return false;
3115
3116 case AArch64::ADDXri:
3117 // add Xa, Xn, #N
3118 // ldr Xd, [Xa, #M]
3119 // ->
3120 // ldr Xd, [Xn, #N'+M]
3121 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3122 return canFoldAddSubImmIntoAddrMode(Disp);
3123
3124 case AArch64::SUBXri:
3125 // sub Xa, Xn, #N
3126 // ldr Xd, [Xa, #M]
3127 // ->
3128 // ldr Xd, [Xn, #N'+M]
3129 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3130 return canFoldAddSubImmIntoAddrMode(-Disp);
3131
3132 case AArch64::ADDXrs: {
3133 // add Xa, Xn, Xm, lsl #N
3134 // ldr Xd, [Xa]
3135 // ->
3136 // ldr Xd, [Xn, Xm, lsl #N]
3137
3138 // Don't fold the add if the result would be slower, unless optimising for
3139 // size.
3140 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3141 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::ShiftExtendType::LSL)
3142 return false;
3143 Shift = AArch64_AM::getShiftValue(Shift);
3144 if (!OptSize) {
3145 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3146 return false;
3147 if (avoidSlowSTRQ(MemI))
3148 return false;
3149 }
3150 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3151 }
3152
3153 case AArch64::ADDXrr:
3154 // add Xa, Xn, Xm
3155 // ldr Xd, [Xa]
3156 // ->
3157 // ldr Xd, [Xn, Xm, lsl #0]
3158
3159 // Don't fold the add if the result would be slower, unless optimising for
3160 // size.
3161 if (!OptSize && avoidSlowSTRQ(MemI))
3162 return false;
3163 return canFoldAddRegIntoAddrMode(1);
3164
3165 case AArch64::ADDXrx:
3166 // add Xa, Xn, Wm, {s,u}xtw #N
3167 // ldr Xd, [Xa]
3168 // ->
3169 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3170
3171 // Don't fold the add if the result would be slower, unless optimising for
3172 // size.
3173 if (!OptSize && avoidSlowSTRQ(MemI))
3174 return false;
3175
3176 // Can fold only sign-/zero-extend of a word.
3177 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3178 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3179 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3180 return false;
3181
3182 return canFoldAddRegIntoAddrMode(
3183 1ULL << AArch64_AM::getArithShiftValue(Imm),
3184 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3185 : ExtAddrMode::Formula::ZExtScaledReg);
3186 }
3187}
3188
3189// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3190// return the opcode of an instruction performing the same operation, but using
3191// the [Reg, Reg] addressing mode.
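// For example, both LDRXui (`ldr x0, [x1, #imm]`) and LDURXi map to LDRXroX
// (`ldr x0, [x1, x2, lsl #3]`, or `ldr x0, [x1, x2]` when unscaled).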
3192static unsigned regOffsetOpcode(unsigned Opcode) {
3193 switch (Opcode) {
3194 default:
3195 llvm_unreachable("Address folding not implemented for instruction");
3196
3197 case AArch64::LDURQi:
3198 case AArch64::LDRQui:
3199 return AArch64::LDRQroX;
3200 case AArch64::STURQi:
3201 case AArch64::STRQui:
3202 return AArch64::STRQroX;
3203 case AArch64::LDURDi:
3204 case AArch64::LDRDui:
3205 return AArch64::LDRDroX;
3206 case AArch64::STURDi:
3207 case AArch64::STRDui:
3208 return AArch64::STRDroX;
3209 case AArch64::LDURXi:
3210 case AArch64::LDRXui:
3211 return AArch64::LDRXroX;
3212 case AArch64::STURXi:
3213 case AArch64::STRXui:
3214 return AArch64::STRXroX;
3215 case AArch64::LDURWi:
3216 case AArch64::LDRWui:
3217 return AArch64::LDRWroX;
3218 case AArch64::LDURSWi:
3219 case AArch64::LDRSWui:
3220 return AArch64::LDRSWroX;
3221 case AArch64::STURWi:
3222 case AArch64::STRWui:
3223 return AArch64::STRWroX;
3224 case AArch64::LDURHi:
3225 case AArch64::LDRHui:
3226 return AArch64::LDRHroX;
3227 case AArch64::STURHi:
3228 case AArch64::STRHui:
3229 return AArch64::STRHroX;
3230 case AArch64::LDURHHi:
3231 case AArch64::LDRHHui:
3232 return AArch64::LDRHHroX;
3233 case AArch64::STURHHi:
3234 case AArch64::STRHHui:
3235 return AArch64::STRHHroX;
3236 case AArch64::LDURSHXi:
3237 case AArch64::LDRSHXui:
3238 return AArch64::LDRSHXroX;
3239 case AArch64::LDURSHWi:
3240 case AArch64::LDRSHWui:
3241 return AArch64::LDRSHWroX;
3242 case AArch64::LDURBi:
3243 case AArch64::LDRBui:
3244 return AArch64::LDRBroX;
3245 case AArch64::LDURBBi:
3246 case AArch64::LDRBBui:
3247 return AArch64::LDRBBroX;
3248 case AArch64::LDURSBXi:
3249 case AArch64::LDRSBXui:
3250 return AArch64::LDRSBXroX;
3251 case AArch64::LDURSBWi:
3252 case AArch64::LDRSBWui:
3253 return AArch64::LDRSBWroX;
3254 case AArch64::STURBi:
3255 case AArch64::STRBui:
3256 return AArch64::STRBroX;
3257 case AArch64::STURBBi:
3258 case AArch64::STRBBui:
3259 return AArch64::STRBBroX;
3260 }
3261}
3262
3263// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3264// the opcode of an instruction performing the same operation, but using the
3265// [Reg, #Imm] addressing mode with scaled offset.
3266unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3267 switch (Opcode) {
3268 default:
3269 llvm_unreachable("Address folding not implemented for instruction");
3270
3271 case AArch64::LDURQi:
3272 Scale = 16;
3273 return AArch64::LDRQui;
3274 case AArch64::STURQi:
3275 Scale = 16;
3276 return AArch64::STRQui;
3277 case AArch64::LDURDi:
3278 Scale = 8;
3279 return AArch64::LDRDui;
3280 case AArch64::STURDi:
3281 Scale = 8;
3282 return AArch64::STRDui;
3283 case AArch64::LDURXi:
3284 Scale = 8;
3285 return AArch64::LDRXui;
3286 case AArch64::STURXi:
3287 Scale = 8;
3288 return AArch64::STRXui;
3289 case AArch64::LDURWi:
3290 Scale = 4;
3291 return AArch64::LDRWui;
3292 case AArch64::LDURSWi:
3293 Scale = 4;
3294 return AArch64::LDRSWui;
3295 case AArch64::STURWi:
3296 Scale = 4;
3297 return AArch64::STRWui;
3298 case AArch64::LDURHi:
3299 Scale = 2;
3300 return AArch64::LDRHui;
3301 case AArch64::STURHi:
3302 Scale = 2;
3303 return AArch64::STRHui;
3304 case AArch64::LDURHHi:
3305 Scale = 2;
3306 return AArch64::LDRHHui;
3307 case AArch64::STURHHi:
3308 Scale = 2;
3309 return AArch64::STRHHui;
3310 case AArch64::LDURSHXi:
3311 Scale = 2;
3312 return AArch64::LDRSHXui;
3313 case AArch64::LDURSHWi:
3314 Scale = 2;
3315 return AArch64::LDRSHWui;
3316 case AArch64::LDURBi:
3317 Scale = 1;
3318 return AArch64::LDRBui;
3319 case AArch64::LDURBBi:
3320 Scale = 1;
3321 return AArch64::LDRBBui;
3322 case AArch64::LDURSBXi:
3323 Scale = 1;
3324 return AArch64::LDRSBXui;
3325 case AArch64::LDURSBWi:
3326 Scale = 1;
3327 return AArch64::LDRSBWui;
3328 case AArch64::STURBi:
3329 Scale = 1;
3330 return AArch64::STRBui;
3331 case AArch64::STURBBi:
3332 Scale = 1;
3333 return AArch64::STRBBui;
3334 case AArch64::LDRQui:
3335 case AArch64::STRQui:
3336 Scale = 16;
3337 return Opcode;
3338 case AArch64::LDRDui:
3339 case AArch64::STRDui:
3340 case AArch64::LDRXui:
3341 case AArch64::STRXui:
3342 Scale = 8;
3343 return Opcode;
3344 case AArch64::LDRWui:
3345 case AArch64::LDRSWui:
3346 case AArch64::STRWui:
3347 Scale = 4;
3348 return Opcode;
3349 case AArch64::LDRHui:
3350 case AArch64::STRHui:
3351 case AArch64::LDRHHui:
3352 case AArch64::STRHHui:
3353 case AArch64::LDRSHXui:
3354 case AArch64::LDRSHWui:
3355 Scale = 2;
3356 return Opcode;
3357 case AArch64::LDRBui:
3358 case AArch64::LDRBBui:
3359 case AArch64::LDRSBXui:
3360 case AArch64::LDRSBWui:
3361 case AArch64::STRBui:
3362 case AArch64::STRBBui:
3363 Scale = 1;
3364 return Opcode;
3365 }
3366}
3367
3368// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3369// the opcode of an instruction performing the same operation, but using the
3370// [Reg, #Imm] addressing mode with unscaled offset.
3371unsigned unscaledOffsetOpcode(unsigned Opcode) {
3372 switch (Opcode) {
3373 default:
3374 llvm_unreachable("Address folding not implemented for instruction");
3375
3376 case AArch64::LDURQi:
3377 case AArch64::STURQi:
3378 case AArch64::LDURDi:
3379 case AArch64::STURDi:
3380 case AArch64::LDURXi:
3381 case AArch64::STURXi:
3382 case AArch64::LDURWi:
3383 case AArch64::LDURSWi:
3384 case AArch64::STURWi:
3385 case AArch64::LDURHi:
3386 case AArch64::STURHi:
3387 case AArch64::LDURHHi:
3388 case AArch64::STURHHi:
3389 case AArch64::LDURSHXi:
3390 case AArch64::LDURSHWi:
3391 case AArch64::LDURBi:
3392 case AArch64::STURBi:
3393 case AArch64::LDURBBi:
3394 case AArch64::STURBBi:
3395 case AArch64::LDURSBWi:
3396 case AArch64::LDURSBXi:
3397 return Opcode;
3398 case AArch64::LDRQui:
3399 return AArch64::LDURQi;
3400 case AArch64::STRQui:
3401 return AArch64::STURQi;
3402 case AArch64::LDRDui:
3403 return AArch64::LDURDi;
3404 case AArch64::STRDui:
3405 return AArch64::STURDi;
3406 case AArch64::LDRXui:
3407 return AArch64::LDURXi;
3408 case AArch64::STRXui:
3409 return AArch64::STURXi;
3410 case AArch64::LDRWui:
3411 return AArch64::LDURWi;
3412 case AArch64::LDRSWui:
3413 return AArch64::LDURSWi;
3414 case AArch64::STRWui:
3415 return AArch64::STURWi;
3416 case AArch64::LDRHui:
3417 return AArch64::LDURHi;
3418 case AArch64::STRHui:
3419 return AArch64::STURHi;
3420 case AArch64::LDRHHui:
3421 return AArch64::LDURHHi;
3422 case AArch64::STRHHui:
3423 return AArch64::STURHHi;
3424 case AArch64::LDRSHXui:
3425 return AArch64::LDURSHXi;
3426 case AArch64::LDRSHWui:
3427 return AArch64::LDURSHWi;
3428 case AArch64::LDRBBui:
3429 return AArch64::LDURBBi;
3430 case AArch64::LDRBui:
3431 return AArch64::LDURBi;
3432 case AArch64::STRBBui:
3433 return AArch64::STURBBi;
3434 case AArch64::STRBui:
3435 return AArch64::STURBi;
3436 case AArch64::LDRSBWui:
3437 return AArch64::LDURSBWi;
3438 case AArch64::LDRSBXui:
3439 return AArch64::LDURSBXi;
3440 }
3441}
3442
3443// Given the opcode of a memory load/store instruction, return the opcode of an
3444// instruction performing the same operation, but using
3445// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3446// offset register.
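// For example, LDRXroX, LDRXui and LDURXi all map to LDRXroW, turning roughly
//   ldr x0, [x1, x2, lsl #3]
// into
//   ldr x0, [x1, w2, sxtw #3]
// when the offset register is known to be a sign-extended 32-bit value.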
3447static unsigned offsetExtendOpcode(unsigned Opcode) {
3448 switch (Opcode) {
3449 default:
3450 llvm_unreachable("Address folding not implemented for instruction");
3451
3452 case AArch64::LDRQroX:
3453 case AArch64::LDURQi:
3454 case AArch64::LDRQui:
3455 return AArch64::LDRQroW;
3456 case AArch64::STRQroX:
3457 case AArch64::STURQi:
3458 case AArch64::STRQui:
3459 return AArch64::STRQroW;
3460 case AArch64::LDRDroX:
3461 case AArch64::LDURDi:
3462 case AArch64::LDRDui:
3463 return AArch64::LDRDroW;
3464 case AArch64::STRDroX:
3465 case AArch64::STURDi:
3466 case AArch64::STRDui:
3467 return AArch64::STRDroW;
3468 case AArch64::LDRXroX:
3469 case AArch64::LDURXi:
3470 case AArch64::LDRXui:
3471 return AArch64::LDRXroW;
3472 case AArch64::STRXroX:
3473 case AArch64::STURXi:
3474 case AArch64::STRXui:
3475 return AArch64::STRXroW;
3476 case AArch64::LDRWroX:
3477 case AArch64::LDURWi:
3478 case AArch64::LDRWui:
3479 return AArch64::LDRWroW;
3480 case AArch64::LDRSWroX:
3481 case AArch64::LDURSWi:
3482 case AArch64::LDRSWui:
3483 return AArch64::LDRSWroW;
3484 case AArch64::STRWroX:
3485 case AArch64::STURWi:
3486 case AArch64::STRWui:
3487 return AArch64::STRWroW;
3488 case AArch64::LDRHroX:
3489 case AArch64::LDURHi:
3490 case AArch64::LDRHui:
3491 return AArch64::LDRHroW;
3492 case AArch64::STRHroX:
3493 case AArch64::STURHi:
3494 case AArch64::STRHui:
3495 return AArch64::STRHroW;
3496 case AArch64::LDRHHroX:
3497 case AArch64::LDURHHi:
3498 case AArch64::LDRHHui:
3499 return AArch64::LDRHHroW;
3500 case AArch64::STRHHroX:
3501 case AArch64::STURHHi:
3502 case AArch64::STRHHui:
3503 return AArch64::STRHHroW;
3504 case AArch64::LDRSHXroX:
3505 case AArch64::LDURSHXi:
3506 case AArch64::LDRSHXui:
3507 return AArch64::LDRSHXroW;
3508 case AArch64::LDRSHWroX:
3509 case AArch64::LDURSHWi:
3510 case AArch64::LDRSHWui:
3511 return AArch64::LDRSHWroW;
3512 case AArch64::LDRBroX:
3513 case AArch64::LDURBi:
3514 case AArch64::LDRBui:
3515 return AArch64::LDRBroW;
3516 case AArch64::LDRBBroX:
3517 case AArch64::LDURBBi:
3518 case AArch64::LDRBBui:
3519 return AArch64::LDRBBroW;
3520 case AArch64::LDRSBXroX:
3521 case AArch64::LDURSBXi:
3522 case AArch64::LDRSBXui:
3523 return AArch64::LDRSBXroW;
3524 case AArch64::LDRSBWroX:
3525 case AArch64::LDURSBWi:
3526 case AArch64::LDRSBWui:
3527 return AArch64::LDRSBWroW;
3528 case AArch64::STRBroX:
3529 case AArch64::STURBi:
3530 case AArch64::STRBui:
3531 return AArch64::STRBroW;
3532 case AArch64::STRBBroX:
3533 case AArch64::STURBBi:
3534 case AArch64::STRBBui:
3535 return AArch64::STRBBroW;
3536 }
3537}
3538
3539 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3540 const ExtAddrMode &AM) const {
3541
3542 const DebugLoc &DL = MemI.getDebugLoc();
3543 MachineBasicBlock &MBB = *MemI.getParent();
3544 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3545
3546 if (AM.Form == ExtAddrMode::Formula::Basic) {
3547 if (AM.ScaledReg) {
3548 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3549 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3550 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3551 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3552 .addReg(MemI.getOperand(0).getReg(),
3553 MemI.mayLoad() ? RegState::Define : 0)
3554 .addReg(AM.BaseReg)
3555 .addReg(AM.ScaledReg)
3556 .addImm(0)
3557 .addImm(AM.Scale > 1)
3558 .setMemRefs(MemI.memoperands())
3559 .setMIFlags(MemI.getFlags());
3560 return B.getInstr();
3561 }
3562
3563 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3564 "Addressing mode not supported for folding");
3565
3566 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3567 unsigned Scale = 1;
3568 unsigned Opcode = MemI.getOpcode();
3569 if (isInt<9>(AM.Displacement))
3570 Opcode = unscaledOffsetOpcode(Opcode);
3571 else
3572 Opcode = scaledOffsetOpcode(Opcode, Scale);
3573
3574 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3575 .addReg(MemI.getOperand(0).getReg(),
3576 MemI.mayLoad() ? RegState::Define : 0)
3577 .addReg(AM.BaseReg)
3578 .addImm(AM.Displacement / Scale)
3579 .setMemRefs(MemI.memoperands())
3580 .setMIFlags(MemI.getFlags());
3581 return B.getInstr();
3582 }
3583
3583
3584 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3585 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3586 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3587 assert(AM.ScaledReg && !AM.Displacement &&
3588 "Address offset can be a register or an immediate, but not both");
3589 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3590 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3591 // Make sure the offset register is in the correct register class.
3592 Register OffsetReg = AM.ScaledReg;
3593 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3594 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3595 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3596 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3597 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3598 }
3599 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3600 .addReg(MemI.getOperand(0).getReg(),
3601 MemI.mayLoad() ? RegState::Define : 0)
3602 .addReg(AM.BaseReg)
3603 .addReg(OffsetReg)
3604 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3605 .addImm(AM.Scale != 1)
3606 .setMemRefs(MemI.memoperands())
3607 .setMIFlags(MemI.getFlags());
3608
3609 return B.getInstr();
3610 }
3611
3613 "Function must not be called with an addressing mode it can't handle");
3614}
3615
3616/// Return true if the opcode is a post-index ld/st instruction, which really
3617/// loads from base+0.
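/// For example, `ldr x0, [x1], #8` (LDRXpost) loads from [x1, #0] and only
/// afterwards increments x1 by 8.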
3618static bool isPostIndexLdStOpcode(unsigned Opcode) {
3619 switch (Opcode) {
3620 default:
3621 return false;
3622 case AArch64::LD1Fourv16b_POST:
3623 case AArch64::LD1Fourv1d_POST:
3624 case AArch64::LD1Fourv2d_POST:
3625 case AArch64::LD1Fourv2s_POST:
3626 case AArch64::LD1Fourv4h_POST:
3627 case AArch64::LD1Fourv4s_POST:
3628 case AArch64::LD1Fourv8b_POST:
3629 case AArch64::LD1Fourv8h_POST:
3630 case AArch64::LD1Onev16b_POST:
3631 case AArch64::LD1Onev1d_POST:
3632 case AArch64::LD1Onev2d_POST:
3633 case AArch64::LD1Onev2s_POST:
3634 case AArch64::LD1Onev4h_POST:
3635 case AArch64::LD1Onev4s_POST:
3636 case AArch64::LD1Onev8b_POST:
3637 case AArch64::LD1Onev8h_POST:
3638 case AArch64::LD1Rv16b_POST:
3639 case AArch64::LD1Rv1d_POST:
3640 case AArch64::LD1Rv2d_POST:
3641 case AArch64::LD1Rv2s_POST:
3642 case AArch64::LD1Rv4h_POST:
3643 case AArch64::LD1Rv4s_POST:
3644 case AArch64::LD1Rv8b_POST:
3645 case AArch64::LD1Rv8h_POST:
3646 case AArch64::LD1Threev16b_POST:
3647 case AArch64::LD1Threev1d_POST:
3648 case AArch64::LD1Threev2d_POST:
3649 case AArch64::LD1Threev2s_POST:
3650 case AArch64::LD1Threev4h_POST:
3651 case AArch64::LD1Threev4s_POST:
3652 case AArch64::LD1Threev8b_POST:
3653 case AArch64::LD1Threev8h_POST:
3654 case AArch64::LD1Twov16b_POST:
3655 case AArch64::LD1Twov1d_POST:
3656 case AArch64::LD1Twov2d_POST:
3657 case AArch64::LD1Twov2s_POST:
3658 case AArch64::LD1Twov4h_POST:
3659 case AArch64::LD1Twov4s_POST:
3660 case AArch64::LD1Twov8b_POST:
3661 case AArch64::LD1Twov8h_POST:
3662 case AArch64::LD1i16_POST:
3663 case AArch64::LD1i32_POST:
3664 case AArch64::LD1i64_POST:
3665 case AArch64::LD1i8_POST:
3666 case AArch64::LD2Rv16b_POST:
3667 case AArch64::LD2Rv1d_POST:
3668 case AArch64::LD2Rv2d_POST:
3669 case AArch64::LD2Rv2s_POST:
3670 case AArch64::LD2Rv4h_POST:
3671 case AArch64::LD2Rv4s_POST:
3672 case AArch64::LD2Rv8b_POST:
3673 case AArch64::LD2Rv8h_POST:
3674 case AArch64::LD2Twov16b_POST:
3675 case AArch64::LD2Twov2d_POST:
3676 case AArch64::LD2Twov2s_POST:
3677 case AArch64::LD2Twov4h_POST:
3678 case AArch64::LD2Twov4s_POST:
3679 case AArch64::LD2Twov8b_POST:
3680 case AArch64::LD2Twov8h_POST:
3681 case AArch64::LD2i16_POST:
3682 case AArch64::LD2i32_POST:
3683 case AArch64::LD2i64_POST:
3684 case AArch64::LD2i8_POST:
3685 case AArch64::LD3Rv16b_POST:
3686 case AArch64::LD3Rv1d_POST:
3687 case AArch64::LD3Rv2d_POST:
3688 case AArch64::LD3Rv2s_POST:
3689 case AArch64::LD3Rv4h_POST:
3690 case AArch64::LD3Rv4s_POST:
3691 case AArch64::LD3Rv8b_POST:
3692 case AArch64::LD3Rv8h_POST:
3693 case AArch64::LD3Threev16b_POST:
3694 case AArch64::LD3Threev2d_POST:
3695 case AArch64::LD3Threev2s_POST:
3696 case AArch64::LD3Threev4h_POST:
3697 case AArch64::LD3Threev4s_POST:
3698 case AArch64::LD3Threev8b_POST:
3699 case AArch64::LD3Threev8h_POST:
3700 case AArch64::LD3i16_POST:
3701 case AArch64::LD3i32_POST:
3702 case AArch64::LD3i64_POST:
3703 case AArch64::LD3i8_POST:
3704 case AArch64::LD4Fourv16b_POST:
3705 case AArch64::LD4Fourv2d_POST:
3706 case AArch64::LD4Fourv2s_POST:
3707 case AArch64::LD4Fourv4h_POST:
3708 case AArch64::LD4Fourv4s_POST:
3709 case AArch64::LD4Fourv8b_POST:
3710 case AArch64::LD4Fourv8h_POST:
3711 case AArch64::LD4Rv16b_POST:
3712 case AArch64::LD4Rv1d_POST:
3713 case AArch64::LD4Rv2d_POST:
3714 case AArch64::LD4Rv2s_POST:
3715 case AArch64::LD4Rv4h_POST:
3716 case AArch64::LD4Rv4s_POST:
3717 case AArch64::LD4Rv8b_POST:
3718 case AArch64::LD4Rv8h_POST:
3719 case AArch64::LD4i16_POST:
3720 case AArch64::LD4i32_POST:
3721 case AArch64::LD4i64_POST:
3722 case AArch64::LD4i8_POST:
3723 case AArch64::LDAPRWpost:
3724 case AArch64::LDAPRXpost:
3725 case AArch64::LDIAPPWpost:
3726 case AArch64::LDIAPPXpost:
3727 case AArch64::LDPDpost:
3728 case AArch64::LDPQpost:
3729 case AArch64::LDPSWpost:
3730 case AArch64::LDPSpost:
3731 case AArch64::LDPWpost:
3732 case AArch64::LDPXpost:
3733 case AArch64::LDRBBpost:
3734 case AArch64::LDRBpost:
3735 case AArch64::LDRDpost:
3736 case AArch64::LDRHHpost:
3737 case AArch64::LDRHpost:
3738 case AArch64::LDRQpost:
3739 case AArch64::LDRSBWpost:
3740 case AArch64::LDRSBXpost:
3741 case AArch64::LDRSHWpost:
3742 case AArch64::LDRSHXpost:
3743 case AArch64::LDRSWpost:
3744 case AArch64::LDRSpost:
3745 case AArch64::LDRWpost:
3746 case AArch64::LDRXpost:
3747 case AArch64::ST1Fourv16b_POST:
3748 case AArch64::ST1Fourv1d_POST:
3749 case AArch64::ST1Fourv2d_POST:
3750 case AArch64::ST1Fourv2s_POST:
3751 case AArch64::ST1Fourv4h_POST:
3752 case AArch64::ST1Fourv4s_POST:
3753 case AArch64::ST1Fourv8b_POST:
3754 case AArch64::ST1Fourv8h_POST:
3755 case AArch64::ST1Onev16b_POST:
3756 case AArch64::ST1Onev1d_POST:
3757 case AArch64::ST1Onev2d_POST:
3758 case AArch64::ST1Onev2s_POST:
3759 case AArch64::ST1Onev4h_POST:
3760 case AArch64::ST1Onev4s_POST:
3761 case AArch64::ST1Onev8b_POST:
3762 case AArch64::ST1Onev8h_POST:
3763 case AArch64::ST1Threev16b_POST:
3764 case AArch64::ST1Threev1d_POST:
3765 case AArch64::ST1Threev2d_POST:
3766 case AArch64::ST1Threev2s_POST:
3767 case AArch64::ST1Threev4h_POST:
3768 case AArch64::ST1Threev4s_POST:
3769 case AArch64::ST1Threev8b_POST:
3770 case AArch64::ST1Threev8h_POST:
3771 case AArch64::ST1Twov16b_POST:
3772 case AArch64::ST1Twov1d_POST:
3773 case AArch64::ST1Twov2d_POST:
3774 case AArch64::ST1Twov2s_POST:
3775 case AArch64::ST1Twov4h_POST:
3776 case AArch64::ST1Twov4s_POST:
3777 case AArch64::ST1Twov8b_POST:
3778 case AArch64::ST1Twov8h_POST:
3779 case AArch64::ST1i16_POST:
3780 case AArch64::ST1i32_POST:
3781 case AArch64::ST1i64_POST:
3782 case AArch64::ST1i8_POST:
3783 case AArch64::ST2GPostIndex:
3784 case AArch64::ST2Twov16b_POST:
3785 case AArch64::ST2Twov2d_POST:
3786 case AArch64::ST2Twov2s_POST:
3787 case AArch64::ST2Twov4h_POST:
3788 case AArch64::ST2Twov4s_POST:
3789 case AArch64::ST2Twov8b_POST:
3790 case AArch64::ST2Twov8h_POST:
3791 case AArch64::ST2i16_POST:
3792 case AArch64::ST2i32_POST:
3793 case AArch64::ST2i64_POST:
3794 case AArch64::ST2i8_POST:
3795 case AArch64::ST3Threev16b_POST:
3796 case AArch64::ST3Threev2d_POST:
3797 case AArch64::ST3Threev2s_POST:
3798 case AArch64::ST3Threev4h_POST:
3799 case AArch64::ST3Threev4s_POST:
3800 case AArch64::ST3Threev8b_POST:
3801 case AArch64::ST3Threev8h_POST:
3802 case AArch64::ST3i16_POST:
3803 case AArch64::ST3i32_POST:
3804 case AArch64::ST3i64_POST:
3805 case AArch64::ST3i8_POST:
3806 case AArch64::ST4Fourv16b_POST:
3807 case AArch64::ST4Fourv2d_POST:
3808 case AArch64::ST4Fourv2s_POST:
3809 case AArch64::ST4Fourv4h_POST:
3810 case AArch64::ST4Fourv4s_POST:
3811 case AArch64::ST4Fourv8b_POST:
3812 case AArch64::ST4Fourv8h_POST:
3813 case AArch64::ST4i16_POST:
3814 case AArch64::ST4i32_POST:
3815 case AArch64::ST4i64_POST:
3816 case AArch64::ST4i8_POST:
3817 case AArch64::STGPostIndex:
3818 case AArch64::STGPpost:
3819 case AArch64::STPDpost:
3820 case AArch64::STPQpost:
3821 case AArch64::STPSpost:
3822 case AArch64::STPWpost:
3823 case AArch64::STPXpost:
3824 case AArch64::STRBBpost:
3825 case AArch64::STRBpost:
3826 case AArch64::STRDpost:
3827 case AArch64::STRHHpost:
3828 case AArch64::STRHpost:
3829 case AArch64::STRQpost:
3830 case AArch64::STRSpost:
3831 case AArch64::STRWpost:
3832 case AArch64::STRXpost:
3833 case AArch64::STZ2GPostIndex:
3834 case AArch64::STZGPostIndex:
3835 return true;
3836 }
3837}
3838
3839bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3840 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3841 bool &OffsetIsScalable, TypeSize &Width,
3842 const TargetRegisterInfo *TRI) const {
3843 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3844 // Handle only loads/stores with base register followed by immediate offset.
3845 if (LdSt.getNumExplicitOperands() == 3) {
3846 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3847 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3848 !LdSt.getOperand(2).isImm())
3849 return false;
3850 } else if (LdSt.getNumExplicitOperands() == 4) {
3851 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3852 if (!LdSt.getOperand(1).isReg() ||
3853 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3854 !LdSt.getOperand(3).isImm())
3855 return false;
3856 } else
3857 return false;
3858
3859 // Get the scaling factor for the instruction and set the width for the
3860 // instruction.
3861 TypeSize Scale(0U, false);
3862 int64_t Dummy1, Dummy2;
3863
3864 // If this returns false, then it's an instruction we don't want to handle.
3865 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3866 return false;
3867
3868 // Compute the offset. The offset is the immediate operand multiplied by the
3869 // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
3870 // instructions are a special case and always have an offset of 0.
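 // For example, an LDRXui with immediate operand 1 (i.e. "ldr x1, [x0, #8]")
 // has Scale = 8, so the byte offset reported here is 1 * 8 = 8.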
3871 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
3872 BaseOp = &LdSt.getOperand(2);
3873 Offset = 0;
3874 } else if (LdSt.getNumExplicitOperands() == 3) {
3875 BaseOp = &LdSt.getOperand(1);
3876 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3877 } else {
3878 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3879 BaseOp = &LdSt.getOperand(2);
3880 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3881 }
3882 OffsetIsScalable = Scale.isScalable();
3883
3884 return BaseOp->isReg() || BaseOp->isFI();
3885}
3886
3889 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3890 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3891 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3892 return OfsOp;
3893}
3894
3895bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3896 TypeSize &Width, int64_t &MinOffset,
3897 int64_t &MaxOffset) {
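 // Rough usage sketch (values taken from the cases below): querying
 //   getMemOpInfo(AArch64::LDRQui, Scale, Width, MinOffset, MaxOffset)
 // yields Scale = Width = 16 bytes and an immediate range of [0, 4095],
 // i.e. byte offsets from 0 up to 4095 * 16.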
3898 switch (Opcode) {
3899 // Not a memory operation or something we want to handle.
3900 default:
3901 Scale = TypeSize::getFixed(0);
3902 Width = TypeSize::getFixed(0);
3903 MinOffset = MaxOffset = 0;
3904 return false;
3905 // LDR / STR
3906 case AArch64::LDRQui:
3907 case AArch64::STRQui:
3908 Scale = TypeSize::getFixed(16);
3909 Width = TypeSize::getFixed(16);
3910 MinOffset = 0;
3911 MaxOffset = 4095;
3912 break;
3913 case AArch64::LDRXui:
3914 case AArch64::LDRDui:
3915 case AArch64::STRXui:
3916 case AArch64::STRDui:
3917 case AArch64::PRFMui:
3918 Scale = TypeSize::getFixed(8);
3919 Width = TypeSize::getFixed(8);
3920 MinOffset = 0;
3921 MaxOffset = 4095;
3922 break;
3923 case AArch64::LDRWui:
3924 case AArch64::LDRSui:
3925 case AArch64::LDRSWui:
3926 case AArch64::STRWui:
3927 case AArch64::STRSui:
3928 Scale = TypeSize::getFixed(4);
3929 Width = TypeSize::getFixed(4);
3930 MinOffset = 0;
3931 MaxOffset = 4095;
3932 break;
3933 case AArch64::LDRHui:
3934 case AArch64::LDRHHui:
3935 case AArch64::LDRSHWui:
3936 case AArch64::LDRSHXui:
3937 case AArch64::STRHui:
3938 case AArch64::STRHHui:
3939 Scale = TypeSize::getFixed(2);
3940 Width = TypeSize::getFixed(2);
3941 MinOffset = 0;
3942 MaxOffset = 4095;
3943 break;
3944 case AArch64::LDRBui:
3945 case AArch64::LDRBBui:
3946 case AArch64::LDRSBWui:
3947 case AArch64::LDRSBXui:
3948 case AArch64::STRBui:
3949 case AArch64::STRBBui:
3950 Scale = TypeSize::getFixed(1);
3951 Width = TypeSize::getFixed(1);
3952 MinOffset = 0;
3953 MaxOffset = 4095;
3954 break;
3955 // post/pre inc
3956 case AArch64::STRQpre:
3957 case AArch64::LDRQpost:
3958 Scale = TypeSize::getFixed(1);
3959 Width = TypeSize::getFixed(16);
3960 MinOffset = -256;
3961 MaxOffset = 255;
3962 break;
3963 case AArch64::LDRDpost:
3964 case AArch64::LDRDpre:
3965 case AArch64::LDRXpost:
3966 case AArch64::LDRXpre:
3967 case AArch64::STRDpost:
3968 case AArch64::STRDpre:
3969 case AArch64::STRXpost:
3970 case AArch64::STRXpre:
3971 Scale = TypeSize::getFixed(1);
3972 Width = TypeSize::getFixed(8);
3973 MinOffset = -256;
3974 MaxOffset = 255;
3975 break;
3976 case AArch64::STRWpost:
3977 case AArch64::STRWpre:
3978 case AArch64::LDRWpost:
3979 case AArch64::LDRWpre:
3980 case AArch64::STRSpost:
3981 case AArch64::STRSpre:
3982 case AArch64::LDRSpost:
3983 case AArch64::LDRSpre:
3984 Scale = TypeSize::getFixed(1);
3985 Width = TypeSize::getFixed(4);
3986 MinOffset = -256;
3987 MaxOffset = 255;
3988 break;
3989 case AArch64::LDRHpost:
3990 case AArch64::LDRHpre:
3991 case AArch64::STRHpost:
3992 case AArch64::STRHpre:
3993 case AArch64::LDRHHpost:
3994 case AArch64::LDRHHpre:
3995 case AArch64::STRHHpost:
3996 case AArch64::STRHHpre:
3997 Scale = TypeSize::getFixed(1);
3998 Width = TypeSize::getFixed(2);
3999 MinOffset = -256;
4000 MaxOffset = 255;
4001 break;
4002 case AArch64::LDRBpost:
4003 case AArch64::LDRBpre:
4004 case AArch64::STRBpost:
4005 case AArch64::STRBpre:
4006 case AArch64::LDRBBpost:
4007 case AArch64::LDRBBpre:
4008 case AArch64::STRBBpost:
4009 case AArch64::STRBBpre:
4010 Scale = TypeSize::getFixed(1);
4011 Width = TypeSize::getFixed(1);
4012 MinOffset = -256;
4013 MaxOffset = 255;
4014 break;
4015 // Unscaled
4016 case AArch64::LDURQi:
4017 case AArch64::STURQi:
4018 Scale = TypeSize::getFixed(1);
4019 Width = TypeSize::getFixed(16);
4020 MinOffset = -256;
4021 MaxOffset = 255;
4022 break;
4023 case AArch64::LDURXi:
4024 case AArch64::LDURDi:
4025 case AArch64::LDAPURXi:
4026 case AArch64::STURXi:
4027 case AArch64::STURDi:
4028 case AArch64::STLURXi:
4029 case AArch64::PRFUMi:
4030 Scale = TypeSize::getFixed(1);
4031 Width = TypeSize::getFixed(8);
4032 MinOffset = -256;
4033 MaxOffset = 255;
4034 break;
4035 case AArch64::LDURWi:
4036 case AArch64::LDURSi:
4037 case AArch64::LDURSWi:
4038 case AArch64::LDAPURi:
4039 case AArch64::LDAPURSWi:
4040 case AArch64::STURWi:
4041 case AArch64::STURSi:
4042 case AArch64::STLURWi:
4043 Scale = TypeSize::getFixed(1);
4044 Width = TypeSize::getFixed(4);
4045 MinOffset = -256;
4046 MaxOffset = 255;
4047 break;
4048 case AArch64::LDURHi:
4049 case AArch64::LDURHHi:
4050 case AArch64::LDURSHXi:
4051 case AArch64::LDURSHWi:
4052 case AArch64::LDAPURHi:
4053 case AArch64::LDAPURSHWi:
4054 case AArch64::LDAPURSHXi:
4055 case AArch64::STURHi:
4056 case AArch64::STURHHi:
4057 case AArch64::STLURHi:
4058 Scale = TypeSize::getFixed(1);
4059 Width = TypeSize::getFixed(2);
4060 MinOffset = -256;
4061 MaxOffset = 255;
4062 break;
4063 case AArch64::LDURBi:
4064 case AArch64::LDURBBi:
4065 case AArch64::LDURSBXi:
4066 case AArch64::LDURSBWi:
4067 case AArch64::LDAPURBi:
4068 case AArch64::LDAPURSBWi:
4069 case AArch64::LDAPURSBXi:
4070 case AArch64::STURBi:
4071 case AArch64::STURBBi:
4072 case AArch64::STLURBi:
4073 Scale = TypeSize::getFixed(1);
4074 Width = TypeSize::getFixed(1);
4075 MinOffset = -256;
4076 MaxOffset = 255;
4077 break;
4078 // LDP / STP (including pre/post inc)
4079 case AArch64::LDPQi:
4080 case AArch64::LDNPQi:
4081 case AArch64::STPQi:
4082 case AArch64::STNPQi:
4083 case AArch64::LDPQpost:
4084 case AArch64::LDPQpre:
4085 case AArch64::STPQpost:
4086 case AArch64::STPQpre:
4087 Scale = TypeSize::getFixed(16);
4088 Width = TypeSize::getFixed(16 * 2);
4089 MinOffset = -64;
4090 MaxOffset = 63;
4091 break;
4092 case AArch64::LDPXi:
4093 case AArch64::LDPDi:
4094 case AArch64::LDNPXi:
4095 case AArch64::LDNPDi:
4096 case AArch64::STPXi:
4097 case AArch64::STPDi:
4098 case AArch64::STNPXi:
4099 case AArch64::STNPDi:
4100 case AArch64::LDPDpost:
4101 case AArch64::LDPDpre:
4102 case AArch64::LDPXpost:
4103 case AArch64::LDPXpre:
4104 case AArch64::STPDpost:
4105 case AArch64::STPDpre:
4106 case AArch64::STPXpost:
4107 case AArch64::STPXpre:
4108 Scale = TypeSize::getFixed(8);
4109 Width = TypeSize::getFixed(8 * 2);
4110 MinOffset = -64;
4111 MaxOffset = 63;
4112 break;
4113 case AArch64::LDPWi:
4114 case AArch64::LDPSi:
4115 case AArch64::LDNPWi:
4116 case AArch64::LDNPSi:
4117 case AArch64::STPWi:
4118 case AArch64::STPSi:
4119 case AArch64::STNPWi:
4120 case AArch64::STNPSi:
4121 case AArch64::LDPSpost:
4122 case AArch64::LDPSpre:
4123 case AArch64::LDPWpost:
4124 case AArch64::LDPWpre:
4125 case AArch64::STPSpost:
4126 case AArch64::STPSpre:
4127 case AArch64::STPWpost:
4128 case AArch64::STPWpre:
4129 Scale = TypeSize::getFixed(4);
4130 Width = TypeSize::getFixed(4 * 2);
4131 MinOffset = -64;
4132 MaxOffset = 63;
4133 break;
4134 case AArch64::StoreSwiftAsyncContext:
4135 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4136 Scale = TypeSize::getFixed(1);
4137 Width = TypeSize::getFixed(8);
4138 MinOffset = 0;
4139 MaxOffset = 4095;
4140 break;
4141 case AArch64::ADDG:
4142 Scale = TypeSize::getFixed(16);
4143 Width = TypeSize::getFixed(0);
4144 MinOffset = 0;
4145 MaxOffset = 63;
4146 break;
4147 case AArch64::TAGPstack:
4148 Scale = TypeSize::getFixed(16);
4149 Width = TypeSize::getFixed(0);
4150 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4151 // of 63 (not 64!).
4152 MinOffset = -63;
4153 MaxOffset = 63;
4154 break;
4155 case AArch64::LDG:
4156 case AArch64::STGi:
4157 case AArch64::STZGi:
4158 Scale = TypeSize::getFixed(16);
4159 Width = TypeSize::getFixed(16);
4160 MinOffset = -256;
4161 MaxOffset = 255;
4162 break;
4163 // SVE
4164 case AArch64::STR_ZZZZXI:
4165 case AArch64::LDR_ZZZZXI:
4166 Scale = TypeSize::getScalable(16);
4167 Width = TypeSize::getScalable(16 * 4);
4168 MinOffset = -256;
4169 MaxOffset = 252;
4170 break;
4171 case AArch64::STR_ZZZXI:
4172 case AArch64::LDR_ZZZXI:
4173 Scale = TypeSize::getScalable(16);
4174 Width = TypeSize::getScalable(16 * 3);
4175 MinOffset = -256;
4176 MaxOffset = 253;
4177 break;
4178 case AArch64::STR_ZZXI:
4179 case AArch64::LDR_ZZXI:
4180 Scale = TypeSize::getScalable(16);
4181 Width = TypeSize::getScalable(16 * 2);
4182 MinOffset = -256;
4183 MaxOffset = 254;
4184 break;
4185 case AArch64::LDR_PXI:
4186 case AArch64::STR_PXI:
4187 Scale = TypeSize::getScalable(2);
4188 Width = TypeSize::getScalable(2);
4189 MinOffset = -256;
4190 MaxOffset = 255;
4191 break;
4192 case AArch64::LDR_PPXI:
4193 case AArch64::STR_PPXI:
4194 Scale = TypeSize::getScalable(2);
4195 Width = TypeSize::getScalable(2 * 2);
4196 MinOffset = -256;
4197 MaxOffset = 254;
4198 break;
4199 case AArch64::LDR_ZXI:
4200 case AArch64::STR_ZXI:
4201 Scale = TypeSize::getScalable(16);
4202 Width = TypeSize::getScalable(16);
4203 MinOffset = -256;
4204 MaxOffset = 255;
4205 break;
4206 case AArch64::LD1B_IMM:
4207 case AArch64::LD1H_IMM:
4208 case AArch64::LD1W_IMM:
4209 case AArch64::LD1D_IMM:
4210 case AArch64::LDNT1B_ZRI:
4211 case AArch64::LDNT1H_ZRI:
4212 case AArch64::LDNT1W_ZRI:
4213 case AArch64::LDNT1D_ZRI:
4214 case AArch64::ST1B_IMM:
4215 case AArch64::ST1H_IMM:
4216 case AArch64::ST1W_IMM:
4217 case AArch64::ST1D_IMM:
4218 case AArch64::STNT1B_ZRI:
4219 case AArch64::STNT1H_ZRI:
4220 case AArch64::STNT1W_ZRI:
4221 case AArch64::STNT1D_ZRI:
4222 case AArch64::LDNF1B_IMM:
4223 case AArch64::LDNF1H_IMM:
4224 case AArch64::LDNF1W_IMM:
4225 case AArch64::LDNF1D_IMM:
4226 // A full vector's worth of data
4227 // Width = mbytes * elements
4228 Scale = TypeSize::getScalable(16);
4229 Width = TypeSize::getScalable(16);
4230 MinOffset = -8;
4231 MaxOffset = 7;
4232 break;
4233 case AArch64::LD2B_IMM:
4234 case AArch64::LD2H_IMM:
4235 case AArch64::LD2W_IMM:
4236 case AArch64::LD2D_IMM:
4237 case AArch64::ST2B_IMM:
4238 case AArch64::ST2H_IMM:
4239 case AArch64::ST2W_IMM:
4240 case AArch64::ST2D_IMM:
4241 Scale = TypeSize::getScalable(32);
4242 Width = TypeSize::getScalable(16 * 2);
4243 MinOffset = -8;
4244 MaxOffset = 7;
4245 break;
4246 case AArch64::LD3B_IMM:
4247 case AArch64::LD3H_IMM:
4248 case AArch64::LD3W_IMM:
4249 case AArch64::LD3D_IMM:
4250 case AArch64::ST3B_IMM:
4251 case AArch64::ST3H_IMM:
4252 case AArch64::ST3W_IMM:
4253 case AArch64::ST3D_IMM:
4254 Scale = TypeSize::getScalable(48);
4255 Width = TypeSize::getScalable(16 * 3);
4256 MinOffset = -8;
4257 MaxOffset = 7;
4258 break;
4259 case AArch64::LD4B_IMM:
4260 case AArch64::LD4H_IMM:
4261 case AArch64::LD4W_IMM:
4262 case AArch64::LD4D_IMM:
4263 case AArch64::ST4B_IMM:
4264 case AArch64::ST4H_IMM:
4265 case AArch64::ST4W_IMM:
4266 case AArch64::ST4D_IMM:
4267 Scale = TypeSize::getScalable(64);
4268 Width = TypeSize::getScalable(16 * 4);
4269 MinOffset = -8;
4270 MaxOffset = 7;
4271 break;
4272 case AArch64::LD1B_H_IMM:
4273 case AArch64::LD1SB_H_IMM:
4274 case AArch64::LD1H_S_IMM:
4275 case AArch64::LD1SH_S_IMM:
4276 case AArch64::LD1W_D_IMM:
4277 case AArch64::LD1SW_D_IMM:
4278 case AArch64::ST1B_H_IMM:
4279 case AArch64::ST1H_S_IMM:
4280 case AArch64::ST1W_D_IMM:
4281 case AArch64::LDNF1B_H_IMM:
4282 case AArch64::LDNF1SB_H_IMM:
4283 case AArch64::LDNF1H_S_IMM:
4284 case AArch64::LDNF1SH_S_IMM:
4285 case AArch64::LDNF1W_D_IMM:
4286 case AArch64::LDNF1SW_D_IMM:
4287 // A half vector's worth of data
4288 // Width = mbytes * elements
4289 Scale = TypeSize::getScalable(8);
4290 Width = TypeSize::getScalable(8);
4291 MinOffset = -8;
4292 MaxOffset = 7;
4293 break;
4294 case AArch64::LD1B_S_IMM:
4295 case AArch64::LD1SB_S_IMM:
4296 case AArch64::LD1H_D_IMM:
4297 case AArch64::LD1SH_D_IMM:
4298 case AArch64::ST1B_S_IMM:
4299 case AArch64::ST1H_D_IMM:
4300 case AArch64::LDNF1B_S_IMM:
4301 case AArch64::LDNF1SB_S_IMM:
4302 case AArch64::LDNF1H_D_IMM:
4303 case AArch64::LDNF1SH_D_IMM:
4304 // A quarter vector's worth of data
4305 // Width = mbytes * elements
4306 Scale = TypeSize::getScalable(4);
4307 Width = TypeSize::getScalable(4);
4308 MinOffset = -8;
4309 MaxOffset = 7;
4310 break;
4311 case AArch64::LD1B_D_IMM:
4312 case AArch64::LD1SB_D_IMM:
4313 case AArch64::ST1B_D_IMM:
4314 case AArch64::LDNF1B_D_IMM:
4315 case AArch64::LDNF1SB_D_IMM:
4316 // An eighth of a vector's worth of data
4317 // Width = mbytes * elements
4318 Scale = TypeSize::getScalable(2);
4319 Width = TypeSize::getScalable(2);
4320 MinOffset = -8;
4321 MaxOffset = 7;
4322 break;
4323 case AArch64::ST2Gi:
4324 case AArch64::STZ2Gi:
4325 Scale = TypeSize::getFixed(16);
4326 Width = TypeSize::getFixed(32);
4327 MinOffset = -256;
4328 MaxOffset = 255;
4329 break;
4330 case AArch64::STGPi:
4331 Scale = TypeSize::getFixed(16);
4332 Width = TypeSize::getFixed(16);
4333 MinOffset = -64;
4334 MaxOffset = 63;
4335 break;
4336 case AArch64::LD1RB_IMM:
4337 case AArch64::LD1RB_H_IMM:
4338 case AArch64::LD1RB_S_IMM:
4339 case AArch64::LD1RB_D_IMM:
4340 case AArch64::LD1RSB_H_IMM:
4341 case AArch64::LD1RSB_S_IMM:
4342 case AArch64::LD1RSB_D_IMM:
4343 Scale = TypeSize::getFixed(1);
4344 Width = TypeSize::getFixed(1);
4345 MinOffset = 0;
4346 MaxOffset = 63;
4347 break;
4348 case AArch64::LD1RH_IMM:
4349 case AArch64::LD1RH_S_IMM:
4350 case AArch64::LD1RH_D_IMM:
4351 case AArch64::LD1RSH_S_IMM:
4352 case AArch64::LD1RSH_D_IMM:
4353 Scale = TypeSize::getFixed(2);
4354 Width = TypeSize::getFixed(2);
4355 MinOffset = 0;
4356 MaxOffset = 63;
4357 break;
4358 case AArch64::LD1RW_IMM:
4359 case AArch64::LD1RW_D_IMM:
4360 case AArch64::LD1RSW_IMM:
4361 Scale = TypeSize::getFixed(4);
4362 Width = TypeSize::getFixed(4);
4363 MinOffset = 0;
4364 MaxOffset = 63;
4365 break;
4366 case AArch64::LD1RD_IMM:
4367 Scale = TypeSize::getFixed(8);
4368 Width = TypeSize::getFixed(8);
4369 MinOffset = 0;
4370 MaxOffset = 63;
4371 break;
4372 }
4373
4374 return true;
4375}
4376
4377// Scaling factor (the per-register access size in bytes) for a scaled or unscaled load or store.
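// For example, getMemScale(AArch64::LDRWui) returns 4 and
// getMemScale(AArch64::LDPQi) returns 16 (the per-register access size).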
4378int AArch64InstrInfo::getMemScale(unsigned Opc) {
4379 switch (Opc) {
4380 default:
4381 llvm_unreachable("Opcode has unknown scale!");
4382 case AArch64::LDRBBui:
4383 case AArch64::LDURBBi:
4384 case AArch64::LDRSBWui:
4385 case AArch64::LDURSBWi:
4386 case AArch64::STRBBui:
4387 case AArch64::STURBBi:
4388 return 1;
4389 case AArch64::LDRHHui:
4390 case AArch64::LDURHHi:
4391 case AArch64::LDRSHWui:
4392 case AArch64::LDURSHWi:
4393 case AArch64::STRHHui:
4394 case AArch64::STURHHi:
4395 return 2;
4396 case AArch64::LDRSui:
4397 case AArch64::LDURSi:
4398 case AArch64::LDRSpre:
4399 case AArch64::LDRSWui:
4400 case AArch64::LDURSWi:
4401 case AArch64::LDRSWpre:
4402 case AArch64::LDRWpre:
4403 case AArch64::LDRWui:
4404 case AArch64::LDURWi:
4405 case AArch64::STRSui:
4406 case AArch64::STURSi:
4407 case AArch64::STRSpre:
4408 case AArch64::STRWui:
4409 case AArch64::STURWi:
4410 case AArch64::STRWpre:
4411 case AArch64::LDPSi:
4412 case AArch64::LDPSWi:
4413 case AArch64::LDPWi:
4414 case AArch64::STPSi:
4415 case AArch64::STPWi:
4416 return 4;
4417 case AArch64::LDRDui:
4418 case AArch64::LDURDi:
4419 case AArch64::LDRDpre:
4420 case AArch64::LDRXui:
4421 case AArch64::LDURXi:
4422 case AArch64::LDRXpre:
4423 case AArch64::STRDui:
4424 case AArch64::STURDi:
4425 case AArch64::STRDpre:
4426 case AArch64::STRXui:
4427 case AArch64::STURXi:
4428 case AArch64::STRXpre:
4429 case AArch64::LDPDi:
4430 case AArch64::LDPXi:
4431 case AArch64::STPDi:
4432 case AArch64::STPXi:
4433 return 8;
4434 case AArch64::LDRQui:
4435 case AArch64::LDURQi:
4436 case AArch64::STRQui:
4437 case AArch64::STURQi:
4438 case AArch64::STRQpre:
4439 case AArch64::LDPQi:
4440 case AArch64::LDRQpre:
4441 case AArch64::STPQi:
4442 case AArch64::STGi:
4443 case AArch64::STZGi:
4444 case AArch64::ST2Gi:
4445 case AArch64::STZ2Gi:
4446 case AArch64::STGPi:
4447 return 16;
4448 }
4449}
4450
4451bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4452 switch (MI.getOpcode()) {
4453 default:
4454 return false;
4455 case AArch64::LDRWpre:
4456 case AArch64::LDRXpre:
4457 case AArch64::LDRSWpre:
4458 case AArch64::LDRSpre:
4459 case AArch64::LDRDpre:
4460 case AArch64::LDRQpre:
4461 return true;
4462 }
4463}
4464
4465bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4466 switch (MI.getOpcode()) {
4467 default:
4468 return false;
4469 case AArch64::STRWpre:
4470 case AArch64::STRXpre:
4471 case AArch64::STRSpre:
4472 case AArch64::STRDpre:
4473 case AArch64::STRQpre:
4474 return true;
4475 }
4476}
4477
4478bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4479 return isPreLd(MI) || isPreSt(MI);
4480}
4481
4482bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4483 switch (MI.getOpcode()) {
4484 default:
4485 return false;
4486 case AArch64::LDPSi:
4487 case AArch64::LDPSWi:
4488 case AArch64::LDPDi:
4489 case AArch64::LDPQi:
4490 case AArch64::LDPWi:
4491 case AArch64::LDPXi:
4492 case AArch64::STPSi:
4493 case AArch64::STPDi:
4494 case AArch64::STPQi:
4495 case AArch64::STPWi:
4496 case AArch64::STPXi:
4497 case AArch64::STGPi:
4498 return true;
4499 }
4500}
4501
4503 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4504 unsigned Idx =
4506 : 1;
4507 return MI.getOperand(Idx);
4508}
4509
4510const MachineOperand &
4512 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4513 unsigned Idx =
4515 : 2;
4516 return MI.getOperand(Idx);
4517}
4518
4519const MachineOperand &
4521 switch (MI.getOpcode()) {
4522 default:
4523 llvm_unreachable("Unexpected opcode");
4524 case AArch64::LDRBBroX:
4525 return MI.getOperand(4);
4526 }
4527}
4528
4530 Register Reg) {
4531 if (MI.getParent() == nullptr)
4532 return nullptr;
4533 const MachineFunction *MF = MI.getParent()->getParent();
4534 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4535}
4536
4538 auto IsHFPR = [&](const MachineOperand &Op) {
4539 if (!Op.isReg())
4540 return false;
4541 auto Reg = Op.getReg();
4542 if (Reg.isPhysical())
4543 return AArch64::FPR16RegClass.contains(Reg);
4544 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4545 return TRC == &AArch64::FPR16RegClass ||
4546 TRC == &AArch64::FPR16_loRegClass;
4547 };
4548 return llvm::any_of(MI.operands(), IsHFPR);
4549}
4550
4552 auto IsQFPR = [&](const MachineOperand &Op) {
4553 if (!Op.isReg())
4554 return false;
4555 auto Reg = Op.getReg();
4556 if (Reg.isPhysical())
4557 return AArch64::FPR128RegClass.contains(Reg);
4558 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4559 return TRC == &AArch64::FPR128RegClass ||
4560 TRC == &AArch64::FPR128_loRegClass;
4561 };
4562 return llvm::any_of(MI.operands(), IsQFPR);
4563}
4564
4566 switch (MI.getOpcode()) {
4567 case AArch64::BRK:
4568 case AArch64::HLT:
4569 case AArch64::PACIASP:
4570 case AArch64::PACIBSP:
4571 // Implicit BTI behavior.
4572 return true;
4573 case AArch64::PAUTH_PROLOGUE:
4574 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4575 return true;
4576 case AArch64::HINT: {
4577 unsigned Imm = MI.getOperand(0).getImm();
4578 // Explicit BTI instruction.
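 // (HINT #32, #34, #36 and #38 encode BTI, BTI c, BTI j and BTI jc.)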
4579 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4580 return true;
4581 // PACI(A|B)SP instructions.
4582 if (Imm == 25 || Imm == 27)
4583 return true;
4584 return false;
4585 }
4586 default:
4587 return false;
4588 }
4589}
4590
4592 if (Reg == 0)
4593 return false;
4594 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4595 return AArch64::FPR128RegClass.contains(Reg) ||
4596 AArch64::FPR64RegClass.contains(Reg) ||
4597 AArch64::FPR32RegClass.contains(Reg) ||
4598 AArch64::FPR16RegClass.contains(Reg) ||
4599 AArch64::FPR8RegClass.contains(Reg);
4600}
4601
4603 auto IsFPR = [&](const MachineOperand &Op) {
4604 if (!Op.isReg())
4605 return false;
4606 auto Reg = Op.getReg();
4607 if (Reg.isPhysical())
4608 return isFpOrNEON(Reg);
4609
4610 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4611 return TRC == &AArch64::FPR128RegClass ||
4612 TRC == &AArch64::FPR128_loRegClass ||
4613 TRC == &AArch64::FPR64RegClass ||
4614 TRC == &AArch64::FPR64_loRegClass ||
4615 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4616 TRC == &AArch64::FPR8RegClass;
4617 };
4618 return llvm::any_of(MI.operands(), IsFPR);
4619}
4620
4621// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4622// scaled.
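// For example, with Opc = AArch64::STURXi (8-byte accesses) a byte offset of 16
// scales to an element offset of 2, while a byte offset of 12 cannot be scaled.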
4623static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4624 int Scale = AArch64InstrInfo::getMemScale(Opc);
4625
4626 // If the byte-offset isn't a multiple of the stride, we can't scale this
4627 // offset.
4628 if (Offset % Scale != 0)
4629 return false;
4630
4631 // Convert the byte-offset used by unscaled into an "element" offset used
4632 // by the scaled pair load/store instructions.
4633 Offset /= Scale;
4634 return true;
4635}
4636
4637static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4638 if (FirstOpc == SecondOpc)
4639 return true;
4640 // We can also pair sign-ext and zero-ext instructions.
4641 switch (FirstOpc) {
4642 default:
4643 return false;
4644 case AArch64::STRSui:
4645 case AArch64::STURSi:
4646 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4647 case AArch64::STRDui:
4648 case AArch64::STURDi:
4649 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4650 case AArch64::STRQui:
4651 case AArch64::STURQi:
4652 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4653 case AArch64::STRWui:
4654 case AArch64::STURWi:
4655 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4656 case AArch64::STRXui:
4657 case AArch64::STURXi:
4658 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4659 case AArch64::LDRSui:
4660 case AArch64::LDURSi:
4661 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4662 case AArch64::LDRDui:
4663 case AArch64::LDURDi:
4664 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4665 case AArch64::LDRQui:
4666 case AArch64::LDURQi:
4667 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4668 case AArch64::LDRWui:
4669 case AArch64::LDURWi:
4670 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4671 case AArch64::LDRSWui:
4672 case AArch64::LDURSWi:
4673 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4674 case AArch64::LDRXui:
4675 case AArch64::LDURXi:
4676 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4677 }
4678 // These instructions can't be paired based on their opcodes.
4679 return false;
4680}
4681
4682static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4683 int64_t Offset1, unsigned Opcode1, int FI2,
4684 int64_t Offset2, unsigned Opcode2) {
4685 // Accesses through fixed stack object frame indices may access a different
4686 // fixed stack slot. Check that the object offsets + offsets match.
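 // For example, two LDRXui accesses of fixed objects at object offsets 0 and 8,
 // each with an instruction offset of 0, become scaled offsets 0 and 1, which
 // are adjacent and therefore suitable for clustering.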
4687 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4688 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4689 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4690 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4691 // Convert to scaled object offsets.
4692 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4693 if (ObjectOffset1 % Scale1 != 0)
4694 return false;
4695 ObjectOffset1 /= Scale1;
4696 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4697 if (ObjectOffset2 % Scale2 != 0)
4698 return false;
4699 ObjectOffset2 /= Scale2;
4700 ObjectOffset1 += Offset1;
4701 ObjectOffset2 += Offset2;
4702 return ObjectOffset1 + 1 == ObjectOffset2;
4703 }
4704
4705 return FI1 == FI2;
4706}
4707
4708/// Detect opportunities for ldp/stp formation.
4709///
4710/// Only called for LdSt for which getMemOperandWithOffset returns true.
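/// For example, "ldr x0, [x2]" followed by "ldr x1, [x2, #8]" (element offsets
/// 0 and 1 after scaling) can typically be clustered and later combined into a
/// single "ldp x0, x1, [x2]".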
4711bool AArch64InstrInfo::shouldClusterMemOps(
4712 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4713 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4714 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4715 unsigned NumBytes) const {
4716 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4717 const MachineOperand &BaseOp1 = *BaseOps1.front();
4718 const MachineOperand &BaseOp2 = *BaseOps2.front();
4719 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4720 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4721 if (BaseOp1.getType() != BaseOp2.getType())
4722 return false;
4723
4724 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4725 "Only base registers and frame indices are supported.");
4726
4727 // Check for both base regs and base FI.
4728 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4729 return false;
4730
4731 // Only cluster up to a single pair.
4732 if (ClusterSize > 2)
4733 return false;
4734
4735 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4736 return false;
4737
4738 // Can we pair these instructions based on their opcodes?
4739 unsigned FirstOpc = FirstLdSt.getOpcode();
4740 unsigned SecondOpc = SecondLdSt.getOpcode();
4741 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4742 return false;
4743
4744 // Can't merge volatiles or load/stores that have a hint to avoid pair
4745 // formation, for example.
4746 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4747 !isCandidateToMergeOrPair(SecondLdSt))
4748 return false;
4749
4750 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4751 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4752 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4753 return false;
4754
4755 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4756 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4757 return false;
4758
4759 // Pairwise instructions have a 7-bit signed offset field.
4760 if (Offset1 > 63 || Offset1 < -64)
4761 return false;
4762
4763 // The caller should already have ordered First/SecondLdSt by offset.
4764 // Note: except for non-equal frame index bases
4765 if (BaseOp1.isFI()) {
4766 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4767 "Caller should have ordered offsets.");
4768
4769 const MachineFrameInfo &MFI =
4770 FirstLdSt.getParent()->getParent()->getFrameInfo();
4771 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4772 BaseOp2.getIndex(), Offset2, SecondOpc);
4773 }
4774
4775 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4776
4777 return Offset1 + 1 == Offset2;
4778}
4779
4780static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4781 unsigned Reg, unsigned SubIdx,
4782 unsigned State,
4783 const TargetRegisterInfo *TRI) {
4784 if (!SubIdx)
4785 return MIB.addReg(Reg, State);
4786
4788 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4789 return MIB.addReg(Reg, State, SubIdx);
4790}
4791
4792static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4793 unsigned NumRegs) {
4794 // We really want the positive remainder mod 32 here, which happens to be
4795 // easily obtainable with a mask.
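 // For example, when copying the tuple {D1, D2} from {D0, D1} the encodings
 // differ by 1, which is < NumRegs (2), so a forward sub-register copy would
 // clobber D1 before it is read; the copy is then emitted in reverse order.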
4796 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4797}
4798
4801 const DebugLoc &DL, MCRegister DestReg,
4802 MCRegister SrcReg, bool KillSrc,
4803 unsigned Opcode,
4804 ArrayRef<unsigned> Indices) const {
4805 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4807 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4808 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4809 unsigned NumRegs = Indices.size();
4810
4811 int SubReg = 0, End = NumRegs, Incr = 1;
4812 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4813 SubReg = NumRegs - 1;
4814 End = -1;
4815 Incr = -1;
4816 }
4817
4818 for (; SubReg != End; SubReg += Incr) {
4819 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4820 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4821 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4822 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4823 }
4824}
4825
4828 DebugLoc DL, unsigned DestReg,
4829 unsigned SrcReg, bool KillSrc,
4830 unsigned Opcode, unsigned ZeroReg,
4831 llvm::ArrayRef<unsigned> Indices) const {
4833 unsigned NumRegs = Indices.size();
4834
4835#ifndef NDEBUG
4836 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4837 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4838 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4839 "GPR reg sequences should not be able to overlap");
4840#endif
4841
4842 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4843 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4844 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4845 MIB.addReg(ZeroReg);
4846 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4847 MIB.addImm(0);
4848 }
4849}
4850
4853 const DebugLoc &DL, MCRegister DestReg,
4854 MCRegister SrcReg, bool KillSrc) const {
4855 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4856 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4858
4859 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4860 // If either operand is WSP, expand to ADD #0.
4861 if (Subtarget.hasZeroCycleRegMove()) {
4862 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4863 MCRegister DestRegX = TRI->getMatchingSuperReg(
4864 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4865 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4866 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4867 // This instruction is reading and writing X registers. This may upset
4868 // the register scavenger and machine verifier, so we need to indicate
4869 // that we are reading an undefined value from SrcRegX, but a proper
4870 // value from SrcReg.
4871 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4872 .addReg(SrcRegX, RegState::Undef)
4873 .addImm(0)
4875 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4876 } else {
4877 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4878 .addReg(SrcReg, getKillRegState(KillSrc))
4879 .addImm(0)
4881 }
4882 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4883 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4884 .addImm(0)
4886 } else {
4887 if (Subtarget.hasZeroCycleRegMove()) {
4888 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4889 MCRegister DestRegX = TRI->getMatchingSuperReg(
4890 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4891 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4892 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4893 // This instruction is reading and writing X registers. This may upset
4894 // the register scavenger and machine verifier, so we need to indicate
4895 // that we are reading an undefined value from SrcRegX, but a proper
4896 // value from SrcReg.
4897 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4898 .addReg(AArch64::XZR)
4899 .addReg(SrcRegX, RegState::Undef)
4900 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4901 } else {
4902 // Otherwise, expand to ORR WZR.
4903 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4904 .addReg(AArch64::WZR)
4905 .addReg(SrcReg, getKillRegState(KillSrc));
4906 }
4907 }
4908 return;
4909 }
4910
4911 // Copy a Predicate register by ORRing with itself.
4912 if (AArch64::PPRRegClass.contains(DestReg) &&
4913 AArch64::PPRRegClass.contains(SrcReg)) {
4915 "Unexpected SVE register.");
4916 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4917 .addReg(SrcReg) // Pg
4918 .addReg(SrcReg)
4919 .addReg(SrcReg, getKillRegState(KillSrc));
4920 return;
4921 }
4922
4923 // Copy a predicate-as-counter register by ORRing with itself as if it
4924 // were a regular predicate (mask) register.
4925 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4926 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4927 if (DestIsPNR || SrcIsPNR) {
4928 auto ToPPR = [](MCRegister R) -> MCRegister {
4929 return (R - AArch64::PN0) + AArch64::P0;
4930 };
4931 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4932 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4933
4934 if (PPRSrcReg != PPRDestReg) {
4935 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4936 .addReg(PPRSrcReg) // Pg
4937 .addReg(PPRSrcReg)
4938 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4939 if (DestIsPNR)
4940 NewMI.addDef(DestReg, RegState::Implicit);
4941 }
4942 return;
4943 }
4944
4945 // Copy a Z register by ORRing with itself.
4946 if (AArch64::ZPRRegClass.contains(DestReg) &&
4947 AArch64::ZPRRegClass.contains(SrcReg)) {
4949 "Unexpected SVE register.");
4950 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4951 .addReg(SrcReg)
4952 .addReg(SrcReg, getKillRegState(KillSrc));
4953 return;
4954 }
4955
4956 // Copy a Z register pair by copying the individual sub-registers.
4957 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4958 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4959 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4960 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4962 "Unexpected SVE register.");
4963 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4964 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4965 Indices);
4966 return;
4967 }
4968
4969 // Copy a Z register triple by copying the individual sub-registers.
4970 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4971 AArch64::ZPR3RegClass.contains(SrcReg)) {
4973 "Unexpected SVE register.");
4974 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4975 AArch64::zsub2};
4976 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4977 Indices);
4978 return;
4979 }
4980
4981 // Copy a Z register quad by copying the individual sub-registers.
4982 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4983 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4984 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4985 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4987 "Unexpected SVE register.");
4988 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4989 AArch64::zsub2, AArch64::zsub3};
4990 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4991 Indices);
4992 return;
4993 }
4994
4995 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4996 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4997 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4998 // If either operand is SP, expand to ADD #0.
4999 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5000 .addReg(SrcReg, getKillRegState(KillSrc))
5001 .addImm(0)
5003 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
5004 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5005 .addImm(0)
5007 } else {
5008 // Otherwise, expand to ORR XZR.
5009 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5010 .addReg(AArch64::XZR)
5011 .addReg(SrcReg, getKillRegState(KillSrc));
5012 }
5013 return;
5014 }
5015
5016 // Copy a DDDD register quad by copying the individual sub-registers.
5017 if (AArch64::DDDDRegClass.contains(DestReg) &&
5018 AArch64::DDDDRegClass.contains(SrcReg)) {
5019 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5020 AArch64::dsub2, AArch64::dsub3};
5021 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5022 Indices);
5023 return;
5024 }
5025
5026 // Copy a DDD register triple by copying the individual sub-registers.
5027 if (AArch64::DDDRegClass.contains(DestReg) &&
5028 AArch64::DDDRegClass.contains(SrcReg)) {
5029 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5030 AArch64::dsub2};
5031 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5032 Indices);
5033 return;
5034 }
5035
5036 // Copy a DD register pair by copying the individual sub-registers.
5037 if (AArch64::DDRegClass.contains(DestReg) &&
5038 AArch64::DDRegClass.contains(SrcReg)) {
5039 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5040 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5041 Indices);
5042 return;
5043 }
5044
5045 // Copy a QQQQ register quad by copying the individual sub-registers.
5046 if (AArch64::QQQQRegClass.contains(DestReg) &&
5047 AArch64::QQQQRegClass.contains(SrcReg)) {
5048 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5049 AArch64::qsub2, AArch64::qsub3};
5050 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5051 Indices);
5052 return;
5053 }
5054
5055 // Copy a QQQ register triple by copying the individual sub-registers.
5056 if (AArch64::QQQRegClass.contains(DestReg) &&
5057 AArch64::QQQRegClass.contains(SrcReg)) {
5058 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5059 AArch64::qsub2};
5060 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5061 Indices);
5062 return;
5063 }
5064
5065 // Copy a QQ register pair by copying the individual sub-registers.
5066 if (AArch64::QQRegClass.contains(DestReg) &&
5067 AArch64::QQRegClass.contains(SrcReg)) {
5068 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5069 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5070 Indices);
5071 return;
5072 }
5073
5074 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5075 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5076 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5077 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5078 AArch64::XZR, Indices);
5079 return;
5080 }
5081
5082 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5083 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5084 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5085 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5086 AArch64::WZR, Indices);
5087 return;
5088 }
5089
5090 if (AArch64::FPR128RegClass.contains(DestReg) &&
5091 AArch64::FPR128RegClass.contains(SrcReg)) {
5092 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5093 !Subtarget.isNeonAvailable())
5094 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5095 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5096 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5097 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5098 else if (Subtarget.isNeonAvailable())
5099 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5100 .addReg(SrcReg)
5101 .addReg(SrcReg, getKillRegState(KillSrc));
5102 else {
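 // Neither NEON nor SVE is available, so bounce the copy through the stack:
 // pre-decrement SP and store the source Q register, then reload it into the
 // destination with a post-increment that restores SP.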
5103 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5104 .addReg(AArch64::SP, RegState::Define)
5105 .addReg(SrcReg, getKillRegState(KillSrc))
5106 .addReg(AArch64::SP)
5107 .addImm(-16);
5108 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5109 .addReg(AArch64::SP, RegState::Define)
5110 .addReg(DestReg, RegState::Define)
5111 .addReg(AArch64::SP)
5112 .addImm(16);
5113 }
5114 return;
5115 }
5116
5117 if (AArch64::FPR64RegClass.contains(DestReg) &&
5118 AArch64::FPR64RegClass.contains(SrcReg)) {
5119 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5120 .addReg(SrcReg, getKillRegState(KillSrc));
5121 return;
5122 }
5123
5124 if (AArch64::FPR32RegClass.contains(DestReg) &&
5125 AArch64::FPR32RegClass.contains(SrcReg)) {
5126 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5127 .addReg(SrcReg, getKillRegState(KillSrc));
5128 return;
5129 }
5130
5131 if (AArch64::FPR16RegClass.contains(DestReg) &&
5132 AArch64::FPR16RegClass.contains(SrcReg)) {
5133 DestReg =
5134 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
5135 SrcReg =
5136 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
5137 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5138 .addReg(SrcReg, getKillRegState(KillSrc));
5139 return;
5140 }
5141
5142 if (AArch64::FPR8RegClass.contains(DestReg) &&
5143 AArch64::FPR8RegClass.contains(SrcReg)) {
5144 DestReg =
5145 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
5146 SrcReg =
5147 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
5148 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5149 .addReg(SrcReg, getKillRegState(KillSrc));
5150 return;
5151 }
5152
5153 // Copies between GPR64 and FPR64.
5154 if (AArch64::FPR64RegClass.contains(DestReg) &&
5155 AArch64::GPR64RegClass.contains(SrcReg)) {
5156 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5157 .addReg(SrcReg, getKillRegState(KillSrc));
5158 return;
5159 }
5160 if (AArch64::GPR64RegClass.contains(DestReg) &&
5161 AArch64::FPR64RegClass.contains(SrcReg)) {
5162 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5163 .addReg(SrcReg, getKillRegState(KillSrc));
5164 return;
5165 }
5166 // Copies between GPR32 and FPR32.
5167 if (AArch64::FPR32RegClass.contains(DestReg) &&
5168 AArch64::GPR32RegClass.contains(SrcReg)) {
5169 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5170 .addReg(SrcReg, getKillRegState(KillSrc));
5171 return;
5172 }
5173 if (AArch64::GPR32RegClass.contains(DestReg) &&
5174 AArch64::FPR32RegClass.contains(SrcReg)) {
5175 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5176 .addReg(SrcReg, getKillRegState(KillSrc));
5177 return;
5178 }
5179
5180 if (DestReg == AArch64::NZCV) {
5181 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5182 BuildMI(MBB, I, DL, get(AArch64::MSR))
5183 .addImm(AArch64SysReg::NZCV)
5184 .addReg(SrcReg, getKillRegState(KillSrc))
5185 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5186 return;
5187 }
5188
5189 if (SrcReg == AArch64::NZCV) {
5190 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5191 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5192 .addImm(AArch64SysReg::NZCV)
5193 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5194 return;
5195 }
5196
5197#ifndef NDEBUG
5199 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5200 << TRI.getRegAsmName(SrcReg) << "\n";
5201#endif
5202 llvm_unreachable("unimplemented reg-to-reg copy");
5203}
5204
5207 MachineBasicBlock::iterator InsertBefore,
5208 const MCInstrDesc &MCID,
5209 Register SrcReg, bool IsKill,
5210 unsigned SubIdx0, unsigned SubIdx1, int FI,
5211 MachineMemOperand *MMO) {
5212 Register SrcReg0 = SrcReg;
5213 Register SrcReg1 = SrcReg;
5214 if (SrcReg.isPhysical()) {
5215 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5216 SubIdx0 = 0;
5217 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5218 SubIdx1 = 0;
5219 }
5220 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5221 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5222 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5223 .addFrameIndex(FI)
5224 .addImm(0)
5225 .addMemOperand(MMO);
5226}
5227
5230 Register SrcReg, bool isKill, int FI,
5231 const TargetRegisterClass *RC,
5232 const TargetRegisterInfo *TRI,
5233 Register VReg) const {
5234 MachineFunction &MF = *MBB.getParent();
5235 MachineFrameInfo &MFI = MF.getFrameInfo();
5236
5238 MachineMemOperand *MMO =
5240 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5241 unsigned Opc = 0;
5242 bool Offset = true;
5244 unsigned StackID = TargetStackID::Default;
5245 switch (TRI->getSpillSize(*RC)) {
5246 case 1:
5247 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5248 Opc = AArch64::STRBui;
5249 break;
5250 case 2: {
5251 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5252 Opc = AArch64::STRHui;
5253 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5254 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5256 "Unexpected register store without SVE store instructions");
5257 Opc = AArch64::STR_PXI;
5259 }
5260 break;
5261 }
5262 case 4:
5263 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5264 Opc = AArch64::STRWui;
5265 if (SrcReg.isVirtual())
5266 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5267 else
5268 assert(SrcReg != AArch64::WSP);
5269 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5270 Opc = AArch64::STRSui;
5271 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5272 Opc = AArch64::STR_PPXI;
5274 }
5275 break;
5276 case 8:
5277 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5278 Opc = AArch64::STRXui;
5279 if (SrcReg.isVirtual())
5280 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5281 else
5282 assert(SrcReg != AArch64::SP);
5283 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5284 Opc = AArch64::STRDui;
5285 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5287 get(AArch64::STPWi), SrcReg, isKill,
5288 AArch64::sube32, AArch64::subo32, FI, MMO);
5289 return;
5290 }
5291 break;
5292 case 16:
5293 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5294 Opc = AArch64::STRQui;
5295 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5296 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5297 Opc = AArch64::ST1Twov1d;
5298 Offset = false;
5299 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5301 get(AArch64::STPXi), SrcReg, isKill,
5302 AArch64::sube64, AArch64::subo64, FI, MMO);
5303 return;
5304 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5306 "Unexpected register store without SVE store instructions");
5307 Opc = AArch64::STR_ZXI;
5309 }
5310 break;
5311 case 24:
5312 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5313 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5314 Opc = AArch64::ST1Threev1d;
5315 Offset = false;
5316 }
5317 break;
5318 case 32:
5319 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5320 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5321 Opc = AArch64::ST1Fourv1d;
5322 Offset = false;
5323 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5324 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5325 Opc = AArch64::ST1Twov2d;
5326 Offset = false;
5327 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5328 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5330 "Unexpected register store without SVE store instructions");
5331 Opc = AArch64::STR_ZZXI;
5333 }
5334 break;
5335 case 48:
5336 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5337 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5338 Opc = AArch64::ST1Threev2d;
5339 Offset = false;
5340 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5342 "Unexpected register store without SVE store instructions");
5343 Opc = AArch64::STR_ZZZXI;
5345 }
5346 break;
5347 case 64:
5348 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5349 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5350 Opc = AArch64::ST1Fourv2d;
5351 Offset = false;
5352 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5353 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5355 "Unexpected register store without SVE store instructions");
5356 Opc = AArch64::STR_ZZZZXI;
5358 }
5359 break;
5360 }
5361 assert(Opc && "Unknown register class");
5362 MFI.setStackID(FI, StackID);
5363
5364 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5365 .addReg(SrcReg, getKillRegState(isKill))
5366 .addFrameIndex(FI);
5367
5368 if (Offset)
5369 MI.addImm(0);
5370 if (PNRReg.isValid())
5371 MI.addDef(PNRReg, RegState::Implicit);
5372 MI.addMemOperand(MMO);
5373}
5374
5377 MachineBasicBlock::iterator InsertBefore,
5378 const MCInstrDesc &MCID,
5379 Register DestReg, unsigned SubIdx0,
5380 unsigned SubIdx1, int FI,
5381 MachineMemOperand *MMO) {
5382 Register DestReg0 = DestReg;
5383 Register DestReg1 = DestReg;
5384 bool IsUndef = true;
5385 if (DestReg.isPhysical()) {
5386 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5387 SubIdx0 = 0;
5388 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5389 SubIdx1 = 0;
5390 IsUndef = false;
5391 }
5392 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5393 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5394 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5395 .addFrameIndex(FI)
5396 .addImm(0)
5397 .addMemOperand(MMO);
5398}
5399
5402 Register DestReg, int FI,
5403 const TargetRegisterClass *RC,
5404 const TargetRegisterInfo *TRI,
5405 Register VReg) const {
5406 MachineFunction &MF = *MBB.getParent();
5407 MachineFrameInfo &MFI = MF.getFrameInfo();
5409 MachineMemOperand *MMO =
5411 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5412
5413 unsigned Opc = 0;
5414 bool Offset = true;
5415 unsigned StackID = TargetStackID::Default;
5417 switch (TRI->getSpillSize(*RC)) {
5418 case 1:
5419 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5420 Opc = AArch64::LDRBui;
5421 break;
5422 case 2: {
5423 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5424 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5425 Opc = AArch64::LDRHui;
5426 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5428 "Unexpected register load without SVE load instructions");
5429 if (IsPNR)
5430 PNRReg = DestReg;
5431 Opc = AArch64::LDR_PXI;
5433 }
5434 break;
5435 }
5436 case 4:
5437 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5438 Opc = AArch64::LDRWui;
5439 if (DestReg.isVirtual())
5440 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5441 else
5442 assert(DestReg != AArch64::WSP);
5443 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5444 Opc = AArch64::LDRSui;
5445 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5446 Opc = AArch64::LDR_PPXI;
5448 }
5449 break;
5450 case 8:
5451 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5452 Opc = AArch64::LDRXui;
5453 if (DestReg.isVirtual())
5454 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5455 else
5456 assert(DestReg != AArch64::SP);
5457 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5458 Opc = AArch64::LDRDui;
5459 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5461 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5462 AArch64::subo32, FI, MMO);
5463 return;
5464 }
5465 break;
5466 case 16:
5467 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5468 Opc = AArch64::LDRQui;
5469 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5470 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5471 Opc = AArch64::LD1Twov1d;
5472 Offset = false;
5473 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5475 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5476 AArch64::subo64, FI, MMO);
5477 return;
5478 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5480 "Unexpected register load without SVE load instructions");
5481 Opc = AArch64::LDR_ZXI;
5483 }
5484 break;
5485 case 24:
5486 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5487 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5488 Opc = AArch64::LD1Threev1d;
5489 Offset = false;
5490 }
5491 break;
5492 case 32:
5493 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5494 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5495 Opc = AArch64::LD1Fourv1d;
5496 Offset = false;
5497 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5498 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5499 Opc = AArch64::LD1Twov2d;
5500 Offset = false;
5501 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5502 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5504 "Unexpected register load without SVE load instructions");
5505 Opc = AArch64::LDR_ZZXI;
5507 }
5508 break;
5509 case 48:
5510 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5511 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5512 Opc = AArch64::LD1Threev2d;
5513 Offset = false;
5514 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5515 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5516 "Unexpected register load without SVE load instructions");
5517 Opc = AArch64::LDR_ZZZXI;
5518 StackID = TargetStackID::ScalableVector;
5519 }
5520 break;
5521 case 64:
5522 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5523 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5524 Opc = AArch64::LD1Fourv2d;
5525 Offset = false;
5526 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5527 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5528 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5529 "Unexpected register load without SVE load instructions");
5530 Opc = AArch64::LDR_ZZZZXI;
5531 StackID = TargetStackID::ScalableVector;
5532 }
5533 break;
5534 }
5535
5536 assert(Opc && "Unknown register class");
5537 MFI.setStackID(FI, StackID);
5538
5539 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5540 .addReg(DestReg, getDefRegState(true))
5541 .addFrameIndex(FI);
5542 if (Offset)
5543 MI.addImm(0);
5544 if (PNRReg.isValid() && !PNRReg.isVirtual())
5545 MI.addDef(PNRReg, RegState::Implicit);
5546 MI.addMemOperand(MMO);
5547
5548 if (PNRReg.isValid() && PNRReg.isVirtual())
5549 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5550 .addReg(DestReg);
5551}
5552
5553 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
5554 const MachineInstr &UseMI,
5555 const TargetRegisterInfo *TRI) {
5556 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5557 UseMI.getIterator()),
5558 [TRI](const MachineInstr &I) {
5559 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5560 I.readsRegister(AArch64::NZCV, TRI);
5561 });
5562}
5563
5564 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5565 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5566 // The smallest scalable elements supported by scaled SVE addressing
5567 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5568 // byte offset must always be a multiple of 2.
5569 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5570
5571 // VGSized offsets are divided by '2', because the VG register is
5572 // the number of 64bit granules as opposed to 128bit vector chunks,
5573 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5574 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5575 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5576 ByteSized = Offset.getFixed();
5577 VGSized = Offset.getScalable() / 2;
5578}
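// Worked example for the decomposition above (values chosen purely for
// illustration): a StackOffset with getFixed() == 32 and getScalable() == 16
// yields ByteSized = 32 and VGSized = 8, so a DWARF consumer later evaluates
// the location as 32 + 8 * VG bytes from the base register.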
5579
5580/// Returns the offset in parts to which this frame offset can be
5581/// decomposed for the purpose of describing a frame offset.
5582/// For non-scalable offsets this is simply its byte size.
5583 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5584 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5585 int64_t &NumDataVectors) {
5586 // The smallest scalable elements supported by scaled SVE addressing
5587 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5588 // byte offset must always be a multiple of 2.
5589 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5590
5591 NumBytes = Offset.getFixed();
5592 NumDataVectors = 0;
5593 NumPredicateVectors = Offset.getScalable() / 2;
5594 // This method is used to get the offsets to adjust the frame offset.
5595 // If the function requires ADDPL to be used and needs more than two ADDPL
5596 // instructions, part of the offset is folded into NumDataVectors so that it
5597 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5598 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5599 NumPredicateVectors > 62) {
5600 NumDataVectors = NumPredicateVectors / 8;
5601 NumPredicateVectors -= NumDataVectors * 8;
5602 }
5603}
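// Worked example of the folding above (illustrative values): a scalable
// offset of 144 bytes gives NumPredicateVectors = 72; since 72 > 62 it is
// re-expressed as NumDataVectors = 9 and NumPredicateVectors = 0, so a single
// ADDVL #9 can be emitted instead of a chain of ADDPL instructions.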
5604
5605// Convenience function to create a DWARF expression for
5606// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5607static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5608 int NumVGScaledBytes, unsigned VG,
5609 llvm::raw_string_ostream &Comment) {
5610 uint8_t buffer[16];
5611
5612 if (NumBytes) {
5613 Expr.push_back(dwarf::DW_OP_consts);
5614 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5615 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5616 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5617 }
5618
5619 if (NumVGScaledBytes) {
5620 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5621 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5622
5623 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5624 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5625 Expr.push_back(0);
5626
5627 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5628 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5629
5630 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5631 << std::abs(NumVGScaledBytes) << " * VG";
5632 }
5633}
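// For illustration (hypothetical inputs): with NumBytes = 16 and
// NumVGScaledBytes = 8, the bytes appended to Expr encode
// DW_OP_consts 16, DW_OP_plus, DW_OP_consts 8, DW_OP_bregx VG 0, DW_OP_mul,
// DW_OP_plus, i.e. the running expression becomes Expr + 16 + 8 * VG, and
// Comment gains " + 16 + 8 * VG".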
5634
5635// Creates an MCCFIInstruction:
5636// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5637 static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI,
5638 unsigned Reg,
5639 const StackOffset &Offset) {
5640 int64_t NumBytes, NumVGScaledBytes;
5641 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
5642 NumVGScaledBytes);
5643 std::string CommentBuffer;
5644 llvm::raw_string_ostream Comment(CommentBuffer);
5645
5646 if (Reg == AArch64::SP)
5647 Comment << "sp";
5648 else if (Reg == AArch64::FP)
5649 Comment << "fp";
5650 else
5651 Comment << printReg(Reg, &TRI);
5652
5653 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5654 SmallString<64> Expr;
5655 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5656 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5657 Expr.push_back(0);
5658 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5659 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5660
5661 // Wrap this into DW_CFA_def_cfa.
5662 SmallString<64> DefCfaExpr;
5663 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5664 uint8_t buffer[16];
5665 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5666 DefCfaExpr.append(Expr.str());
5667 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5668 Comment.str());
5669}
5670
5671 MCCFIInstruction llvm::createDefCFA(const TargetRegisterInfo &TRI,
5672 unsigned FrameReg, unsigned Reg,
5673 const StackOffset &Offset,
5674 bool LastAdjustmentWasScalable) {
5675 if (Offset.getScalable())
5676 return createDefCFAExpression(TRI, Reg, Offset);
5677
5678 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5679 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5680
5681 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5682 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5683}
5684
5685 MCCFIInstruction llvm::createCFAOffset(const TargetRegisterInfo &TRI,
5686 unsigned Reg,
5687 const StackOffset &OffsetFromDefCFA) {
5688 int64_t NumBytes, NumVGScaledBytes;
5689 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5690 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5691
5692 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5693
5694 // Non-scalable offsets can use DW_CFA_offset directly.
5695 if (!NumVGScaledBytes)
5696 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5697
5698 std::string CommentBuffer;
5699 llvm::raw_string_ostream Comment(CommentBuffer);
5700 Comment << printReg(Reg, &TRI) << " @ cfa";
5701
5702 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5703 SmallString<64> OffsetExpr;
5704 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5705 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5706
5707 // Wrap this into DW_CFA_expression
5708 SmallString<64> CfaExpr;
5709 CfaExpr.push_back(dwarf::DW_CFA_expression);
5710 uint8_t buffer[16];
5711 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5712 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5713 CfaExpr.append(OffsetExpr.str());
5714
5715 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5716 Comment.str());
5717}
5718
5719// Helper function to emit a frame offset adjustment from a given
5720// pointer (SrcReg), stored into DestReg. This function is explicit
5721// in that it requires the opcode.
5722 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5723 MachineBasicBlock::iterator MBBI,
5724 const DebugLoc &DL, unsigned DestReg,
5725 unsigned SrcReg, int64_t Offset, unsigned Opc,
5726 const TargetInstrInfo *TII,
5727 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5728 bool *HasWinCFI, bool EmitCFAOffset,
5729 StackOffset CFAOffset, unsigned FrameReg) {
5730 int Sign = 1;
5731 unsigned MaxEncoding, ShiftSize;
5732 switch (Opc) {
5733 case AArch64::ADDXri:
5734 case AArch64::ADDSXri:
5735 case AArch64::SUBXri:
5736 case AArch64::SUBSXri:
5737 MaxEncoding = 0xfff;
5738 ShiftSize = 12;
5739 break;
5740 case AArch64::ADDVL_XXI:
5741 case AArch64::ADDPL_XXI:
5742 case AArch64::ADDSVL_XXI:
5743 case AArch64::ADDSPL_XXI:
5744 MaxEncoding = 31;
5745 ShiftSize = 0;
5746 if (Offset < 0) {
5747 MaxEncoding = 32;
5748 Sign = -1;
5749 Offset = -Offset;
5750 }
5751 break;
5752 default:
5753 llvm_unreachable("Unsupported opcode");
5754 }
5755
5756 // `Offset` can be in bytes or in "scalable bytes".
5757 int VScale = 1;
5758 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5759 VScale = 16;
5760 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5761 VScale = 2;
5762
5763 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5764 // scratch register. If DestReg is a virtual register, use it as the
5765 // scratch register; otherwise, create a new virtual register (to be
5766 // replaced by the scavenger at the end of PEI). That case can be optimized
5767 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5768 // register can be loaded with offset%8 and the add/sub can use an extending
5769 // instruction with LSL#3.
5770 // Currently the function handles any offsets but generates a poor sequence
5771 // of code.
5772 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5773
5774 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5775 Register TmpReg = DestReg;
5776 if (TmpReg == AArch64::XZR)
5777 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
5778 &AArch64::GPR64RegClass);
5779 do {
5780 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5781 unsigned LocalShiftSize = 0;
5782 if (ThisVal > MaxEncoding) {
5783 ThisVal = ThisVal >> ShiftSize;
5784 LocalShiftSize = ShiftSize;
5785 }
5786 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5787 "Encoding cannot handle value that big");
5788
5789 Offset -= ThisVal << LocalShiftSize;
5790 if (Offset == 0)
5791 TmpReg = DestReg;
5792 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5793 .addReg(SrcReg)
5794 .addImm(Sign * (int)ThisVal);
5795 if (ShiftSize)
5796 MBI = MBI.addImm(
5797 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
5798 MBI = MBI.setMIFlag(Flag);
5799
5800 auto Change =
5801 VScale == 1
5802 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5803 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5804 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5805 CFAOffset += Change;
5806 else
5807 CFAOffset -= Change;
5808 if (EmitCFAOffset && DestReg == TmpReg) {
5809 MachineFunction &MF = *MBB.getParent();
5810 const TargetSubtargetInfo &STI = MF.getSubtarget();
5811 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5812
5813 unsigned CFIIndex = MF.addFrameInst(
5814 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5815 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5816 .addCFIIndex(CFIIndex)
5817 .setMIFlags(Flag);
5818 }
5819
5820 if (NeedsWinCFI) {
5821 assert(Sign == 1 && "SEH directives should always have a positive sign");
5822 int Imm = (int)(ThisVal << LocalShiftSize);
5823 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5824 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5825 if (HasWinCFI)
5826 *HasWinCFI = true;
5827 if (Imm == 0)
5828 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5829 else
5830 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5831 .addImm(Imm)
5832 .setMIFlag(Flag);
5833 assert(Offset == 0 && "Expected remaining offset to be zero to "
5834 "emit a single SEH directive");
5835 } else if (DestReg == AArch64::SP) {
5836 if (HasWinCFI)
5837 *HasWinCFI = true;
5838 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5839 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5840 .addImm(Imm)
5841 .setMIFlag(Flag);
5842 }
5843 }
5844
5845 SrcReg = TmpReg;
5846 } while (Offset);
5847}
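// Illustrative walk-through of the loop above, assuming Opc == ADDXri and an
// invented Offset of 0x101008 (MaxEncodableValue is 0xfff000): the first
// iteration emits "add Dest, Src, #0x101, lsl #12", leaving 0x8 outstanding,
// and the second iteration emits "add Dest, Dest, #8", so the whole offset is
// materialised in two instructions without an extra scratch register.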
5848
5849 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
5850 MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
5851 unsigned DestReg, unsigned SrcReg,
5852 StackOffset Offset, const TargetInstrInfo *TII,
5853 MachineInstr::MIFlag Flag, bool SetNZCV,
5854 bool NeedsWinCFI, bool *HasWinCFI,
5855 bool EmitCFAOffset, StackOffset CFAOffset,
5856 unsigned FrameReg) {
5857 // If a function is marked as arm_locally_streaming, then the runtime value of
5858 // vscale in the prologue/epilogue is different from the runtime value of vscale
5859 // in the function's body. To avoid having to consider multiple vscales,
5860 // we can use `addsvl` to allocate any scalable stack-slots, which under
5861 // most circumstances will be only locals, not callee-save slots.
5862 const Function &F = MBB.getParent()->getFunction();
5863 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5864
5865 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5866 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5867 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5868
5869 // First emit non-scalable frame offsets, or a simple 'mov'.
5870 if (Bytes || (!Offset && SrcReg != DestReg)) {
5871 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5872 "SP increment/decrement not 8-byte aligned");
5873 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5874 if (Bytes < 0) {
5875 Bytes = -Bytes;
5876 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5877 }
5878 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5879 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5880 FrameReg);
5881 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5882 ? StackOffset::getFixed(-Bytes)
5883 : StackOffset::getFixed(Bytes);
5884 SrcReg = DestReg;
5885 FrameReg = DestReg;
5886 }
5887
5888 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5889 "SetNZCV not supported with SVE vectors");
5890 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5891 "WinCFI not supported with SVE vectors");
5892
5893 if (NumDataVectors) {
5894 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5895 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5896 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5897 CFAOffset, FrameReg);
5898 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5899 SrcReg = DestReg;
5900 }
5901
5902 if (NumPredicateVectors) {
5903 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5904 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5905 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5906 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5907 CFAOffset, FrameReg);
5908 }
5909}
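// Example of the overall lowering (register and offset choices are
// illustrative): with DestReg = SrcReg = SP and
// Offset = StackOffset::get(-16, -32), the fixed part emits
// "sub sp, sp, #16"; the scalable part decomposes to NumDataVectors = -2, so
// "addvl sp, sp, #-2" follows (or "addsvl" in a function marked with the
// aarch64_pstate_sm_body attribute).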
5910
5911 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5912 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5913 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5914 LiveIntervals *LIS, VirtRegMap *VRM) const {
5915 // This is a bit of a hack. Consider this instruction:
5916 //
5917 // %0 = COPY %sp; GPR64all:%0
5918 //
5919 // We explicitly chose GPR64all for the virtual register so such a copy might
5920 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5921 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5922 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5923 //
5924 // To prevent that, we are going to constrain the %0 register class here.
5925 if (MI.isFullCopy()) {
5926 Register DstReg = MI.getOperand(0).getReg();
5927 Register SrcReg = MI.getOperand(1).getReg();
5928 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5929 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5930 return nullptr;
5931 }
5932 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5933 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5934 return nullptr;
5935 }
5936 // Nothing can be folded with a copy from/to NZCV.
5937 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5938 return nullptr;
5939 }
5940
5941 // Handle the case where a copy is being spilled or filled but the source
5942 // and destination register class don't match. For example:
5943 //
5944 // %0 = COPY %xzr; GPR64common:%0
5945 //
5946 // In this case we can still safely fold away the COPY and generate the
5947 // following spill code:
5948 //
5949 // STRXui %xzr, %stack.0
5950 //
5951 // This also eliminates spilled cross register class COPYs (e.g. between x and
5952 // d regs) of the same size. For example:
5953 //
5954 // %0 = COPY %1; GPR64:%0, FPR64:%1
5955 //
5956 // will be filled as
5957 //
5958 // LDRDui %0, fi<#0>
5959 //
5960 // instead of
5961 //
5962 // LDRXui %Temp, fi<#0>
5963 // %0 = FMOV %Temp
5964 //
5965 if (MI.isCopy() && Ops.size() == 1 &&
5966 // Make sure we're only folding the explicit COPY defs/uses.
5967 (Ops[0] == 0 || Ops[0] == 1)) {
5968 bool IsSpill = Ops[0] == 0;
5969 bool IsFill = !IsSpill;
5970 const TargetRegisterInfo &TRI = getRegisterInfo();
5971 const MachineRegisterInfo &MRI = MF.getRegInfo();
5972 MachineBasicBlock &MBB = *MI.getParent();
5973 const MachineOperand &DstMO = MI.getOperand(0);
5974 const MachineOperand &SrcMO = MI.getOperand(1);
5975 Register DstReg = DstMO.getReg();
5976 Register SrcReg = SrcMO.getReg();
5977 // This is slightly expensive to compute for physical regs since
5978 // getMinimalPhysRegClass is slow.
5979 auto getRegClass = [&](unsigned Reg) {
5980 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5981 : TRI.getMinimalPhysRegClass(Reg);
5982 };
5983
5984 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5985 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5986 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5987 "Mismatched register size in non subreg COPY");
5988 if (IsSpill)
5989 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5990 getRegClass(SrcReg), &TRI, Register());
5991 else
5992 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5993 getRegClass(DstReg), &TRI, Register());
5994 return &*--InsertPt;
5995 }
5996
5997 // Handle cases like spilling def of:
5998 //
5999 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6000 //
6001 // where the physical register source can be widened and stored to the full
6002 // virtual reg destination stack slot, in this case producing:
6003 //
6004 // STRXui %xzr, %stack.0
6005 //
6006 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6007 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6008 assert(SrcMO.getSubReg() == 0 &&
6009 "Unexpected subreg on physical register");
6010 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6011 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6012 Register());
6013 return &*--InsertPt;
6014 }
6015
6016 // Handle cases like filling use of:
6017 //
6018 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6019 //
6020 // where we can load the full virtual reg source stack slot, into the subreg
6021 // destination, in this case producing:
6022 //
6023 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6024 //
6025 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6026 const TargetRegisterClass *FillRC;
6027 switch (DstMO.getSubReg()) {
6028 default:
6029 FillRC = nullptr;
6030 break;
6031 case AArch64::sub_32:
6032 FillRC = &AArch64::GPR32RegClass;
6033 break;
6034 case AArch64::ssub:
6035 FillRC = &AArch64::FPR32RegClass;
6036 break;
6037 case AArch64::dsub:
6038 FillRC = &AArch64::FPR64RegClass;
6039 break;
6040 }
6041
6042 if (FillRC) {
6043 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6044 TRI.getRegSizeInBits(*FillRC) &&
6045 "Mismatched regclass size on folded subreg COPY");
6046 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6047 Register());
6048 MachineInstr &LoadMI = *--InsertPt;
6049 MachineOperand &LoadDst = LoadMI.getOperand(0);
6050 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6051 LoadDst.setSubReg(DstMO.getSubReg());
6052 LoadDst.setIsUndef();
6053 return &LoadMI;
6054 }
6055 }
6056 }
6057
6058 // Cannot fold.
6059 return nullptr;
6060}
6061
6062 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
6063 StackOffset &SOffset,
6064 bool *OutUseUnscaledOp,
6065 unsigned *OutUnscaledOp,
6066 int64_t *EmittableOffset) {
6067 // Set output values in case of early exit.
6068 if (EmittableOffset)
6069 *EmittableOffset = 0;
6070 if (OutUseUnscaledOp)
6071 *OutUseUnscaledOp = false;
6072 if (OutUnscaledOp)
6073 *OutUnscaledOp = 0;
6074
6075 // Exit early for structured vector spills/fills as they can't take an
6076 // immediate offset.
6077 switch (MI.getOpcode()) {
6078 default:
6079 break;
6080 case AArch64::LD1Rv1d:
6081 case AArch64::LD1Rv2s:
6082 case AArch64::LD1Rv2d:
6083 case AArch64::LD1Rv4h:
6084 case AArch64::LD1Rv4s:
6085 case AArch64::LD1Rv8b:
6086 case AArch64::LD1Rv8h:
6087 case AArch64::LD1Rv16b:
6088 case AArch64::LD1Twov2d:
6089 case AArch64::LD1Threev2d:
6090 case AArch64::LD1Fourv2d:
6091 case AArch64::LD1Twov1d:
6092 case AArch64::LD1Threev1d:
6093 case AArch64::LD1Fourv1d:
6094 case AArch64::ST1Twov2d:
6095 case AArch64::ST1Threev2d:
6096 case AArch64::ST1Fourv2d:
6097 case AArch64::ST1Twov1d:
6098 case AArch64::ST1Threev1d:
6099 case AArch64::ST1Fourv1d:
6100 case AArch64::ST1i8:
6101 case AArch64::ST1i16:
6102 case AArch64::ST1i32:
6103 case AArch64::ST1i64:
6104 case AArch64::IRG:
6105 case AArch64::IRGstack:
6106 case AArch64::STGloop:
6107 case AArch64::STZGloop:
6108 return AArch64FrameOffsetCannotUpdate;
6109 }
6110
6111 // Get the min/max offset and the scale.
6112 TypeSize ScaleValue(0U, false), Width(0U, false);
6113 int64_t MinOff, MaxOff;
6114 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6115 MaxOff))
6116 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6117
6118 // Construct the complete offset.
6119 bool IsMulVL = ScaleValue.isScalable();
6120 unsigned Scale = ScaleValue.getKnownMinValue();
6121 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6122
6123 const MachineOperand &ImmOpnd =
6124 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6125 Offset += ImmOpnd.getImm() * Scale;
6126
6127 // If the offset doesn't match the scale, we rewrite the instruction to
6128 // use the unscaled instruction instead. Likewise, if we have a negative
6129 // offset and there is an unscaled op to use.
6130 std::optional<unsigned> UnscaledOp =
6131 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
6132 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6133 if (useUnscaledOp &&
6134 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6135 MaxOff))
6136 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6137
6138 Scale = ScaleValue.getKnownMinValue();
6139 assert(IsMulVL == ScaleValue.isScalable() &&
6140 "Unscaled opcode has different value for scalable");
6141
6142 int64_t Remainder = Offset % Scale;
6143 assert(!(Remainder && useUnscaledOp) &&
6144 "Cannot have remainder when using unscaled op");
6145
6146 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6147 int64_t NewOffset = Offset / Scale;
6148 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6149 Offset = Remainder;
6150 else {
6151 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6152 Offset = Offset - (NewOffset * Scale);
6153 }
6154
6155 if (EmittableOffset)
6156 *EmittableOffset = NewOffset;
6157 if (OutUseUnscaledOp)
6158 *OutUseUnscaledOp = useUnscaledOp;
6159 if (OutUnscaledOp && UnscaledOp)
6160 *OutUnscaledOp = *UnscaledOp;
6161
6162 if (IsMulVL)
6163 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6164 else
6165 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6166 return AArch64FrameOffsetCanUpdate |
6167 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6168}
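// For illustration (numbers chosen for the example only): with LDRXui
// (scale 8, immediate range [0, 4095]) and a combined byte offset of 40,
// NewOffset becomes 5 and the remaining SOffset is zero, so the access is
// legal as-is; an offset of 37 is not a multiple of the scale, so the
// unscaled LDURXi (scale 1, range [-256, 255]) is selected instead with
// NewOffset = 37.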
6169
6170 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
6171 unsigned FrameReg, StackOffset &Offset,
6172 const AArch64InstrInfo *TII) {
6173 unsigned Opcode = MI.getOpcode();
6174 unsigned ImmIdx = FrameRegIdx + 1;
6175
6176 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6177 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6178 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6179 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6180 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6181 MI.eraseFromParent();
6182 Offset = StackOffset();
6183 return true;
6184 }
6185
6186 int64_t NewOffset;
6187 unsigned UnscaledOp;
6188 bool UseUnscaledOp;
6189 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6190 &UnscaledOp, &NewOffset);
6191 if (Status & AArch64FrameOffsetCanUpdate) {
6192 if (Status & AArch64FrameOffsetIsLegal)
6193 // Replace the FrameIndex with FrameReg.
6194 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6195 if (UseUnscaledOp)
6196 MI.setDesc(TII->get(UnscaledOp));
6197
6198 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6199 return !Offset;
6200 }
6201
6202 return false;
6203}
6204
6205 void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
6206 MachineBasicBlock::iterator MI) const {
6207 DebugLoc DL;
6208 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
6209}
6210
6211 MCInst AArch64InstrInfo::getNop() const {
6212 return MCInstBuilder(AArch64::HINT).addImm(0);
6213}
6214
6215// AArch64 supports MachineCombiner.
6216bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6217
6218// True when Opc sets flag
6219static bool isCombineInstrSettingFlag(unsigned Opc) {
6220 switch (Opc) {
6221 case AArch64::ADDSWrr:
6222 case AArch64::ADDSWri:
6223 case AArch64::ADDSXrr:
6224 case AArch64::ADDSXri:
6225 case AArch64::SUBSWrr:
6226 case AArch64::SUBSXrr:
6227 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6228 case AArch64::SUBSWri:
6229 case AArch64::SUBSXri:
6230 return true;
6231 default:
6232 break;
6233 }
6234 return false;
6235}
6236
6237// 32b Opcodes that can be combined with a MUL
6238static bool isCombineInstrCandidate32(unsigned Opc) {
6239 switch (Opc) {
6240 case AArch64::ADDWrr:
6241 case AArch64::ADDWri:
6242 case AArch64::SUBWrr:
6243 case AArch64::ADDSWrr:
6244 case AArch64::ADDSWri:
6245 case AArch64::SUBSWrr:
6246 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6247 case AArch64::SUBWri:
6248 case AArch64::SUBSWri:
6249 return true;
6250 default:
6251 break;
6252 }
6253 return false;
6254}
6255
6256// 64b Opcodes that can be combined with a MUL
6257static bool isCombineInstrCandidate64(unsigned Opc) {
6258 switch (Opc) {
6259 case AArch64::ADDXrr:
6260 case AArch64::ADDXri:
6261 case AArch64::SUBXrr:
6262 case AArch64::ADDSXrr:
6263 case AArch64::ADDSXri:
6264 case AArch64::SUBSXrr:
6265 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6266 case AArch64::SUBXri:
6267 case AArch64::SUBSXri:
6268 case AArch64::ADDv8i8:
6269 case AArch64::ADDv16i8:
6270 case AArch64::ADDv4i16:
6271 case AArch64::ADDv8i16:
6272 case AArch64::ADDv2i32:
6273 case AArch64::ADDv4i32:
6274 case AArch64::SUBv8i8:
6275 case AArch64::SUBv16i8:
6276 case AArch64::SUBv4i16:
6277 case AArch64::SUBv8i16:
6278 case AArch64::SUBv2i32:
6279 case AArch64::SUBv4i32:
6280 return true;
6281 default:
6282 break;
6283 }
6284 return false;
6285}
6286
6287// FP Opcodes that can be combined with a FMUL.
6288static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6289 switch (Inst.getOpcode()) {
6290 default:
6291 break;
6292 case AArch64::FADDHrr:
6293 case AArch64::FADDSrr:
6294 case AArch64::FADDDrr:
6295 case AArch64::FADDv4f16:
6296 case AArch64::FADDv8f16:
6297 case AArch64::FADDv2f32:
6298 case AArch64::FADDv2f64:
6299 case AArch64::FADDv4f32:
6300 case AArch64::FSUBHrr:
6301 case AArch64::FSUBSrr:
6302 case AArch64::FSUBDrr:
6303 case AArch64::FSUBv4f16:
6304 case AArch64::FSUBv8f16:
6305 case AArch64::FSUBv2f32:
6306 case AArch64::FSUBv2f64:
6307 case AArch64::FSUBv4f32:
6308 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
6309 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6310 // the target options or if FADD/FSUB has the contract fast-math flag.
6311 return Options.UnsafeFPMath ||
6312 Options.AllowFPOpFusion == FPOpFusion::Fast ||
6313 Inst.getFlag(MachineInstr::FmContract);
6314 return true;
6315 }
6316 return false;
6317}
6318
6319// Opcodes that can be combined with a MUL
6320static bool isCombineInstrCandidate(unsigned Opc) {
6321 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
6322}
6323
6324//
6325// Utility routine that checks if \param MO is defined by an
6326// \param CombineOpc instruction in the basic block \param MBB
6327 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
6328 unsigned CombineOpc, unsigned ZeroReg = 0,
6329 bool CheckZeroReg = false) {
6330 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6331 MachineInstr *MI = nullptr;
6332
6333 if (MO.isReg() && MO.getReg().isVirtual())
6334 MI = MRI.getUniqueVRegDef(MO.getReg());
6335 // And it needs to be in the trace (otherwise, it won't have a depth).
6336 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
6337 return false;
6338 // Must only be used by the user we combine with.
6339 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6340 return false;
6341
6342 if (CheckZeroReg) {
6343 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6344 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6345 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6346 // The third input reg must be zero.
6347 if (MI->getOperand(3).getReg() != ZeroReg)
6348 return false;
6349 }
6350
6351 if (isCombineInstrSettingFlag(CombineOpc) &&
6352 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6353 return false;
6354
6355 return true;
6356}
6357
6358//
6359// Is \param MO defined by an integer multiply and can be combined?
6360 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6361 unsigned MulOpc, unsigned ZeroReg) {
6362 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6363}
6364
6365//
6366// Is \param MO defined by a floating-point multiply and can be combined?
6367 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
6368 unsigned MulOpc) {
6369 return canCombine(MBB, MO, MulOpc);
6370}
6371
6372// TODO: There are many more machine instruction opcodes to match:
6373// 1. Other data types (integer, vectors)
6374// 2. Other math / logic operations (xor, or)
6375// 3. Other forms of the same operation (intrinsics and other variants)
6376 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6377 bool Invert) const {
6378 if (Invert)
6379 return false;
6380 switch (Inst.getOpcode()) {
6381 // == Floating-point types ==
6382 // -- Floating-point instructions --
6383 case AArch64::FADDHrr:
6384 case AArch64::FADDSrr:
6385 case AArch64::FADDDrr:
6386 case AArch64::FMULHrr:
6387 case AArch64::FMULSrr:
6388 case AArch64::FMULDrr:
6389 case AArch64::FMULX16:
6390 case AArch64::FMULX32:
6391 case AArch64::FMULX64:
6392 // -- Advanced SIMD instructions --
6393 case AArch64::FADDv4f16:
6394 case AArch64::FADDv8f16:
6395 case AArch64::FADDv2f32:
6396 case AArch64::FADDv4f32:
6397 case AArch64::FADDv2f64:
6398 case AArch64::FMULv4f16:
6399 case AArch64::FMULv8f16:
6400 case AArch64::FMULv2f32:
6401 case AArch64::FMULv4f32:
6402 case AArch64::FMULv2f64:
6403 case AArch64::FMULXv4f16:
6404 case AArch64::FMULXv8f16:
6405 case AArch64::FMULXv2f32:
6406 case AArch64::FMULXv4f32:
6407 case AArch64::FMULXv2f64:
6408 // -- SVE instructions --
6409 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6410 // in the SVE instruction set (though there are predicated ones).
6411 case AArch64::FADD_ZZZ_H:
6412 case AArch64::FADD_ZZZ_S:
6413 case AArch64::FADD_ZZZ_D:
6414 case AArch64::FMUL_ZZZ_H:
6415 case AArch64::FMUL_ZZZ_S:
6416 case AArch64::FMUL_ZZZ_D:
6417 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6418 (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6419 Inst.getFlag(MachineInstr::MIFlag::FmNsz));
6420
6421 // == Integer types ==
6422 // -- Base instructions --
6423 // Opcodes MULWrr and MULXrr don't exist because
6424 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6425 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6426 // The machine-combiner does not support three-source-operand machine
6427 // instructions, so we cannot reassociate MULs.
6428 case AArch64::ADDWrr:
6429 case AArch64::ADDXrr:
6430 case AArch64::ANDWrr:
6431 case AArch64::ANDXrr:
6432 case AArch64::ORRWrr:
6433 case AArch64::ORRXrr:
6434 case AArch64::EORWrr:
6435 case AArch64::EORXrr:
6436 case AArch64::EONWrr:
6437 case AArch64::EONXrr:
6438 // -- Advanced SIMD instructions --
6439 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6440 // in the Advanced SIMD instruction set.
6441 case AArch64::ADDv8i8:
6442 case AArch64::ADDv16i8:
6443 case AArch64::ADDv4i16:
6444 case AArch64::ADDv8i16:
6445 case AArch64::ADDv2i32:
6446 case AArch64::ADDv4i32:
6447 case AArch64::ADDv1i64:
6448 case AArch64::ADDv2i64:
6449 case AArch64::MULv8i8:
6450 case AArch64::MULv16i8:
6451 case AArch64::MULv4i16:
6452 case AArch64::MULv8i16:
6453 case AArch64::MULv2i32:
6454 case AArch64::MULv4i32:
6455 case AArch64::ANDv8i8:
6456 case AArch64::ANDv16i8:
6457 case AArch64::ORRv8i8:
6458 case AArch64::ORRv16i8:
6459 case AArch64::EORv8i8:
6460 case AArch64::EORv16i8:
6461 // -- SVE instructions --
6462 case AArch64::ADD_ZZZ_B:
6463 case AArch64::ADD_ZZZ_H:
6464 case AArch64::ADD_ZZZ_S:
6465 case AArch64::ADD_ZZZ_D:
6466 case AArch64::MUL_ZZZ_B:
6467 case AArch64::MUL_ZZZ_H:
6468 case AArch64::MUL_ZZZ_S:
6469 case AArch64::MUL_ZZZ_D:
6470 case AArch64::AND_ZZZ:
6471 case AArch64::ORR_ZZZ:
6472 case AArch64::EOR_ZZZ:
6473 return true;
6474
6475 default:
6476 return false;
6477 }
6478}
6479
6480/// Find instructions that can be turned into madd.
6481 static bool getMaddPatterns(MachineInstr &Root,
6482 SmallVectorImpl<unsigned> &Patterns) {
6483 unsigned Opc = Root.getOpcode();
6484 MachineBasicBlock &MBB = *Root.getParent();
6485 bool Found = false;
6486
6487 if (!isCombineInstrCandidate(Opc))
6488 return false;
6489 if (isCombineInstrSettingFlag(Opc)) {
6490 int Cmp_NZCV =
6491 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6492 // When NZCV is live bail out.
6493 if (Cmp_NZCV == -1)
6494 return false;
6495 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6496 // When opcode can't change bail out.
6497 // CHECKME: do we miss any cases for opcode conversion?
6498 if (NewOpc == Opc)
6499 return false;
6500 Opc = NewOpc;
6501 }
6502
6503 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6504 unsigned Pattern) {
6505 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6506 Patterns.push_back(Pattern);
6507 Found = true;
6508 }
6509 };
6510
6511 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6512 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6513 Patterns.push_back(Pattern);
6514 Found = true;
6515 }
6516 };
6517
6518 typedef AArch64MachineCombinerPattern MCP;
6519
6520 switch (Opc) {
6521 default:
6522 break;
6523 case AArch64::ADDWrr:
6524 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6525 "ADDWrr does not have register operands");
6526 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6527 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6528 break;
6529 case AArch64::ADDXrr:
6530 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6531 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6532 break;
6533 case AArch64::SUBWrr:
6534 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6535 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6536 break;
6537 case AArch64::SUBXrr:
6538 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6539 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6540 break;
6541 case AArch64::ADDWri:
6542 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6543 break;
6544 case AArch64::ADDXri:
6545 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6546 break;
6547 case AArch64::SUBWri:
6548 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6549 break;
6550 case AArch64::SUBXri:
6551 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6552 break;
6553 case AArch64::ADDv8i8:
6554 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6555 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6556 break;
6557 case AArch64::ADDv16i8:
6558 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6559 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6560 break;
6561 case AArch64::ADDv4i16:
6562 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6563 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6564 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6565 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6566 break;
6567 case AArch64::ADDv8i16:
6568 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6569 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6570 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6571 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6572 break;
6573 case AArch64::ADDv2i32:
6574 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6575 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6576 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6577 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6578 break;
6579 case AArch64::ADDv4i32:
6580 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6581 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6582 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6583 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6584 break;
6585 case AArch64::SUBv8i8:
6586 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6587 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6588 break;
6589 case AArch64::SUBv16i8:
6590 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6591 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6592 break;
6593 case AArch64::SUBv4i16:
6594 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6595 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6596 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6597 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6598 break;
6599 case AArch64::SUBv8i16:
6600 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6601 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6602 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6603 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6604 break;
6605 case AArch64::SUBv2i32:
6606 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6607 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6608 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6609 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6610 break;
6611 case AArch64::SUBv4i32:
6612 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6613 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6614 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6615 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6616 break;
6617 }
6618 return Found;
6619}
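// A small MIR sketch of the MULADDW_OP1 pattern matched above (virtual
// register numbers are invented for the example):
//   %3:gpr32 = MADDWrrr %1, %2, $wzr   ; this is the "mul"
//   %4:gpr32 = ADDWrr %3, %0
// is later rewritten by the combiner into
//   %4:gpr32 = MADDWrrr %1, %2, %0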
6620/// Floating-Point Support
6621
6622/// Find instructions that can be turned into a fused multiply-add.
6623 static bool getFMAPatterns(MachineInstr &Root,
6624 SmallVectorImpl<unsigned> &Patterns) {
6625
6626 if (!isCombineInstrCandidateFP(Root))
6627 return false;
6628
6629 MachineBasicBlock &MBB = *Root.getParent();
6630 bool Found = false;
6631
6632 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6633 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6634 Patterns.push_back(Pattern);
6635 return true;
6636 }
6637 return false;
6638 };
6639
6640 typedef AArch64MachineCombinerPattern MCP;
6641
6642 switch (Root.getOpcode()) {
6643 default:
6644 assert(false && "Unsupported FP instruction in combiner\n");
6645 break;
6646 case AArch64::FADDHrr:
6647 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6648 "FADDHrr does not have register operands");
6649
6650 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6651 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6652 break;
6653 case AArch64::FADDSrr:
6654 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6655 "FADDSrr does not have register operands");
6656
6657 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6658 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6659
6660 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6661 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6662 break;
6663 case AArch64::FADDDrr:
6664 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6665 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6666
6667 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6668 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6669 break;
6670 case AArch64::FADDv4f16:
6671 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6672 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6673
6674 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6675 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6676 break;
6677 case AArch64::FADDv8f16:
6678 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6679 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6680
6681 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6682 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6683 break;
6684 case AArch64::FADDv2f32:
6685 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6686 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6687
6688 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6689 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6690 break;
6691 case AArch64::FADDv2f64:
6692 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6693 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6694
6695 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6696 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6697 break;
6698 case AArch64::FADDv4f32:
6699 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6700 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6701
6702 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6703 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6704 break;
6705 case AArch64::FSUBHrr:
6706 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6707 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6708 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6709 break;
6710 case AArch64::FSUBSrr:
6711 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6712
6713 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6714 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6715
6716 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6717 break;
6718 case AArch64::FSUBDrr:
6719 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6720
6721 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6722 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6723
6724 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6725 break;
6726 case AArch64::FSUBv4f16:
6727 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6728 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6729
6730 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6731 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6732 break;
6733 case AArch64::FSUBv8f16:
6734 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6735 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6736
6737 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6738 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6739 break;
6740 case AArch64::FSUBv2f32:
6741 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6742 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6743
6744 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6745 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6746 break;
6747 case AArch64::FSUBv2f64:
6748 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6749 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6750
6751 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6752 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6753 break;
6754 case AArch64::FSUBv4f32:
6755 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6756 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6757
6758 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6759 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6760 break;
6761 }
6762 return Found;
6763}
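// Sketch of one FP pattern recorded here (FMULADDS_OP1; registers invented
// for the example):
//   %2:fpr32 = FMULSrr %0, %1
//   %3:fpr32 = FADDSrr %2, %4
// becomes, when the pattern is later applied,
//   %3:fpr32 = FMADDSrrr %0, %1, %4
// provided the fusion conditions checked in isCombineInstrCandidateFP hold.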
6764
6765 static bool getFMULPatterns(MachineInstr &Root,
6766 SmallVectorImpl<unsigned> &Patterns) {
6767 MachineBasicBlock &MBB = *Root.getParent();
6768 bool Found = false;
6769
6770 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6771 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6772 MachineOperand &MO = Root.getOperand(Operand);
6773 MachineInstr *MI = nullptr;
6774 if (MO.isReg() && MO.getReg().isVirtual())
6775 MI = MRI.getUniqueVRegDef(MO.getReg());
6776 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6777 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6778 MI->getOperand(1).getReg().isVirtual())
6779 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6780 if (MI && MI->getOpcode() == Opcode) {
6781 Patterns.push_back(Pattern);
6782 return true;
6783 }
6784 return false;
6785 };
6786
6787 typedef AArch64MachineCombinerPattern MCP;
6788
6789 switch (Root.getOpcode()) {
6790 default:
6791 return false;
6792 case AArch64::FMULv2f32:
6793 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6794 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6795 break;
6796 case AArch64::FMULv2f64:
6797 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6798 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6799 break;
6800 case AArch64::FMULv4f16:
6801 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6802 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6803 break;
6804 case AArch64::FMULv4f32:
6805 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6806 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6807 break;
6808 case AArch64::FMULv8f16:
6809 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6810 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6811 break;
6812 }
6813
6814 return Found;
6815}
6816
6817 static bool getFNEGPatterns(MachineInstr &Root,
6818 SmallVectorImpl<unsigned> &Patterns) {
6819 unsigned Opc = Root.getOpcode();
6820 MachineBasicBlock &MBB = *Root.getParent();
6821 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6822
6823 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6824 MachineOperand &MO = Root.getOperand(1);
6825 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6826 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6827 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6828 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
6829 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
6830 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
6831 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6832 Patterns.push_back(Pattern);
6833 return true;
6834 }
6835 return false;
6836 };
6837
6838 switch (Opc) {
6839 default:
6840 break;
6841 case AArch64::FNEGDr:
6842 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6843 case AArch64::FNEGSr:
6844 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6845 }
6846
6847 return false;
6848}
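// Sketch of the FNMADD pattern above (registers invented): when the only
// user of
//   %3:fpr64 = FMADDDrrr %0, %1, %2
// is
//   %4:fpr64 = FNEGDr %3
// and both instructions carry the contract and nsz fast-math flags, the pair
// can be replaced by a single %4:fpr64 = FNMADDDrrr %0, %1, %2.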
6849
6850/// Return true when a code sequence can improve throughput. It
6851/// should be called only for instructions in loops.
6852/// \param Pattern - combiner pattern
6853 bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
6854 switch (Pattern) {
6855 default:
6856 break;
6962 return true;
6963 } // end switch (Pattern)
6964 return false;
6965}
6966
6967/// Find other MI combine patterns.
6968 static bool getMiscPatterns(MachineInstr &Root,
6969 SmallVectorImpl<unsigned> &Patterns) {
6970 // A - (B + C) ==> (A - B) - C or (A - C) - B
6971 unsigned Opc = Root.getOpcode();
6972 MachineBasicBlock &MBB = *Root.getParent();
6973
6974 switch (Opc) {
6975 case AArch64::SUBWrr:
6976 case AArch64::SUBSWrr:
6977 case AArch64::SUBXrr:
6978 case AArch64::SUBSXrr:
6979 // Found candidate root.
6980 break;
6981 default:
6982 return false;
6983 }
6984
6985 if (isCombineInstrSettingFlag(Opc) &&
6986 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6987 -1)
6988 return false;
6989
6990 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6991 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6992 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6993 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6994 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
6995 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
6996 return true;
6997 }
6998
6999 return false;
7000}
7001
7002 CombinerObjective
7003 AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
7004 switch (Pattern) {
7005 case AArch64MachineCombinerPattern::SUBADD_OP1:
7006 case AArch64MachineCombinerPattern::SUBADD_OP2:
7007 return CombinerObjective::MustReduceDepth;
7008 default:
7009 return TargetInstrInfo::getCombinerObjective(Pattern);
7010 }
7011}
7012
7013/// Return true when there is potentially a faster code sequence for an
7014/// instruction chain ending in \p Root. All potential patterns are listed in
7015/// the \p Pattern vector. Pattern should be sorted in priority order since the
7016/// pattern evaluator stops checking as soon as it finds a faster sequence.
7017
7018 bool AArch64InstrInfo::getMachineCombinerPatterns(
7019 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7020 bool DoRegPressureReduce) const {
7021 // Integer patterns
7022 if (getMaddPatterns(Root, Patterns))
7023 return true;
7024 // Floating point patterns
7025 if (getFMULPatterns(Root, Patterns))
7026 return true;
7027 if (getFMAPatterns(Root, Patterns))
7028 return true;
7029 if (getFNEGPatterns(Root, Patterns))
7030 return true;
7031
7032 // Other patterns
7033 if (getMiscPatterns(Root, Patterns))
7034 return true;
7035
7036 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7037 DoRegPressureReduce);
7038}
7039
7040 enum class FMAInstKind { Default, Indexed, Accumulator };
7041/// genFusedMultiply - Generate fused multiply instructions.
7042/// This function supports both integer and floating point instructions.
7043/// A typical example:
7044/// F|MUL I=A,B,0
7045/// F|ADD R,I,C
7046/// ==> F|MADD R,A,B,C
7047/// \param MF Containing MachineFunction
7048/// \param MRI Register information
7049/// \param TII Target information
7050/// \param Root is the F|ADD instruction
7051/// \param [out] InsInstrs is a vector of machine instructions and will
7052/// contain the generated madd instruction
7053/// \param IdxMulOpd is index of operand in Root that is the result of
7054/// the F|MUL. In the example above IdxMulOpd is 1.
7055/// \param MaddOpc the opcode of the f|madd instruction
7056/// \param RC Register class of operands
7057/// \param kind of fma instruction (addressing mode) to be generated
7058/// \param ReplacedAddend is the result register from the instruction
7059/// replacing the non-combined operand, if any.
7060static MachineInstr *
7061 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
7062 const TargetInstrInfo *TII, MachineInstr &Root,
7063 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7064 unsigned MaddOpc, const TargetRegisterClass *RC,
7065 FMAInstKind kind = FMAInstKind::Default,
7066 const Register *ReplacedAddend = nullptr) {
7067 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7068
7069 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7070 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7071 Register ResultReg = Root.getOperand(0).getReg();
7072 Register SrcReg0 = MUL->getOperand(1).getReg();
7073 bool Src0IsKill = MUL->getOperand(1).isKill();
7074 Register SrcReg1 = MUL->getOperand(2).getReg();
7075 bool Src1IsKill = MUL->getOperand(2).isKill();
7076
7077 Register SrcReg2;
7078 bool Src2IsKill;
7079 if (ReplacedAddend) {
7080 // If we just generated a new addend, we must be its only use.
7081 SrcReg2 = *ReplacedAddend;
7082 Src2IsKill = true;
7083 } else {
7084 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7085 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7086 }
7087
7088 if (ResultReg.isVirtual())
7089 MRI.constrainRegClass(ResultReg, RC);
7090 if (SrcReg0.isVirtual())
7091 MRI.constrainRegClass(SrcReg0, RC);
7092 if (SrcReg1.isVirtual())
7093 MRI.constrainRegClass(SrcReg1, RC);
7094 if (SrcReg2.isVirtual())
7095 MRI.constrainRegClass(SrcReg2, RC);
7096
7097 MachineInstrBuilder MIB;
7098 if (kind == FMAInstKind::Default)
7099 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7100 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7101 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7102 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7103 else if (kind == FMAInstKind::Indexed)
7104 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7105 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7106 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7107 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7108 .addImm(MUL->getOperand(3).getImm());
7109 else if (kind == FMAInstKind::Accumulator)
7110 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7111 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7112 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7113 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7114 else
7115 assert(false && "Invalid FMA instruction kind \n");
7116 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
7117 InsInstrs.push_back(MIB);
7118 return MUL;
7119}
7120
7121static MachineInstr *
7122 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
7123 const TargetInstrInfo *TII, MachineInstr &Root,
7124 SmallVectorImpl<MachineInstr *> &InsInstrs) {
7125 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7126
7127 unsigned Opc = 0;
7128 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7129 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7130 Opc = AArch64::FNMADDSrrr;
7131 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7132 Opc = AArch64::FNMADDDrrr;
7133 else
7134 return nullptr;
7135
7136 Register ResultReg = Root.getOperand(0).getReg();
7137 Register SrcReg0 = MAD->getOperand(1).getReg();
7138 Register SrcReg1 = MAD->getOperand(2).getReg();
7139 Register SrcReg2 = MAD->getOperand(3).getReg();
7140 bool Src0IsKill = MAD->getOperand(1).isKill();
7141 bool Src1IsKill = MAD->getOperand(2).isKill();
7142 bool Src2IsKill = MAD->getOperand(3).isKill();
7143 if (ResultReg.isVirtual())
7144 MRI.constrainRegClass(ResultReg, RC);
7145 if (SrcReg0.isVirtual())
7146 MRI.constrainRegClass(SrcReg0, RC);
7147 if (SrcReg1.isVirtual())
7148 MRI.constrainRegClass(SrcReg1, RC);
7149 if (SrcReg2.isVirtual())
7150 MRI.constrainRegClass(SrcReg2, RC);
7151
7152 MachineInstrBuilder MIB =
7153 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7154 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7155 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7156 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7157 InsInstrs.push_back(MIB);
7158
7159 return MAD;
7160}
7161
7162/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
7163static MachineInstr *
7164 genIndexedMultiply(MachineInstr &Root,
7165 SmallVectorImpl<MachineInstr *> &InsInstrs,
7166 unsigned IdxDupOp, unsigned MulOpc,
7167 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
7168 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
7169 "Invalid index of FMUL operand");
7170
7171 MachineFunction &MF = *Root.getMF();
7172 MachineRegisterInfo &MRI = MF.getRegInfo();
7173
7174 MachineInstr *Dup =
7175 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
7176
7177 if (Dup->getOpcode() == TargetOpcode::COPY)
7178 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
7179
7180 Register DupSrcReg = Dup->getOperand(1).getReg();
7181 MRI.clearKillFlags(DupSrcReg);
7182 MRI.constrainRegClass(DupSrcReg, RC);
7183
7184 unsigned DupSrcLane = Dup->getOperand(2).getImm();
7185
7186 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
7187 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
7188
7189 Register ResultReg = Root.getOperand(0).getReg();
7190
7191 MachineInstrBuilder MIB;
7192 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
7193 .add(MulOp)
7194 .addReg(DupSrcReg)
7195 .addImm(DupSrcLane);
7196
7197 InsInstrs.push_back(MIB);
7198 return &Root;
7199}
7200
7201/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
7202/// instructions.
7203///
7204/// \see genFusedMultiply
7205 static MachineInstr *genFusedMultiplyAcc(
7206 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7207 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7208 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7209 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7210 FMAInstKind::Accumulator);
7211}
7212
7213/// genNeg - Helper to generate an intermediate negation of the second operand
7214/// of Root
7215 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
7216 const TargetInstrInfo *TII, MachineInstr &Root,
7217 SmallVectorImpl<MachineInstr *> &InsInstrs,
7218 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
7219 unsigned MnegOpc, const TargetRegisterClass *RC) {
7220 Register NewVR = MRI.createVirtualRegister(RC);
7221 MachineInstrBuilder MIB =
7222 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
7223 .add(Root.getOperand(2));
7224 InsInstrs.push_back(MIB);
7225
7226 assert(InstrIdxForVirtReg.empty());
7227 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7228
7229 return NewVR;
7230}
7231
7232/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
7233/// instructions with an additional negation of the accumulator
7234 static MachineInstr *genFusedMultiplyAccNeg(
7235 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7236 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7237 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7238 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7239 assert(IdxMulOpd == 1);
7240
7241 Register NewVR =
7242 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7243 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7244 FMAInstKind::Accumulator, &NewVR);
7245}
7246
7247/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
7248/// instructions.
7249///
7250/// \see genFusedMultiply
7251 static MachineInstr *genFusedMultiplyIdx(
7252 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7253 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7254 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
7255 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7256 FMAInstKind::Indexed);
7257}
7258
7259/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
7260/// instructions with an additional negation of the accumulator
7261 static MachineInstr *genFusedMultiplyIdxNeg(
7262 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
7263 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
7264 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
7265 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
7266 assert(IdxMulOpd == 1);
7267
7268 Register NewVR =
7269 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
7270
7271 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
7272 FMAInstKind::Indexed, &NewVR);
7273}
7274
7275/// genMaddR - Generate madd instruction and combine mul and add using
7276/// an extra virtual register
7277/// Example - an ADD intermediate needs to be stored in a register:
7278/// MUL I=A,B,0
7279/// ADD R,I,Imm
7280/// ==> ORR V, ZR, Imm
7281/// ==> MADD R,A,B,V
7282/// \param MF Containing MachineFunction
7283/// \param MRI Register information
7284/// \param TII Target information
7285/// \param Root is the ADD instruction
7286/// \param [out] InsInstrs is a vector of machine instructions and will
7287/// contain the generated madd instruction
7288/// \param IdxMulOpd is index of operand in Root that is the result of
7289/// the MUL. In the example above IdxMulOpd is 1.
7290/// \param MaddOpc the opcode of the madd instruction
7291/// \param VR is a virtual register that holds the value of an ADD operand
7292/// (V in the example above).
7293/// \param RC Register class of operands
7295 const TargetInstrInfo *TII, MachineInstr &Root,
7297 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
7298 const TargetRegisterClass *RC) {
7299 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7300
7301 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7302 Register ResultReg = Root.getOperand(0).getReg();
7303 Register SrcReg0 = MUL->getOperand(1).getReg();
7304 bool Src0IsKill = MUL->getOperand(1).isKill();
7305 Register SrcReg1 = MUL->getOperand(2).getReg();
7306 bool Src1IsKill = MUL->getOperand(2).isKill();
7307
7308 if (ResultReg.isVirtual())
7309 MRI.constrainRegClass(ResultReg, RC);
7310 if (SrcReg0.isVirtual())
7311 MRI.constrainRegClass(SrcReg0, RC);
7312 if (SrcReg1.isVirtual())
7313 MRI.constrainRegClass(SrcReg1, RC);
7315 MRI.constrainRegClass(VR, RC);
7316
7318 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7319 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7320 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7321 .addReg(VR);
7322 // Insert the MADD
7323 InsInstrs.push_back(MIB);
7324 return MUL;
7325}
7326
7327/// Do the following transformation
7328/// A - (B + C) ==> (A - B) - C
7329/// A - (B + C) ==> (A - C) - B
7330static void
7332 const TargetInstrInfo *TII, MachineInstr &Root,
7335 unsigned IdxOpd1,
7336 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
7337 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
7338 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
7339 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
7340
7341 Register ResultReg = Root.getOperand(0).getReg();
7342 Register RegA = Root.getOperand(1).getReg();
7343 bool RegAIsKill = Root.getOperand(1).isKill();
7344 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
7345 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
7346 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
7347 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
7348 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
7349
7350 unsigned Opcode = Root.getOpcode();
7351 if (Opcode == AArch64::SUBSWrr)
7352 Opcode = AArch64::SUBWrr;
7353 else if (Opcode == AArch64::SUBSXrr)
7354 Opcode = AArch64::SUBXrr;
7355 else
7356 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
7357 "Unexpected instruction opcode.");
7358
7359 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
7360 Flags &= ~MachineInstr::NoSWrap;
7361 Flags &= ~MachineInstr::NoUWrap;
7362
7363 MachineInstrBuilder MIB1 =
7364 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
7365 .addReg(RegA, getKillRegState(RegAIsKill))
7366 .addReg(RegB, getKillRegState(RegBIsKill))
7367 .setMIFlags(Flags);
7368 MachineInstrBuilder MIB2 =
7369 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
7370 .addReg(NewVR, getKillRegState(true))
7371 .addReg(RegC, getKillRegState(RegCIsKill))
7372 .setMIFlags(Flags);
7373
7374 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7375 InsInstrs.push_back(MIB1);
7376 InsInstrs.push_back(MIB2);
7377 DelInstrs.push_back(AddMI);
7378 DelInstrs.push_back(&Root);
7379}
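// A small worked example of the transformation above, with placeholder
// virtual registers:
//
//   %t = ADD  %b, %c
//   %r = SUBS %a, %t               ; Root, computes A - (B + C)
//
// with IdxOpd1 == 1 becomes
//
//   %n = SUB %a, %b                ; NewVR
//   %r = SUB %n, %c
//
// Note that a flag-setting SUBS root is rewritten using plain SUBs
// (presumably this pattern is only matched when the NZCV result is unused),
// and the nsw/nuw flags are cleared because re-association can change where
// intermediate wrapping occurs.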
7380
7381/// When getMachineCombinerPatterns() finds potential patterns,
7382/// this function generates the instructions that could replace the
7383/// original code sequence
7385 MachineInstr &Root, unsigned Pattern,
7388 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
7389 MachineBasicBlock &MBB = *Root.getParent();
7391 MachineFunction &MF = *MBB.getParent();
7393
7394 MachineInstr *MUL = nullptr;
7395 const TargetRegisterClass *RC;
7396 unsigned Opc;
7397 switch (Pattern) {
7398 default:
7399 // Reassociate instructions.
7401 DelInstrs, InstrIdxForVirtReg);
7402 return;
7404 // A - (B + C)
7405 // ==> (A - B) - C
7406 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
7407 InstrIdxForVirtReg);
7408 return;
7410 // A - (B + C)
7411 // ==> (A - C) - B
7412 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
7413 InstrIdxForVirtReg);
7414 return;
7417 // MUL I=A,B,0
7418 // ADD R,I,C
7419 // ==> MADD R,A,B,C
7420 // --- Create(MADD);
7422 Opc = AArch64::MADDWrrr;
7423 RC = &AArch64::GPR32RegClass;
7424 } else {
7425 Opc = AArch64::MADDXrrr;
7426 RC = &AArch64::GPR64RegClass;
7427 }
7428 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7429 break;
7432 // MUL I=A,B,0
7433 // ADD R,C,I
7434 // ==> MADD R,A,B,C
7435 // --- Create(MADD);
7437 Opc = AArch64::MADDWrrr;
7438 RC = &AArch64::GPR32RegClass;
7439 } else {
7440 Opc = AArch64::MADDXrrr;
7441 RC = &AArch64::GPR64RegClass;
7442 }
7443 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7444 break;
7447 // MUL I=A,B,0
7448 // ADD R,I,Imm
7449 // ==> MOV V, Imm
7450 // ==> MADD R,A,B,V
7451 // --- Create(MADD);
7452 const TargetRegisterClass *OrrRC;
7453 unsigned BitSize, OrrOpc, ZeroReg;
7455 OrrOpc = AArch64::ORRWri;
7456 OrrRC = &AArch64::GPR32spRegClass;
7457 BitSize = 32;
7458 ZeroReg = AArch64::WZR;
7459 Opc = AArch64::MADDWrrr;
7460 RC = &AArch64::GPR32RegClass;
7461 } else {
7462 OrrOpc = AArch64::ORRXri;
7463 OrrRC = &AArch64::GPR64spRegClass;
7464 BitSize = 64;
7465 ZeroReg = AArch64::XZR;
7466 Opc = AArch64::MADDXrrr;
7467 RC = &AArch64::GPR64RegClass;
7468 }
7469 Register NewVR = MRI.createVirtualRegister(OrrRC);
7470 uint64_t Imm = Root.getOperand(2).getImm();
7471
7472 if (Root.getOperand(3).isImm()) {
7473 unsigned Val = Root.getOperand(3).getImm();
7474 Imm = Imm << Val;
7475 }
7476 uint64_t UImm = SignExtend64(Imm, BitSize);
7477    // Check that the immediate can be composed via a single instruction.
7479 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7480 if (Insn.size() != 1)
7481 return;
7482 auto MovI = Insn.begin();
7484 // MOV is an alias for one of three instructions: movz, movn, and orr.
7485 if (MovI->Opcode == OrrOpc)
7486 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7487 .addReg(ZeroReg)
7488 .addImm(MovI->Op2);
7489 else {
7490 if (BitSize == 32)
7491 assert((MovI->Opcode == AArch64::MOVNWi ||
7492 MovI->Opcode == AArch64::MOVZWi) &&
7493 "Expected opcode");
7494 else
7495 assert((MovI->Opcode == AArch64::MOVNXi ||
7496 MovI->Opcode == AArch64::MOVZXi) &&
7497 "Expected opcode");
7498 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7499 .addImm(MovI->Op1)
7500 .addImm(MovI->Op2);
7501 }
7502 InsInstrs.push_back(MIB1);
7503 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7504 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7505 break;
7506 }
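// For the immediate-add case above, the rewrite is roughly (illustrative
// registers):
//
//   mul  w8, w0, w1
//   add  w9, w8, #16
//   ==>
//   mov  w10, #16                  ; a single MOVZ/MOVN/ORR
//   madd w9, w0, w1, w10
//
// It is only attempted when expandMOVImm can materialise the (possibly
// shifted) immediate in exactly one instruction; otherwise the function
// returns without emitting anything.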
7509 // MUL I=A,B,0
7510 // SUB R,I, C
7511 // ==> SUB V, 0, C
7512 // ==> MADD R,A,B,V // = -C + A*B
7513 // --- Create(MADD);
7514 const TargetRegisterClass *SubRC;
7515 unsigned SubOpc, ZeroReg;
7517 SubOpc = AArch64::SUBWrr;
7518 SubRC = &AArch64::GPR32spRegClass;
7519 ZeroReg = AArch64::WZR;
7520 Opc = AArch64::MADDWrrr;
7521 RC = &AArch64::GPR32RegClass;
7522 } else {
7523 SubOpc = AArch64::SUBXrr;
7524 SubRC = &AArch64::GPR64spRegClass;
7525 ZeroReg = AArch64::XZR;
7526 Opc = AArch64::MADDXrrr;
7527 RC = &AArch64::GPR64RegClass;
7528 }
7529 Register NewVR = MRI.createVirtualRegister(SubRC);
7530 // SUB NewVR, 0, C
7531 MachineInstrBuilder MIB1 =
7532 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7533 .addReg(ZeroReg)
7534 .add(Root.getOperand(2));
7535 InsInstrs.push_back(MIB1);
7536 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7537 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7538 break;
7539 }
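// Likewise for the register-subtract case above, e.g. (illustrative):
//
//   mul  w8, w0, w1
//   sub  w9, w8, w2
//   ==>
//   sub  w10, wzr, w2              ; w10 = -C
//   madd w9, w0, w1, w10           ; A*B - C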
7542 // MUL I=A,B,0
7543 // SUB R,C,I
7544 // ==> MSUB R,A,B,C (computes C - A*B)
7545 // --- Create(MSUB);
7547 Opc = AArch64::MSUBWrrr;
7548 RC = &AArch64::GPR32RegClass;
7549 } else {
7550 Opc = AArch64::MSUBXrrr;
7551 RC = &AArch64::GPR64RegClass;
7552 }
7553 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7554 break;
7557 // MUL I=A,B,0
7558 // SUB R,I, Imm
7559 // ==> MOV V, -Imm
7560 // ==> MADD R,A,B,V // = -Imm + A*B
7561 // --- Create(MADD);
7562 const TargetRegisterClass *OrrRC;
7563 unsigned BitSize, OrrOpc, ZeroReg;
7565 OrrOpc = AArch64::ORRWri;
7566 OrrRC = &AArch64::GPR32spRegClass;
7567 BitSize = 32;
7568 ZeroReg = AArch64::WZR;
7569 Opc = AArch64::MADDWrrr;
7570 RC = &AArch64::GPR32RegClass;
7571 } else {
7572 OrrOpc = AArch64::ORRXri;
7573 OrrRC = &AArch64::GPR64spRegClass;
7574 BitSize = 64;
7575 ZeroReg = AArch64::XZR;
7576 Opc = AArch64::MADDXrrr;
7577 RC = &AArch64::GPR64RegClass;
7578 }
7579 Register NewVR = MRI.createVirtualRegister(OrrRC);
7580 uint64_t Imm = Root.getOperand(2).getImm();
7581 if (Root.getOperand(3).isImm()) {
7582 unsigned Val = Root.getOperand(3).getImm();
7583 Imm = Imm << Val;
7584 }
7585 uint64_t UImm = SignExtend64(-Imm, BitSize);
7586    // Check that the negated immediate can be composed via a single instruction.
7588 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7589 if (Insn.size() != 1)
7590 return;
7591 auto MovI = Insn.begin();
7593 // MOV is an alias for one of three instructions: movz, movn, and orr.
7594 if (MovI->Opcode == OrrOpc)
7595 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7596 .addReg(ZeroReg)
7597 .addImm(MovI->Op2);
7598 else {
7599 if (BitSize == 32)
7600 assert((MovI->Opcode == AArch64::MOVNWi ||
7601 MovI->Opcode == AArch64::MOVZWi) &&
7602 "Expected opcode");
7603 else
7604 assert((MovI->Opcode == AArch64::MOVNXi ||
7605 MovI->Opcode == AArch64::MOVZXi) &&
7606 "Expected opcode");
7607 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7608 .addImm(MovI->Op1)
7609 .addImm(MovI->Op2);
7610 }
7611 InsInstrs.push_back(MIB1);
7612 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7613 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7614 break;
7615 }
7616
7618 Opc = AArch64::MLAv8i8;
7619 RC = &AArch64::FPR64RegClass;
7620 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7621 break;
7623 Opc = AArch64::MLAv8i8;
7624 RC = &AArch64::FPR64RegClass;
7625 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7626 break;
7628 Opc = AArch64::MLAv16i8;
7629 RC = &AArch64::FPR128RegClass;
7630 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7631 break;
7633 Opc = AArch64::MLAv16i8;
7634 RC = &AArch64::FPR128RegClass;
7635 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7636 break;
7638 Opc = AArch64::MLAv4i16;
7639 RC = &AArch64::FPR64RegClass;
7640 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7641 break;
7643 Opc = AArch64::MLAv4i16;
7644 RC = &AArch64::FPR64RegClass;
7645 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7646 break;
7648 Opc = AArch64::MLAv8i16;
7649 RC = &AArch64::FPR128RegClass;
7650 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7651 break;
7653 Opc = AArch64::MLAv8i16;
7654 RC = &AArch64::FPR128RegClass;
7655 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7656 break;
7658 Opc = AArch64::MLAv2i32;
7659 RC = &AArch64::FPR64RegClass;
7660 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7661 break;
7663 Opc = AArch64::MLAv2i32;
7664 RC = &AArch64::FPR64RegClass;
7665 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7666 break;
7668 Opc = AArch64::MLAv4i32;
7669 RC = &AArch64::FPR128RegClass;
7670 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7671 break;
7673 Opc = AArch64::MLAv4i32;
7674 RC = &AArch64::FPR128RegClass;
7675 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7676 break;
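// The vector integer MULADD cases above all share the same shape, e.g. for a
// v4i32 multiply feeding the second operand of an add (illustrative):
//
//   mul v0.4s, v1.4s, v2.4s
//   add v3.4s, v3.4s, v0.4s
//   ==>
//   mla v3.4s, v1.4s, v2.4s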
7677
7679 Opc = AArch64::MLAv8i8;
7680 RC = &AArch64::FPR64RegClass;
7681 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7682 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7683 RC);
7684 break;
7686 Opc = AArch64::MLSv8i8;
7687 RC = &AArch64::FPR64RegClass;
7688 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7689 break;
7691 Opc = AArch64::MLAv16i8;
7692 RC = &AArch64::FPR128RegClass;
7693 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7694 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7695 RC);
7696 break;
7698 Opc = AArch64::MLSv16i8;
7699 RC = &AArch64::FPR128RegClass;
7700 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7701 break;
7703 Opc = AArch64::MLAv4i16;
7704 RC = &AArch64::FPR64RegClass;
7705 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7706 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7707 RC);
7708 break;
7710 Opc = AArch64::MLSv4i16;
7711 RC = &AArch64::FPR64RegClass;
7712 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7713 break;
7715 Opc = AArch64::MLAv8i16;
7716 RC = &AArch64::FPR128RegClass;
7717 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7718 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7719 RC);
7720 break;
7722 Opc = AArch64::MLSv8i16;
7723 RC = &AArch64::FPR128RegClass;
7724 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7725 break;
7727 Opc = AArch64::MLAv2i32;
7728 RC = &AArch64::FPR64RegClass;
7729 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7730 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7731 RC);
7732 break;
7734 Opc = AArch64::MLSv2i32;
7735 RC = &AArch64::FPR64RegClass;
7736 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7737 break;
7739 Opc = AArch64::MLAv4i32;
7740 RC = &AArch64::FPR128RegClass;
7741 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7742 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7743 RC);
7744 break;
7746 Opc = AArch64::MLSv4i32;
7747 RC = &AArch64::FPR128RegClass;
7748 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7749 break;
7750
7752 Opc = AArch64::MLAv4i16_indexed;
7753 RC = &AArch64::FPR64RegClass;
7754 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7755 break;
7757 Opc = AArch64::MLAv4i16_indexed;
7758 RC = &AArch64::FPR64RegClass;
7759 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7760 break;
7762 Opc = AArch64::MLAv8i16_indexed;
7763 RC = &AArch64::FPR128RegClass;
7764 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7765 break;
7767 Opc = AArch64::MLAv8i16_indexed;
7768 RC = &AArch64::FPR128RegClass;
7769 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7770 break;
7772 Opc = AArch64::MLAv2i32_indexed;
7773 RC = &AArch64::FPR64RegClass;
7774 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7775 break;
7777 Opc = AArch64::MLAv2i32_indexed;
7778 RC = &AArch64::FPR64RegClass;
7779 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7780 break;
7782 Opc = AArch64::MLAv4i32_indexed;
7783 RC = &AArch64::FPR128RegClass;
7784 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7785 break;
7787 Opc = AArch64::MLAv4i32_indexed;
7788 RC = &AArch64::FPR128RegClass;
7789 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7790 break;
7791
7793 Opc = AArch64::MLAv4i16_indexed;
7794 RC = &AArch64::FPR64RegClass;
7795 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7796 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7797 RC);
7798 break;
7800 Opc = AArch64::MLSv4i16_indexed;
7801 RC = &AArch64::FPR64RegClass;
7802 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7803 break;
7805 Opc = AArch64::MLAv8i16_indexed;
7806 RC = &AArch64::FPR128RegClass;
7807 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7808 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7809 RC);
7810 break;
7812 Opc = AArch64::MLSv8i16_indexed;
7813 RC = &AArch64::FPR128RegClass;
7814 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7815 break;
7817 Opc = AArch64::MLAv2i32_indexed;
7818 RC = &AArch64::FPR64RegClass;
7819 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7820 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7821 RC);
7822 break;
7824 Opc = AArch64::MLSv2i32_indexed;
7825 RC = &AArch64::FPR64RegClass;
7826 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7827 break;
7829 Opc = AArch64::MLAv4i32_indexed;
7830 RC = &AArch64::FPR128RegClass;
7831 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7832 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7833 RC);
7834 break;
7836 Opc = AArch64::MLSv4i32_indexed;
7837 RC = &AArch64::FPR128RegClass;
7838 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7839 break;
7840
7841 // Floating Point Support
7843 Opc = AArch64::FMADDHrrr;
7844 RC = &AArch64::FPR16RegClass;
7845 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7846 break;
7848 Opc = AArch64::FMADDSrrr;
7849 RC = &AArch64::FPR32RegClass;
7850 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7851 break;
7853 Opc = AArch64::FMADDDrrr;
7854 RC = &AArch64::FPR64RegClass;
7855 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7856 break;
7857
7859 Opc = AArch64::FMADDHrrr;
7860 RC = &AArch64::FPR16RegClass;
7861 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7862 break;
7864 Opc = AArch64::FMADDSrrr;
7865 RC = &AArch64::FPR32RegClass;
7866 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7867 break;
7869 Opc = AArch64::FMADDDrrr;
7870 RC = &AArch64::FPR64RegClass;
7871 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7872 break;
7873
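// The scalar floating-point cases above fuse a separate fmul/fadd pair into a
// single fused multiply-add, e.g. (illustrative):
//
//   fmul s0, s1, s2
//   fadd s3, s0, s4
//   ==>
//   fmadd s3, s1, s2, s4
//
// These patterns are presumably only formed when FP contraction is allowed;
// that check happens where the patterns are matched, not in this helper.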
7875 Opc = AArch64::FMLAv1i32_indexed;
7876 RC = &AArch64::FPR32RegClass;
7877 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7878 FMAInstKind::Indexed);
7879 break;
7881 Opc = AArch64::FMLAv1i32_indexed;
7882 RC = &AArch64::FPR32RegClass;
7883 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7884 FMAInstKind::Indexed);
7885 break;
7886
7888 Opc = AArch64::FMLAv1i64_indexed;
7889 RC = &AArch64::FPR64RegClass;
7890 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7891 FMAInstKind::Indexed);
7892 break;
7894 Opc = AArch64::FMLAv1i64_indexed;
7895 RC = &AArch64::FPR64RegClass;
7896 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7897 FMAInstKind::Indexed);
7898 break;
7899
7901 RC = &AArch64::FPR64RegClass;
7902 Opc = AArch64::FMLAv4i16_indexed;
7903 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7904 FMAInstKind::Indexed);
7905 break;
7907 RC = &AArch64::FPR64RegClass;
7908 Opc = AArch64::FMLAv4f16;
7909 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7910 FMAInstKind::Accumulator);
7911 break;
7913 RC = &AArch64::FPR64RegClass;
7914 Opc = AArch64::FMLAv4i16_indexed;
7915 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7916 FMAInstKind::Indexed);
7917 break;
7919 RC = &AArch64::FPR64RegClass;
7920 Opc = AArch64::FMLAv4f16;
7921 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7922 FMAInstKind::Accumulator);
7923 break;
7924
7927 RC = &AArch64::FPR64RegClass;
7929 Opc = AArch64::FMLAv2i32_indexed;
7930 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7931 FMAInstKind::Indexed);
7932 } else {
7933 Opc = AArch64::FMLAv2f32;
7934 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7935 FMAInstKind::Accumulator);
7936 }
7937 break;
7940 RC = &AArch64::FPR64RegClass;
7942 Opc = AArch64::FMLAv2i32_indexed;
7943 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7944 FMAInstKind::Indexed);
7945 } else {
7946 Opc = AArch64::FMLAv2f32;
7947 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7948 FMAInstKind::Accumulator);
7949 }
7950 break;
7951
7953 RC = &AArch64::FPR128RegClass;
7954 Opc = AArch64::FMLAv8i16_indexed;
7955 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7956 FMAInstKind::Indexed);
7957 break;
7959 RC = &AArch64::FPR128RegClass;
7960 Opc = AArch64::FMLAv8f16;
7961 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7962 FMAInstKind::Accumulator);
7963 break;
7965 RC = &AArch64::FPR128RegClass;
7966 Opc = AArch64::FMLAv8i16_indexed;
7967 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7968 FMAInstKind::Indexed);
7969 break;
7971 RC = &AArch64::FPR128RegClass;
7972 Opc = AArch64::FMLAv8f16;
7973 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7974 FMAInstKind::Accumulator);
7975 break;
7976
7979 RC = &AArch64::FPR128RegClass;
7981 Opc = AArch64::FMLAv2i64_indexed;
7982 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7983 FMAInstKind::Indexed);
7984 } else {
7985 Opc = AArch64::FMLAv2f64;
7986 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7987 FMAInstKind::Accumulator);
7988 }
7989 break;
7992 RC = &AArch64::FPR128RegClass;
7994 Opc = AArch64::FMLAv2i64_indexed;
7995 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7996 FMAInstKind::Indexed);
7997 } else {
7998 Opc = AArch64::FMLAv2f64;
7999 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8000 FMAInstKind::Accumulator);
8001 }
8002 break;
8003
8006 RC = &AArch64::FPR128RegClass;
8008 Opc = AArch64::FMLAv4i32_indexed;
8009 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8010 FMAInstKind::Indexed);
8011 } else {
8012 Opc = AArch64::FMLAv4f32;
8013 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8014 FMAInstKind::Accumulator);
8015 }
8016 break;
8017
8020 RC = &AArch64::FPR128RegClass;
8022 Opc = AArch64::FMLAv4i32_indexed;
8023 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8024 FMAInstKind::Indexed);
8025 } else {
8026 Opc = AArch64::FMLAv4f32;
8027 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8028 FMAInstKind::Accumulator);
8029 }
8030 break;
8031
8033 Opc = AArch64::FNMSUBHrrr;
8034 RC = &AArch64::FPR16RegClass;
8035 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8036 break;
8038 Opc = AArch64::FNMSUBSrrr;
8039 RC = &AArch64::FPR32RegClass;
8040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8041 break;
8043 Opc = AArch64::FNMSUBDrrr;
8044 RC = &AArch64::FPR64RegClass;
8045 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8046 break;
8047
8049 Opc = AArch64::FNMADDHrrr;
8050 RC = &AArch64::FPR16RegClass;
8051 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8052 break;
8054 Opc = AArch64::FNMADDSrrr;
8055 RC = &AArch64::FPR32RegClass;
8056 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8057 break;
8059 Opc = AArch64::FNMADDDrrr;
8060 RC = &AArch64::FPR64RegClass;
8061 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8062 break;
8063
8065 Opc = AArch64::FMSUBHrrr;
8066 RC = &AArch64::FPR16RegClass;
8067 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8068 break;
8070 Opc = AArch64::FMSUBSrrr;
8071 RC = &AArch64::FPR32RegClass;
8072 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8073 break;
8075 Opc = AArch64::FMSUBDrrr;
8076 RC = &AArch64::FPR64RegClass;
8077 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8078 break;
8079
8081 Opc = AArch64::FMLSv1i32_indexed;
8082 RC = &AArch64::FPR32RegClass;
8083 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8084 FMAInstKind::Indexed);
8085 break;
8086
8088 Opc = AArch64::FMLSv1i64_indexed;
8089 RC = &AArch64::FPR64RegClass;
8090 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8091 FMAInstKind::Indexed);
8092 break;
8093
8096 RC = &AArch64::FPR64RegClass;
8097 Register NewVR = MRI.createVirtualRegister(RC);
8098 MachineInstrBuilder MIB1 =
8099 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8100 .add(Root.getOperand(2));
8101 InsInstrs.push_back(MIB1);
8102 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8104 Opc = AArch64::FMLAv4f16;
8105 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8106 FMAInstKind::Accumulator, &NewVR);
8107 } else {
8108 Opc = AArch64::FMLAv4i16_indexed;
8109 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8110 FMAInstKind::Indexed, &NewVR);
8111 }
8112 break;
8113 }
8115 RC = &AArch64::FPR64RegClass;
8116 Opc = AArch64::FMLSv4f16;
8117 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8118 FMAInstKind::Accumulator);
8119 break;
8121 RC = &AArch64::FPR64RegClass;
8122 Opc = AArch64::FMLSv4i16_indexed;
8123 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8124 FMAInstKind::Indexed);
8125 break;
8126
8129 RC = &AArch64::FPR64RegClass;
8131 Opc = AArch64::FMLSv2i32_indexed;
8132 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8133 FMAInstKind::Indexed);
8134 } else {
8135 Opc = AArch64::FMLSv2f32;
8136 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8137 FMAInstKind::Accumulator);
8138 }
8139 break;
8140
8143 RC = &AArch64::FPR128RegClass;
8144 Register NewVR = MRI.createVirtualRegister(RC);
8145 MachineInstrBuilder MIB1 =
8146 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8147 .add(Root.getOperand(2));
8148 InsInstrs.push_back(MIB1);
8149 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8151 Opc = AArch64::FMLAv8f16;
8152 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8153 FMAInstKind::Accumulator, &NewVR);
8154 } else {
8155 Opc = AArch64::FMLAv8i16_indexed;
8156 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8157 FMAInstKind::Indexed, &NewVR);
8158 }
8159 break;
8160 }
8162 RC = &AArch64::FPR128RegClass;
8163 Opc = AArch64::FMLSv8f16;
8164 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8165 FMAInstKind::Accumulator);
8166 break;
8168 RC = &AArch64::FPR128RegClass;
8169 Opc = AArch64::FMLSv8i16_indexed;
8170 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8171 FMAInstKind::Indexed);
8172 break;
8173
8176 RC = &AArch64::FPR128RegClass;
8178 Opc = AArch64::FMLSv2i64_indexed;
8179 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8180 FMAInstKind::Indexed);
8181 } else {
8182 Opc = AArch64::FMLSv2f64;
8183 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8184 FMAInstKind::Accumulator);
8185 }
8186 break;
8187
8190 RC = &AArch64::FPR128RegClass;
8192 Opc = AArch64::FMLSv4i32_indexed;
8193 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8194 FMAInstKind::Indexed);
8195 } else {
8196 Opc = AArch64::FMLSv4f32;
8197 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8198 FMAInstKind::Accumulator);
8199 }
8200 break;
8203 RC = &AArch64::FPR64RegClass;
8204 Register NewVR = MRI.createVirtualRegister(RC);
8205 MachineInstrBuilder MIB1 =
8206 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
8207 .add(Root.getOperand(2));
8208 InsInstrs.push_back(MIB1);
8209 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8211 Opc = AArch64::FMLAv2i32_indexed;
8212 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8213 FMAInstKind::Indexed, &NewVR);
8214 } else {
8215 Opc = AArch64::FMLAv2f32;
8216 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8217 FMAInstKind::Accumulator, &NewVR);
8218 }
8219 break;
8220 }
8223 RC = &AArch64::FPR128RegClass;
8224 Register NewVR = MRI.createVirtualRegister(RC);
8225 MachineInstrBuilder MIB1 =
8226 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
8227 .add(Root.getOperand(2));
8228 InsInstrs.push_back(MIB1);
8229 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8231 Opc = AArch64::FMLAv4i32_indexed;
8232 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8233 FMAInstKind::Indexed, &NewVR);
8234 } else {
8235 Opc = AArch64::FMLAv4f32;
8236 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8237 FMAInstKind::Accumulator, &NewVR);
8238 }
8239 break;
8240 }
8243 RC = &AArch64::FPR128RegClass;
8244 Register NewVR = MRI.createVirtualRegister(RC);
8245 MachineInstrBuilder MIB1 =
8246 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
8247 .add(Root.getOperand(2));
8248 InsInstrs.push_back(MIB1);
8249 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8251 Opc = AArch64::FMLAv2i64_indexed;
8252 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8253 FMAInstKind::Indexed, &NewVR);
8254 } else {
8255 Opc = AArch64::FMLAv2f64;
8256 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8257 FMAInstKind::Accumulator, &NewVR);
8258 }
8259 break;
8260 }
8263 unsigned IdxDupOp =
8265 : 2;
8266 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
8267 &AArch64::FPR128RegClass, MRI);
8268 break;
8269 }
8272 unsigned IdxDupOp =
8274 : 2;
8275 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
8276 &AArch64::FPR128RegClass, MRI);
8277 break;
8278 }
8281 unsigned IdxDupOp =
8283 : 2;
8284 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
8285 &AArch64::FPR128_loRegClass, MRI);
8286 break;
8287 }
8290 unsigned IdxDupOp =
8292 : 2;
8293 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
8294 &AArch64::FPR128RegClass, MRI);
8295 break;
8296 }
8299 unsigned IdxDupOp =
8301 : 2;
8302 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
8303 &AArch64::FPR128_loRegClass, MRI);
8304 break;
8305 }
8307 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
8308 break;
8309 }
8310
8311 } // end switch (Pattern)
8312 // Record MUL and ADD/SUB for deletion
8313 if (MUL)
8314 DelInstrs.push_back(MUL);
8315 DelInstrs.push_back(&Root);
8316
8317 // Set the flags on the inserted instructions to be the merged flags of the
8318 // instructions that we have combined.
8319 uint32_t Flags = Root.getFlags();
8320 if (MUL)
8321 Flags = Root.mergeFlagsWith(*MUL);
8322 for (auto *MI : InsInstrs)
8323 MI->setFlags(Flags);
8324}
8325
8326/// Replace csincr-branch sequence by simple conditional branch
8327///
8328/// Examples:
8329/// 1. \code
8330/// csinc w9, wzr, wzr, <condition code>
8331/// tbnz w9, #0, 0x44
8332/// \endcode
8333/// to
8334/// \code
8335/// b.<inverted condition code>
8336/// \endcode
8337///
8338/// 2. \code
8339/// csinc w9, wzr, wzr, <condition code>
8340/// tbz w9, #0, 0x44
8341/// \endcode
8342/// to
8343/// \code
8344/// b.<condition code>
8345/// \endcode
8346///
8347/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
8348/// compare's constant operand is power of 2.
8349///
8350/// Examples:
8351/// \code
8352/// and w8, w8, #0x400
8353/// cbnz w8, L1
8354/// \endcode
8355/// to
8356/// \code
8357/// tbnz w8, #10, L1
8358/// \endcode
8359///
8360/// \param MI Conditional Branch
8361/// \return True when the simple conditional branch is generated
8362///
8364 bool IsNegativeBranch = false;
8365 bool IsTestAndBranch = false;
8366 unsigned TargetBBInMI = 0;
8367 switch (MI.getOpcode()) {
8368 default:
8369 llvm_unreachable("Unknown branch instruction?");
8370 case AArch64::Bcc:
8371 return false;
8372 case AArch64::CBZW:
8373 case AArch64::CBZX:
8374 TargetBBInMI = 1;
8375 break;
8376 case AArch64::CBNZW:
8377 case AArch64::CBNZX:
8378 TargetBBInMI = 1;
8379 IsNegativeBranch = true;
8380 break;
8381 case AArch64::TBZW:
8382 case AArch64::TBZX:
8383 TargetBBInMI = 2;
8384 IsTestAndBranch = true;
8385 break;
8386 case AArch64::TBNZW:
8387 case AArch64::TBNZX:
8388 TargetBBInMI = 2;
8389 IsNegativeBranch = true;
8390 IsTestAndBranch = true;
8391 break;
8392 }
8393 // So we increment a zero register and test for bits other
8394 // than bit 0? Conservatively bail out in case the verifier
8395 // missed this case.
8396 if (IsTestAndBranch && MI.getOperand(1).getImm())
8397 return false;
8398
8399 // Find Definition.
8400  assert(MI.getParent() && "Incomplete machine instruction\n");
8401 MachineBasicBlock *MBB = MI.getParent();
8402 MachineFunction *MF = MBB->getParent();
8404 Register VReg = MI.getOperand(0).getReg();
8405 if (!VReg.isVirtual())
8406 return false;
8407
8408 MachineInstr *DefMI = MRI->getVRegDef(VReg);
8409
8410 // Look through COPY instructions to find definition.
8411 while (DefMI->isCopy()) {
8412 Register CopyVReg = DefMI->getOperand(1).getReg();
8413 if (!MRI->hasOneNonDBGUse(CopyVReg))
8414 return false;
8415 if (!MRI->hasOneDef(CopyVReg))
8416 return false;
8417 DefMI = MRI->getVRegDef(CopyVReg);
8418 }
8419
8420 switch (DefMI->getOpcode()) {
8421 default:
8422 return false;
8423 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8424 case AArch64::ANDWri:
8425 case AArch64::ANDXri: {
8426 if (IsTestAndBranch)
8427 return false;
8428 if (DefMI->getParent() != MBB)
8429 return false;
8430 if (!MRI->hasOneNonDBGUse(VReg))
8431 return false;
8432
8433 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8435 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8436 if (!isPowerOf2_64(Mask))
8437 return false;
8438
8440 Register NewReg = MO.getReg();
8441 if (!NewReg.isVirtual())
8442 return false;
8443
8444 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8445
8446 MachineBasicBlock &RefToMBB = *MBB;
8447 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8448 DebugLoc DL = MI.getDebugLoc();
8449 unsigned Imm = Log2_64(Mask);
8450 unsigned Opc = (Imm < 32)
8451 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8452 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8453 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8454 .addReg(NewReg)
8455 .addImm(Imm)
8456 .addMBB(TBB);
8457 // Register lives on to the CBZ now.
8458 MO.setIsKill(false);
8459
8460    // For immediates smaller than 32, we need to use the 32-bit
8461    // variant (W) in all cases, because the 64-bit variant cannot
8462    // encode them.
8463 // Therefore, if the input register is 64-bit, we need to take the
8464 // 32-bit sub-part.
8465 if (!Is32Bit && Imm < 32)
8466 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8467 MI.eraseFromParent();
8468 return true;
8469 }
8470 // Look for CSINC
8471 case AArch64::CSINCWr:
8472 case AArch64::CSINCXr: {
8473 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8474 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8475 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8476 DefMI->getOperand(2).getReg() == AArch64::XZR))
8477 return false;
8478
8479 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8480 true) != -1)
8481 return false;
8482
8484 // Convert only when the condition code is not modified between
8485 // the CSINC and the branch. The CC may be used by other
8486 // instructions in between.
8488 return false;
8489 MachineBasicBlock &RefToMBB = *MBB;
8490 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8491 DebugLoc DL = MI.getDebugLoc();
8492 if (IsNegativeBranch)
8494 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8495 MI.eraseFromParent();
8496 return true;
8497 }
8498 }
8499}
8500
8501std::pair<unsigned, unsigned>
8503 const unsigned Mask = AArch64II::MO_FRAGMENT;
8504 return std::make_pair(TF & Mask, TF & ~Mask);
8505}
8506
8509 using namespace AArch64II;
8510
8511 static const std::pair<unsigned, const char *> TargetFlags[] = {
8512 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8513 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8514 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8515 {MO_HI12, "aarch64-hi12"}};
8516 return ArrayRef(TargetFlags);
8517}
8518
8521 using namespace AArch64II;
8522
8523 static const std::pair<unsigned, const char *> TargetFlags[] = {
8524 {MO_COFFSTUB, "aarch64-coffstub"},
8525 {MO_GOT, "aarch64-got"},
8526 {MO_NC, "aarch64-nc"},
8527 {MO_S, "aarch64-s"},
8528 {MO_TLS, "aarch64-tls"},
8529 {MO_DLLIMPORT, "aarch64-dllimport"},
8530 {MO_PREL, "aarch64-prel"},
8531 {MO_TAGGED, "aarch64-tagged"},
8532 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8533 };
8534 return ArrayRef(TargetFlags);
8535}
8536
8539 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8540 {{MOSuppressPair, "aarch64-suppress-pair"},
8541 {MOStridedAccess, "aarch64-strided-access"}};
8542 return ArrayRef(TargetFlags);
8543}
8544
8545/// Constants defining how certain sequences should be outlined.
8546/// This encompasses how an outlined function should be called, and what kind of
8547/// frame should be emitted for that outlined function.
8548///
8549/// \p MachineOutlinerDefault implies that the function should be called with
8550/// a save and restore of LR to the stack.
8551///
8552/// That is,
8553///
8554/// I1 Save LR OUTLINED_FUNCTION:
8555/// I2 --> BL OUTLINED_FUNCTION I1
8556/// I3 Restore LR I2
8557/// I3
8558/// RET
8559///
8560/// * Call construction overhead: 3 (save + BL + restore)
8561/// * Frame construction overhead: 1 (ret)
8562/// * Requires stack fixups? Yes
8563///
8564/// \p MachineOutlinerTailCall implies that the function is being created from
8565/// a sequence of instructions ending in a return.
8566///
8567/// That is,
8568///
8569/// I1 OUTLINED_FUNCTION:
8570/// I2 --> B OUTLINED_FUNCTION I1
8571/// RET I2
8572/// RET
8573///
8574/// * Call construction overhead: 1 (B)
8575/// * Frame construction overhead: 0 (Return included in sequence)
8576/// * Requires stack fixups? No
8577///
8578/// \p MachineOutlinerNoLRSave implies that the function should be called using
8579/// a BL instruction, but doesn't require LR to be saved and restored. This
8580/// happens when LR is known to be dead.
8581///
8582/// That is,
8583///
8584/// I1 OUTLINED_FUNCTION:
8585/// I2 --> BL OUTLINED_FUNCTION I1
8586/// I3 I2
8587/// I3
8588/// RET
8589///
8590/// * Call construction overhead: 1 (BL)
8591/// * Frame construction overhead: 1 (RET)
8592/// * Requires stack fixups? No
8593///
8594/// \p MachineOutlinerThunk implies that the function is being created from
8595/// a sequence of instructions ending in a call. The outlined function is
8596/// called with a BL instruction, and the outlined function tail-calls the
8597/// original call destination.
8598///
8599/// That is,
8600///
8601/// I1 OUTLINED_FUNCTION:
8602/// I2 --> BL OUTLINED_FUNCTION I1
8603/// BL f I2
8604/// B f
8605/// * Call construction overhead: 1 (BL)
8606/// * Frame construction overhead: 0
8607/// * Requires stack fixups? No
8608///
8609/// \p MachineOutlinerRegSave implies that the function should be called with a
8610/// save and restore of LR to an available register. This allows us to avoid
8611/// stack fixups. Note that this outlining variant is compatible with the
8612/// NoLRSave case.
8613///
8614/// That is,
8615///
8616/// I1 Save LR OUTLINED_FUNCTION:
8617/// I2 --> BL OUTLINED_FUNCTION I1
8618/// I3 Restore LR I2
8619/// I3
8620/// RET
8621///
8622/// * Call construction overhead: 3 (save + BL + restore)
8623/// * Frame construction overhead: 1 (ret)
8624/// * Requires stack fixups? No
8626 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8627 MachineOutlinerTailCall, /// Only emit a branch.
8628 MachineOutlinerNoLRSave, /// Emit a call and return.
8629 MachineOutlinerThunk, /// Emit a call and tail-call.
8630 MachineOutlinerRegSave /// Same as default, but save to a register.
8632
8636 UnsafeRegsDead = 0x8
8638
8640AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8641 MachineFunction *MF = C.getMF();
8643 const AArch64RegisterInfo *ARI =
8644 static_cast<const AArch64RegisterInfo *>(&TRI);
8645 // Check if there is an available register across the sequence that we can
8646 // use.
8647 for (unsigned Reg : AArch64::GPR64RegClass) {
8648 if (!ARI->isReservedReg(*MF, Reg) &&
8649 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8650 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8651 Reg != AArch64::X17 && // Ditto for X17.
8652 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8653 C.isAvailableInsideSeq(Reg, TRI))
8654 return Reg;
8655 }
8656 return Register();
8657}
8658
8659static bool
8661 const outliner::Candidate &b) {
8662 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8663 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8664
8665 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8666 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8667}
8668
8669static bool
8671 const outliner::Candidate &b) {
8672 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8673 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8674
8675 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8676}
8677
8679 const outliner::Candidate &b) {
8680 const AArch64Subtarget &SubtargetA =
8682 const AArch64Subtarget &SubtargetB =
8683 b.getMF()->getSubtarget<AArch64Subtarget>();
8684 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8685}
8686
8687std::optional<outliner::OutlinedFunction>
8689 const MachineModuleInfo &MMI,
8690 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8691 unsigned SequenceSize = 0;
8692 for (auto &MI : RepeatedSequenceLocs[0])
8693 SequenceSize += getInstSizeInBytes(MI);
8694
8695 unsigned NumBytesToCreateFrame = 0;
8696
8697 // We only allow outlining for functions having exactly matching return
8698 // address signing attributes, i.e., all share the same value for the
8699 // attribute "sign-return-address" and all share the same type of key they
8700 // are signed with.
8701  // Additionally, we require all functions to simultaneously either support
8702 // v8.3a features or not. Otherwise an outlined function could get signed
8703 // using dedicated v8.3 instructions and a call from a function that doesn't
8704 // support v8.3 instructions would therefore be invalid.
8705 if (std::adjacent_find(
8706 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8707 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8708 // Return true if a and b are non-equal w.r.t. return address
8709 // signing or support of v8.3a features
8710 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8711 outliningCandidatesSigningKeyConsensus(a, b) &&
8712 outliningCandidatesV8_3OpsConsensus(a, b)) {
8713 return false;
8714 }
8715 return true;
8716 }) != RepeatedSequenceLocs.end()) {
8717 return std::nullopt;
8718 }
8719
8720  // Since at this point all candidates agree on their return address signing,
8721 // picking just one is fine. If the candidate functions potentially sign their
8722 // return addresses, the outlined function should do the same. Note that in
8723  // the case of "sign-return-address"="non-leaf" this is an assumption: it is
8724  // not necessarily true that the outlined function will have to sign its return
8725  // address, but that decision is made later, when the decision to outline
8726  // has already been made.
8727 // The same holds for the number of additional instructions we need: On
8728 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8729 // necessary. However, at this point we don't know if the outlined function
8730 // will have a RET instruction so we assume the worst.
8731 const TargetRegisterInfo &TRI = getRegisterInfo();
8732 // Performing a tail call may require extra checks when PAuth is enabled.
8733 // If PAuth is disabled, set it to zero for uniformity.
8734 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8735 if (RepeatedSequenceLocs[0]
8736 .getMF()
8737 ->getInfo<AArch64FunctionInfo>()
8738 ->shouldSignReturnAddress(true)) {
8739 // One PAC and one AUT instructions
8740 NumBytesToCreateFrame += 8;
8741
8742 // PAuth is enabled - set extra tail call cost, if any.
8743 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
8744 *RepeatedSequenceLocs[0].getMF());
8745 NumBytesToCheckLRInTCEpilogue =
8747 // Checking the authenticated LR value may significantly impact
8748 // SequenceSize, so account for it for more precise results.
8749 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8750 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8751
8752 // We have to check if sp modifying instructions would get outlined.
8753    // If so, we only allow outlining if sp is unchanged overall, so matching
8754    // sub and add instructions are okay to outline, but all other sp
8755    // modifications are not.
8756 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8757 int SPValue = 0;
8758 for (auto &MI : C) {
8759 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8760 switch (MI.getOpcode()) {
8761 case AArch64::ADDXri:
8762 case AArch64::ADDWri:
8763 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8764 assert(MI.getOperand(2).isImm() &&
8765 "Expected operand to be immediate");
8766 assert(MI.getOperand(1).isReg() &&
8767 "Expected operand to be a register");
8768 // Check if the add just increments sp. If so, we search for
8769 // matching sub instructions that decrement sp. If not, the
8770 // modification is illegal
8771 if (MI.getOperand(1).getReg() == AArch64::SP)
8772 SPValue += MI.getOperand(2).getImm();
8773 else
8774 return true;
8775 break;
8776 case AArch64::SUBXri:
8777 case AArch64::SUBWri:
8778 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8779 assert(MI.getOperand(2).isImm() &&
8780 "Expected operand to be immediate");
8781 assert(MI.getOperand(1).isReg() &&
8782 "Expected operand to be a register");
8783 // Check if the sub just decrements sp. If so, we search for
8784 // matching add instructions that increment sp. If not, the
8785 // modification is illegal
8786 if (MI.getOperand(1).getReg() == AArch64::SP)
8787 SPValue -= MI.getOperand(2).getImm();
8788 else
8789 return true;
8790 break;
8791 default:
8792 return true;
8793 }
8794 }
8795 }
8796 if (SPValue)
8797 return true;
8798 return false;
8799 };
8800 // Remove candidates with illegal stack modifying instructions
8801 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8802
8803 // If the sequence doesn't have enough candidates left, then we're done.
8804 if (RepeatedSequenceLocs.size() < 2)
8805 return std::nullopt;
8806 }
8807
8808 // Properties about candidate MBBs that hold for all of them.
8809 unsigned FlagsSetInAll = 0xF;
8810
8811 // Compute liveness information for each candidate, and set FlagsSetInAll.
8812 for (outliner::Candidate &C : RepeatedSequenceLocs)
8813 FlagsSetInAll &= C.Flags;
8814
8815 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8816
8817 // Helper lambda which sets call information for every candidate.
8818 auto SetCandidateCallInfo =
8819 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8820 for (outliner::Candidate &C : RepeatedSequenceLocs)
8821 C.setCallInfo(CallID, NumBytesForCall);
8822 };
8823
8824 unsigned FrameID = MachineOutlinerDefault;
8825 NumBytesToCreateFrame += 4;
8826
8827 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8828 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8829 });
8830
8831 // We check to see if CFI Instructions are present, and if they are
8832 // we find the number of CFI Instructions in the candidates.
8833 unsigned CFICount = 0;
8834 for (auto &I : RepeatedSequenceLocs[0]) {
8835 if (I.isCFIInstruction())
8836 CFICount++;
8837 }
8838
8839 // We compare the number of found CFI Instructions to the number of CFI
8840 // instructions in the parent function for each candidate. We must check this
8841 // since if we outline one of the CFI instructions in a function, we have to
8842 // outline them all for correctness. If we do not, the address offsets will be
8843 // incorrect between the two sections of the program.
8844 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8845 std::vector<MCCFIInstruction> CFIInstructions =
8846 C.getMF()->getFrameInstructions();
8847
8848 if (CFICount > 0 && CFICount != CFIInstructions.size())
8849 return std::nullopt;
8850 }
8851
8852 // Returns true if an instructions is safe to fix up, false otherwise.
8853 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8854 if (MI.isCall())
8855 return true;
8856
8857 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8858 !MI.readsRegister(AArch64::SP, &TRI))
8859 return true;
8860
8861 // Any modification of SP will break our code to save/restore LR.
8862 // FIXME: We could handle some instructions which add a constant
8863 // offset to SP, with a bit more work.
8864 if (MI.modifiesRegister(AArch64::SP, &TRI))
8865 return false;
8866
8867 // At this point, we have a stack instruction that we might need to
8868 // fix up. We'll handle it if it's a load or store.
8869 if (MI.mayLoadOrStore()) {
8870 const MachineOperand *Base; // Filled with the base operand of MI.
8871 int64_t Offset; // Filled with the offset of MI.
8872 bool OffsetIsScalable;
8873
8874 // Does it allow us to offset the base operand and is the base the
8875 // register SP?
8876 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8877 !Base->isReg() || Base->getReg() != AArch64::SP)
8878 return false;
8879
8880      // Fix-up code below assumes byte offsets.
8881 if (OffsetIsScalable)
8882 return false;
8883
8884 // Find the minimum/maximum offset for this instruction and check
8885 // if fixing it up would be in range.
8886 int64_t MinOffset,
8887 MaxOffset; // Unscaled offsets for the instruction.
8888 // The scale to multiply the offsets by.
8889 TypeSize Scale(0U, false), DummyWidth(0U, false);
8890 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8891
8892 Offset += 16; // Update the offset to what it would be if we outlined.
8893 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8894 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8895 return false;
8896
8897 // It's in range, so we can outline it.
8898 return true;
8899 }
8900
8901 // FIXME: Add handling for instructions like "add x0, sp, #8".
8902
8903 // We can't fix it up, so don't outline it.
8904 return false;
8905 };
8906
8907 // True if it's possible to fix up each stack instruction in this sequence.
8908 // Important for frames/call variants that modify the stack.
8909 bool AllStackInstrsSafe =
8910 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
8911
8912 // If the last instruction in any candidate is a terminator, then we should
8913 // tail call all of the candidates.
8914 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8915 FrameID = MachineOutlinerTailCall;
8916 NumBytesToCreateFrame = 0;
8917 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8918 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8919 }
8920
8921 else if (LastInstrOpcode == AArch64::BL ||
8922 ((LastInstrOpcode == AArch64::BLR ||
8923 LastInstrOpcode == AArch64::BLRNoIP) &&
8924 !HasBTI)) {
8925 // FIXME: Do we need to check if the code after this uses the value of LR?
8926 FrameID = MachineOutlinerThunk;
8927 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8928 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8929 }
8930
8931 else {
8932 // We need to decide how to emit calls + frames. We can always emit the same
8933 // frame if we don't need to save to the stack. If we have to save to the
8934 // stack, then we need a different frame.
8935 unsigned NumBytesNoStackCalls = 0;
8936 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8937
8938 // Check if we have to save LR.
8939 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8940 bool LRAvailable =
8941 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8942 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8943 : true;
8944 // If we have a noreturn caller, then we're going to be conservative and
8945 // say that we have to save LR. If we don't have a ret at the end of the
8946 // block, then we can't reason about liveness accurately.
8947 //
8948 // FIXME: We can probably do better than always disabling this in
8949 // noreturn functions by fixing up the liveness info.
8950 bool IsNoReturn =
8951 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8952
8953 // Is LR available? If so, we don't need a save.
8954 if (LRAvailable && !IsNoReturn) {
8955 NumBytesNoStackCalls += 4;
8956 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8957 CandidatesWithoutStackFixups.push_back(C);
8958 }
8959
8960 // Is an unused register available? If so, we won't modify the stack, so
8961 // we can outline with the same frame type as those that don't save LR.
8962 else if (findRegisterToSaveLRTo(C)) {
8963 NumBytesNoStackCalls += 12;
8964 C.setCallInfo(MachineOutlinerRegSave, 12);
8965 CandidatesWithoutStackFixups.push_back(C);
8966 }
8967
8968 // Is SP used in the sequence at all? If not, we don't have to modify
8969 // the stack, so we are guaranteed to get the same frame.
8970 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8971 NumBytesNoStackCalls += 12;
8972 C.setCallInfo(MachineOutlinerDefault, 12);
8973 CandidatesWithoutStackFixups.push_back(C);
8974 }
8975
8976 // If we outline this, we need to modify the stack. Pretend we don't
8977 // outline this by saving all of its bytes.
8978 else {
8979 NumBytesNoStackCalls += SequenceSize;
8980 }
8981 }
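    // To make the byte accounting above concrete (a worked example, not from
    // the source): a candidate whose LR is free costs 4 bytes at the call
    // site (just the BL), one that saves LR to a spare register or via SP
    // costs 12 bytes (save + BL + restore), and a candidate for which none of
    // these options work is charged its full SequenceSize, which biases the
    // comparison below towards the default stack-fixup convention. With three
    // candidates costing 12, 12 and 4 bytes, NumBytesNoStackCalls is 28,
    // which is then compared against 12 bytes per candidate (3 * 12 = 36).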
8982
8983 // If there are no places where we have to save LR, then note that we
8984 // don't have to update the stack. Otherwise, give every candidate the
8985 // default call type, as long as it's safe to do so.
8986 if (!AllStackInstrsSafe ||
8987 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8988 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8989 FrameID = MachineOutlinerNoLRSave;
8990 if (RepeatedSequenceLocs.size() < 2)
8991 return std::nullopt;
8992 } else {
8993 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8994
8995 // Bugzilla ID: 46767
8996 // TODO: Check if fixing up the stack more than once is safe so we can
8997 // outline these.
8998 //
8999 // An outline resulting in a caller that requires stack fixups at the
9000 // callsite to a callee that also requires stack fixups can happen when
9001 // there are no available registers at the candidate callsite for a
9002 // candidate that itself also has calls.
9003 //
9004 // In other words if function_containing_sequence in the following pseudo
9005 // assembly requires that we save LR at the point of the call, but there
9006 // are no available registers: in this case we save using SP and as a
9007      // result the SP offsets require stack fixups by multiples of 16.
9008 //
9009 // function_containing_sequence:
9010 // ...
9011 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9012 // call OUTLINED_FUNCTION_N
9013 // restore LR from SP
9014 // ...
9015 //
9016 // OUTLINED_FUNCTION_N:
9017 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9018 // ...
9019 // bl foo
9020 // restore LR from SP
9021 // ret
9022 //
9023 // Because the code to handle more than one stack fixup does not
9024 // currently have the proper checks for legality, these cases will assert
9025 // in the AArch64 MachineOutliner. This is because the code to do this
9026 // needs more hardening, testing, better checks that generated code is
9027      // legal, etc., and because it is only verified to handle a single pass of
9028 // stack fixup.
9029 //
9030 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9031 // these cases until they are known to be handled. Bugzilla 46767 is
9032 // referenced in comments at the assert site.
9033 //
9034 // To avoid asserting (or generating non-legal code on noassert builds)
9035 // we remove all candidates which would need more than one stack fixup by
9036 // pruning the cases where the candidate has calls while also having no
9037 // available LR and having no available general purpose registers to copy
9038 // LR to (ie one extra stack save/restore).
9039 //
9040 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9041 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9042 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9043 return (llvm::any_of(C, IsCall)) &&
9044 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9045 !findRegisterToSaveLRTo(C));
9046 });
9047 }
9048 }
9049
9050 // If we dropped all of the candidates, bail out here.
9051 if (RepeatedSequenceLocs.size() < 2) {
9052 RepeatedSequenceLocs.clear();
9053 return std::nullopt;
9054 }
9055 }
9056
9057 // Does every candidate's MBB contain a call? If so, then we might have a call
9058 // in the range.
9059 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9060 // Check if the range contains a call. These require a save + restore of the
9061 // link register.
9062 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9063 bool ModStackToSaveLR = false;
9064 if (any_of(drop_end(FirstCand),
9065 [](const MachineInstr &MI) { return MI.isCall(); }))
9066 ModStackToSaveLR = true;
9067
9068 // Handle the last instruction separately. If this is a tail call, then the
9069 // last instruction is a call. We don't want to save + restore in this case.
9070 // However, it could be possible that the last instruction is a call without
9071 // it being valid to tail call this sequence. We should consider this as
9072 // well.
9073 else if (FrameID != MachineOutlinerThunk &&
9074 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9075 ModStackToSaveLR = true;
9076
9077 if (ModStackToSaveLR) {
9078 // We can't fix up the stack. Bail out.
9079 if (!AllStackInstrsSafe) {
9080 RepeatedSequenceLocs.clear();
9081 return std::nullopt;
9082 }
9083
9084 // Save + restore LR.
9085 NumBytesToCreateFrame += 8;
9086 }
9087 }
9088
9089 // If we have CFI instructions, we can only outline if the outlined section
9090 // can be a tail call
9091 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9092 return std::nullopt;
9093
9094 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
9095 NumBytesToCreateFrame, FrameID);
9096}
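// Editorial sketch (illustrative, not part of the upstream file): the
// candidate-selection logic above keeps the no-stack-fixup candidate set
// whenever the stack instructions cannot all be fixed up safely, or whenever
// paying roughly 12 bytes per call site (save LR, BL, restore LR) for the
// default strategy would not beat it. A standalone form of that test:
[[maybe_unused]] static bool preferNoStackFixupsSketch(
    bool AllStackInstrsSafe, unsigned NumBytesNoStackCalls,
    size_t NumCandidates) {
  // Mirrors: !AllStackInstrsSafe ||
  //          NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12
  return !AllStackInstrsSafe || NumBytesNoStackCalls <= NumCandidates * 12;
}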
9097
9099 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9100  // If a set of candidates reaches this point, they must all agree on their
9101  // return address signing. It is therefore enough to consider the signing
9102  // behaviour of one of them.
9103 const auto &CFn = Candidates.front().getMF()->getFunction();
9104
9105 if (CFn.hasFnAttribute("ptrauth-returns"))
9106 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9107 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9108 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9109 // Since all candidates belong to the same module, just copy the
9110 // function-level attributes of an arbitrary function.
9111 if (CFn.hasFnAttribute("sign-return-address"))
9112 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9113 if (CFn.hasFnAttribute("sign-return-address-key"))
9114 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9115
9116 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9117}
9118
9120 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9121 const Function &F = MF.getFunction();
9122
9123 // Can F be deduplicated by the linker? If it can, don't outline from it.
9124 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9125 return false;
9126
9127 // Don't outline from functions with section markings; the program could
9128 // expect that all the code is in the named section.
9129 // FIXME: Allow outlining from multiple functions with the same section
9130 // marking.
9131 if (F.hasSection())
9132 return false;
9133
9134 // Outlining from functions with redzones is unsafe since the outliner may
9135 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9136 // outline from it.
9138 if (!AFI || AFI->hasRedZone().value_or(true))
9139 return false;
9140
9141 // FIXME: Determine whether it is safe to outline from functions which contain
9142 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9143 // outlined together and ensure it is safe to outline with async unwind info,
9144 // required for saving & restoring VG around calls.
9145 if (AFI->hasStreamingModeChanges())
9146 return false;
9147
9148 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9150 return false;
9151
9152 // It's safe to outline from MF.
9153 return true;
9154}
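// Editorial sketch (standalone, hypothetical helper, not upstream code):
// hasRedZone() returns a std::optional<bool>, and value_or(true) treats the
// "unknown" state the same as a known red zone, so outlining is refused
// conservatively.
[[maybe_unused]] static bool redZoneBlocksOutliningSketch(
    std::optional<bool> HasRedZone) {
  // nullopt (unknown) and true (known red zone) both block outlining.
  return HasRedZone.value_or(true);
}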
9155
9158 unsigned &Flags) const {
9160 "Must track liveness!");
9162 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
9163 Ranges;
9164 // According to the AArch64 Procedure Call Standard, the following are
9165 // undefined on entry/exit from a function call:
9166 //
9167 // * Registers x16, x17, (and thus w16, w17)
9168 // * Condition codes (and thus the NZCV register)
9169 //
9170 // If any of these registers are used inside or live across an outlined
9171 // function, then they may be modified later, either by the compiler or
9172 // some other tool (like the linker).
9173 //
9174 // To avoid outlining in these situations, partition each block into ranges
9175 // where these registers are dead. We will only outline from those ranges.
9177 auto AreAllUnsafeRegsDead = [&LRU]() {
9178 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
9179 LRU.available(AArch64::NZCV);
9180 };
9181
9182 // We need to know if LR is live across an outlining boundary later on in
9183 // order to decide how we'll create the outlined call, frame, etc.
9184 //
9185 // It's pretty expensive to check this for *every candidate* within a block.
9186 // That's some potentially n^2 behaviour, since in the worst case, we'd need
9187 // to compute liveness from the end of the block for O(n) candidates within
9188 // the block.
9189 //
9190 // So, to improve the average case, let's keep track of liveness from the end
9191 // of the block to the beginning of *every outlinable range*. If we know that
9192 // LR is available in every range we could outline from, then we know that
9193 // we don't need to check liveness for any candidate within that range.
9194 bool LRAvailableEverywhere = true;
9195 // Compute liveness bottom-up.
9196 LRU.addLiveOuts(MBB);
9197 // Update flags that require info about the entire MBB.
9198 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
9199 if (MI.isCall() && !MI.isTerminator())
9200 Flags |= MachineOutlinerMBBFlags::HasCalls;
9201 };
9202 // Range: [RangeBegin, RangeEnd)
9203 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
9204 unsigned RangeLen;
9205 auto CreateNewRangeStartingAt =
9206 [&RangeBegin, &RangeEnd,
9207 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
9208 RangeBegin = NewBegin;
9209 RangeEnd = std::next(RangeBegin);
9210 RangeLen = 0;
9211 };
9212 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
9213 // At least one unsafe register is not dead. We do not want to outline at
9214 // this point. If it is long enough to outline from, save the range
9215 // [RangeBegin, RangeEnd).
9216 if (RangeLen > 1)
9217 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
9218 };
9219 // Find the first point where all unsafe registers are dead.
9220 // FIND: <safe instr> <-- end of first potential range
9221 // SKIP: <unsafe def>
9222 // SKIP: ... everything between ...
9223 // SKIP: <unsafe use>
9224 auto FirstPossibleEndPt = MBB.instr_rbegin();
9225 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
9226 LRU.stepBackward(*FirstPossibleEndPt);
9227 // Update flags that impact how we outline across the entire block,
9228 // regardless of safety.
9229 UpdateWholeMBBFlags(*FirstPossibleEndPt);
9230 if (AreAllUnsafeRegsDead())
9231 break;
9232 }
9233 // If we exhausted the entire block, we have no safe ranges to outline.
9234 if (FirstPossibleEndPt == MBB.instr_rend())
9235 return Ranges;
9236 // Current range.
9237 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
9238  // FirstPossibleEndPt points to the first place (searching bottom-up) where
9239  // all unsafe registers are dead (if there is any such point). Begin
9240  // partitioning the MBB into ranges.
9241 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
9242 LRU.stepBackward(MI);
9243 UpdateWholeMBBFlags(MI);
9244 if (!AreAllUnsafeRegsDead()) {
9245 SaveRangeIfNonEmpty();
9246 CreateNewRangeStartingAt(MI.getIterator());
9247 continue;
9248 }
9249 LRAvailableEverywhere &= LRU.available(AArch64::LR);
9250 RangeBegin = MI.getIterator();
9251 ++RangeLen;
9252 }
9253  // The above loop misses the last (or only) range. If we are still safe, then
9254 // let's save the range.
9255 if (AreAllUnsafeRegsDead())
9256 SaveRangeIfNonEmpty();
9257 if (Ranges.empty())
9258 return Ranges;
9259  // We found the ranges bottom-up, but the mapping expects them top-down.
9260  // Reverse the order.
9261 std::reverse(Ranges.begin(), Ranges.end());
9262 // If there is at least one outlinable range where LR is unavailable
9263 // somewhere, remember that.
9264 if (!LRAvailableEverywhere)
9265 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
9266 return Ranges;
9267}
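// Editorial sketch (standalone, simplified model of the partitioning above;
// assumes <vector> is available): walking the block bottom-up yields a
// per-instruction flag saying whether W16, W17 and NZCV are all dead; the
// ranges returned are the maximal runs of "all dead" instructions that are
// long enough to outline from. Indices stand in for instruction iterators.
[[maybe_unused]] static std::vector<std::pair<size_t, size_t>>
splitIntoOutlinableRunsSketch(const std::vector<bool> &AllUnsafeRegsDead) {
  std::vector<std::pair<size_t, size_t>> Runs; // [Begin, End) index pairs.
  size_t I = 0, N = AllUnsafeRegsDead.size();
  while (I < N) {
    if (!AllUnsafeRegsDead[I]) {
      ++I;
      continue;
    }
    size_t Begin = I;
    while (I < N && AllUnsafeRegsDead[I])
      ++I;
    if (I - Begin > 1) // Mirrors the "RangeLen > 1" check above.
      Runs.emplace_back(Begin, I);
  }
  return Runs;
}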
9268
9272 unsigned Flags) const {
9273 MachineInstr &MI = *MIT;
9274 MachineBasicBlock *MBB = MI.getParent();
9275 MachineFunction *MF = MBB->getParent();
9277
9278 // Don't outline anything used for return address signing. The outlined
9279  // function will get signed later if needed.
9280 switch (MI.getOpcode()) {
9281 case AArch64::PACM:
9282 case AArch64::PACIASP:
9283 case AArch64::PACIBSP:
9284 case AArch64::PACIASPPC:
9285 case AArch64::PACIBSPPC:
9286 case AArch64::AUTIASP:
9287 case AArch64::AUTIBSP:
9288 case AArch64::AUTIASPPCi:
9289 case AArch64::AUTIASPPCr:
9290 case AArch64::AUTIBSPPCi:
9291 case AArch64::AUTIBSPPCr:
9292 case AArch64::RETAA:
9293 case AArch64::RETAB:
9294 case AArch64::RETAASPPCi:
9295 case AArch64::RETAASPPCr:
9296 case AArch64::RETABSPPCi:
9297 case AArch64::RETABSPPCr:
9298 case AArch64::EMITBKEY:
9299 case AArch64::PAUTH_PROLOGUE:
9300 case AArch64::PAUTH_EPILOGUE:
9302 }
9303
9304 // Don't outline LOHs.
9305 if (FuncInfo->getLOHRelated().count(&MI))
9307
9308 // We can only outline these if we will tail call the outlined function, or
9309  // fix up the CFI offsets. Currently, CFI instructions are outlined only
9310  // when the outlined sequence is tail-called.
9311 //
9312 // FIXME: If the proper fixups for the offset are implemented, this should be
9313 // possible.
9314 if (MI.isCFIInstruction())
9316
9317 // Is this a terminator for a basic block?
9318 if (MI.isTerminator())
9319 // TargetInstrInfo::getOutliningType has already filtered out anything
9320 // that would break this, so we can allow it here.
9322
9323 // Make sure none of the operands are un-outlinable.
9324 for (const MachineOperand &MOP : MI.operands()) {
9325 // A check preventing CFI indices was here before, but only CFI
9326 // instructions should have those.
9327 assert(!MOP.isCFIIndex());
9328
9329 // If it uses LR or W30 explicitly, then don't touch it.
9330 if (MOP.isReg() && !MOP.isImplicit() &&
9331 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
9333 }
9334
9335 // Special cases for instructions that can always be outlined, but will fail
9336  // the later tests. e.g., ADRPs, which are PC-relative use LR, but can always
9337 // be outlined because they don't require a *specific* value to be in LR.
9338 if (MI.getOpcode() == AArch64::ADRP)
9340
9341 // If MI is a call we might be able to outline it. We don't want to outline
9342 // any calls that rely on the position of items on the stack. When we outline
9343 // something containing a call, we have to emit a save and restore of LR in
9344 // the outlined function. Currently, this always happens by saving LR to the
9345 // stack. Thus, if we outline, say, half the parameters for a function call
9346 // plus the call, then we'll break the callee's expectations for the layout
9347 // of the stack.
9348 //
9349 // FIXME: Allow calls to functions which construct a stack frame, as long
9350 // as they don't access arguments on the stack.
9351 // FIXME: Figure out some way to analyze functions defined in other modules.
9352 // We should be able to compute the memory usage based on the IR calling
9353 // convention, even if we can't see the definition.
9354 if (MI.isCall()) {
9355 // Get the function associated with the call. Look at each operand and find
9356 // the one that represents the callee and get its name.
9357 const Function *Callee = nullptr;
9358 for (const MachineOperand &MOP : MI.operands()) {
9359 if (MOP.isGlobal()) {
9360 Callee = dyn_cast<Function>(MOP.getGlobal());
9361 break;
9362 }
9363 }
9364
9365 // Never outline calls to mcount. There isn't any rule that would require
9366 // this, but the Linux kernel's "ftrace" feature depends on it.
9367 if (Callee && Callee->getName() == "\01_mcount")
9369
9370 // If we don't know anything about the callee, assume it depends on the
9371 // stack layout of the caller. In that case, it's only legal to outline
9372 // as a tail-call. Explicitly list the call instructions we know about so we
9373 // don't get unexpected results with call pseudo-instructions.
9374 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
9375 if (MI.getOpcode() == AArch64::BLR ||
9376 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
9377 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
9378
9379 if (!Callee)
9380 return UnknownCallOutlineType;
9381
9382    // We have a function we have information about. Check whether it is
9383    // something we can safely outline.
9384 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
9385
9386 // We don't know what's going on with the callee at all. Don't touch it.
9387 if (!CalleeMF)
9388 return UnknownCallOutlineType;
9389
9390 // Check if we know anything about the callee saves on the function. If we
9391 // don't, then don't touch it, since that implies that we haven't
9392 // computed anything about its stack frame yet.
9393 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
9394 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
9395 MFI.getNumObjects() > 0)
9396 return UnknownCallOutlineType;
9397
9398 // At this point, we can say that CalleeMF ought to not pass anything on the
9399 // stack. Therefore, we can outline it.
9401 }
9402
9403 // Don't touch the link register or W30.
9404 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
9405 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
9407
9408 // Don't outline BTI instructions, because that will prevent the outlining
9409 // site from being indirectly callable.
9410 if (hasBTISemantics(MI))
9412
9414}
9415
9416void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9417 for (MachineInstr &MI : MBB) {
9418 const MachineOperand *Base;
9419 TypeSize Width(0, false);
9420 int64_t Offset;
9421 bool OffsetIsScalable;
9422
9423 // Is this a load or store with an immediate offset with SP as the base?
9424 if (!MI.mayLoadOrStore() ||
9425 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
9426 &RI) ||
9427 (Base->isReg() && Base->getReg() != AArch64::SP))
9428 continue;
9429
9430 // It is, so we have to fix it up.
9431 TypeSize Scale(0U, false);
9432 int64_t Dummy1, Dummy2;
9433
9435 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9436 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
9437 assert(Scale != 0 && "Unexpected opcode!");
9438 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9439
9440 // We've pushed the return address to the stack, so add 16 to the offset.
9441 // This is safe, since we already checked if it would overflow when we
9442 // checked if this instruction was legal to outline.
9443 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9444 StackOffsetOperand.setImm(NewImm);
9445 }
9446}
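// Editorial sketch (standalone): once the outlined frame has pushed LR, every
// SP-relative scaled immediate in the body is rebased by 16 bytes. For
// example, "ldr x0, [sp, #8]" (Scale = 8, byte offset 8) becomes
// "ldr x0, [sp, #24]", i.e. a new scaled immediate of (8 + 16) / 8 = 3.
[[maybe_unused]] static int64_t rebaseScaledSPOffsetSketch(int64_t ByteOffset,
                                                           int64_t Scale) {
  return (ByteOffset + 16) / Scale; // Same computation as NewImm above.
}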
9447
9449 const AArch64InstrInfo *TII,
9450 bool ShouldSignReturnAddr) {
9451 if (!ShouldSignReturnAddr)
9452 return;
9453
9454 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9457 TII->get(AArch64::PAUTH_EPILOGUE))
9459}
9460
9463 const outliner::OutlinedFunction &OF) const {
9464
9466
9468 FI->setOutliningStyle("Tail Call");
9470 // For thunk outlining, rewrite the last instruction from a call to a
9471 // tail-call.
9472 MachineInstr *Call = &*--MBB.instr_end();
9473 unsigned TailOpcode;
9474 if (Call->getOpcode() == AArch64::BL) {
9475 TailOpcode = AArch64::TCRETURNdi;
9476 } else {
9477 assert(Call->getOpcode() == AArch64::BLR ||
9478 Call->getOpcode() == AArch64::BLRNoIP);
9479 TailOpcode = AArch64::TCRETURNriALL;
9480 }
9481 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9482 .add(Call->getOperand(0))
9483 .addImm(0);
9484 MBB.insert(MBB.end(), TC);
9485 Call->eraseFromParent();
9486
9487 FI->setOutliningStyle("Thunk");
9488 }
9489
9490 bool IsLeafFunction = true;
9491
9492 // Is there a call in the outlined range?
9493 auto IsNonTailCall = [](const MachineInstr &MI) {
9494 return MI.isCall() && !MI.isReturn();
9495 };
9496
9497 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9498 // Fix up the instructions in the range, since we're going to modify the
9499 // stack.
9500
9501 // Bugzilla ID: 46767
9502 // TODO: Check if fixing up twice is safe so we can outline these.
9504 "Can only fix up stack references once");
9505 fixupPostOutline(MBB);
9506
9507 IsLeafFunction = false;
9508
9509 // LR has to be a live in so that we can save it.
9510 if (!MBB.isLiveIn(AArch64::LR))
9511 MBB.addLiveIn(AArch64::LR);
9512
9515
9518 Et = std::prev(MBB.end());
9519
9520 // Insert a save before the outlined region
9521 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9522 .addReg(AArch64::SP, RegState::Define)
9523 .addReg(AArch64::LR)
9524 .addReg(AArch64::SP)
9525 .addImm(-16);
9526 It = MBB.insert(It, STRXpre);
9527
9529 const TargetSubtargetInfo &STI = MF.getSubtarget();
9530 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9531 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9532
9533 // Add a CFI saying the stack was moved 16 B down.
9534 int64_t StackPosEntry =
9536 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9537 .addCFIIndex(StackPosEntry)
9539
9540 // Add a CFI saying that the LR that we want to find is now 16 B higher
9541 // than before.
9542 int64_t LRPosEntry = MF.addFrameInst(
9543 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9544 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9545 .addCFIIndex(LRPosEntry)
9547 }
9548
9549 // Insert a restore before the terminator for the function.
9550 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9551 .addReg(AArch64::SP, RegState::Define)
9552 .addReg(AArch64::LR, RegState::Define)
9553 .addReg(AArch64::SP)
9554 .addImm(16);
9555 Et = MBB.insert(Et, LDRXpost);
9556 }
9557
9558 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9559
9560 // If this is a tail call outlined function, then there's already a return.
9563 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9564 return;
9565 }
9566
9567 // It's not a tail call, so we have to insert the return ourselves.
9568
9569 // LR has to be a live in so that we can return to it.
9570 if (!MBB.isLiveIn(AArch64::LR))
9571 MBB.addLiveIn(AArch64::LR);
9572
9573 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9574 .addReg(AArch64::LR);
9575 MBB.insert(MBB.end(), ret);
9576
9577 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9578
9579 FI->setOutliningStyle("Function");
9580
9581 // Did we have to modify the stack by saving the link register?
9583 return;
9584
9585 // We modified the stack.
9586 // Walk over the basic block and fix up all the stack accesses.
9587 fixupPostOutline(MBB);
9588}
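// Schematic of the frame built above for an outlined function that contains a
// (non-tail) call and is not itself tail-called (illustrative only; return
// address signing, when requested, is additionally inserted by
// signOutlinedFunction):
//   str x30, [sp, #-16]!   // spill LR; CFI: def_cfa_offset 16, LR at CFA-16
//   ...outlined body, with SP offsets already rebased by fixupPostOutline...
//   ldr x30, [sp], #16     // reload LR
//   ret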
9589
9593
9594 // Are we tail calling?
9595 if (C.CallConstructionID == MachineOutlinerTailCall) {
9596 // If yes, then we can just branch to the label.
9597 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9598 .addGlobalAddress(M.getNamedValue(MF.getName()))
9599 .addImm(0));
9600 return It;
9601 }
9602
9603 // Are we saving the link register?
9604 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9605 C.CallConstructionID == MachineOutlinerThunk) {
9606 // No, so just insert the call.
9607 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9608 .addGlobalAddress(M.getNamedValue(MF.getName())));
9609 return It;
9610 }
9611
9612 // We want to return the spot where we inserted the call.
9614
9615 // Instructions for saving and restoring LR around the call instruction we're
9616 // going to insert.
9617 MachineInstr *Save;
9618 MachineInstr *Restore;
9619 // Can we save to a register?
9620 if (C.CallConstructionID == MachineOutlinerRegSave) {
9621 // FIXME: This logic should be sunk into a target-specific interface so that
9622 // we don't have to recompute the register.
9623 Register Reg = findRegisterToSaveLRTo(C);
9624 assert(Reg && "No callee-saved register available?");
9625
9626 // LR has to be a live in so that we can save it.
9627 if (!MBB.isLiveIn(AArch64::LR))
9628 MBB.addLiveIn(AArch64::LR);
9629
9630 // Save and restore LR from Reg.
9631 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9632 .addReg(AArch64::XZR)
9633 .addReg(AArch64::LR)
9634 .addImm(0);
9635 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9636 .addReg(AArch64::XZR)
9637 .addReg(Reg)
9638 .addImm(0);
9639 } else {
9640 // We have the default case. Save and restore from SP.
9641 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9642 .addReg(AArch64::SP, RegState::Define)
9643 .addReg(AArch64::LR)
9644 .addReg(AArch64::SP)
9645 .addImm(-16);
9646 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9647 .addReg(AArch64::SP, RegState::Define)
9648 .addReg(AArch64::LR, RegState::Define)
9649 .addReg(AArch64::SP)
9650 .addImm(16);
9651 }
9652
9653 It = MBB.insert(It, Save);
9654 It++;
9655
9656 // Insert the call.
9657 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9658 .addGlobalAddress(M.getNamedValue(MF.getName())));
9659 CallPt = It;
9660 It++;
9661
9662 It = MBB.insert(It, Restore);
9663 return CallPt;
9664}
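// Schematic call sites produced above for each construction kind
// (illustrative; xN stands for the register found by findRegisterToSaveLRTo):
//   MachineOutlinerTailCall:          b   OUTLINED_FUNCTION_N
//   MachineOutlinerNoLRSave / Thunk:  bl  OUTLINED_FUNCTION_N
//   MachineOutlinerRegSave:           mov xN, x30
//                                     bl  OUTLINED_FUNCTION_N
//                                     mov x30, xN
//   MachineOutlinerDefault:           str x30, [sp, #-16]!
//                                     bl  OUTLINED_FUNCTION_N
//                                     ldr x30, [sp], #16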
9665
9667 MachineFunction &MF) const {
9668 return MF.getFunction().hasMinSize();
9669}
9670
9673 DebugLoc &DL,
9674 bool AllowSideEffects) const {
9675 const MachineFunction &MF = *MBB.getParent();
9677 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9678
9679 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9680 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9681 } else if (STI.hasSVE()) {
9682 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9683 .addImm(0)
9684 .addImm(0);
9685 } else {
9686 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9687 .addImm(0);
9688 }
9689}
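// Illustrative forms of the zeroing instructions selected above (register
// names are examples only):
//   general-purpose register:  movz x0, #0    (prints as "mov x0, #0")
//   SVE register available:    dup  z0.d, #0  (SVE DUP immediate)
//   NEON fallback:             movi v0.2d, #0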
9690
9691std::optional<DestSourcePair>
9693
9694  // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register and a
9695  // zero immediate operand are used as aliases for the mov instruction.
9696 if (MI.getOpcode() == AArch64::ORRWrs &&
9697 MI.getOperand(1).getReg() == AArch64::WZR &&
9698 MI.getOperand(3).getImm() == 0x0 &&
9699 // Check that the w->w move is not a zero-extending w->x mov.
9700 (!MI.getOperand(0).getReg().isVirtual() ||
9701 MI.getOperand(0).getSubReg() == 0) &&
9702 (!MI.getOperand(0).getReg().isPhysical() ||
9703 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9704 AArch64::X0,
9705 /*TRI=*/nullptr) == -1))
9706 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9707
9708 if (MI.getOpcode() == AArch64::ORRXrs &&
9709 MI.getOperand(1).getReg() == AArch64::XZR &&
9710 MI.getOperand(3).getImm() == 0x0)
9711 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9712
9713 return std::nullopt;
9714}
9715
9716std::optional<DestSourcePair>
9718 if (MI.getOpcode() == AArch64::ORRWrs &&
9719 MI.getOperand(1).getReg() == AArch64::WZR &&
9720 MI.getOperand(3).getImm() == 0x0)
9721 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9722 return std::nullopt;
9723}
9724
9725std::optional<RegImmPair>
9727 int Sign = 1;
9728 int64_t Offset = 0;
9729
9730 // TODO: Handle cases where Reg is a super- or sub-register of the
9731 // destination register.
9732 const MachineOperand &Op0 = MI.getOperand(0);
9733 if (!Op0.isReg() || Reg != Op0.getReg())
9734 return std::nullopt;
9735
9736 switch (MI.getOpcode()) {
9737 default:
9738 return std::nullopt;
9739 case AArch64::SUBWri:
9740 case AArch64::SUBXri:
9741 case AArch64::SUBSWri:
9742 case AArch64::SUBSXri:
9743 Sign *= -1;
9744 [[fallthrough]];
9745 case AArch64::ADDSWri:
9746 case AArch64::ADDSXri:
9747 case AArch64::ADDWri:
9748 case AArch64::ADDXri: {
9749 // TODO: Third operand can be global address (usually some string).
9750 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9751 !MI.getOperand(2).isImm())
9752 return std::nullopt;
9753 int Shift = MI.getOperand(3).getImm();
9754 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9755 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9756 }
9757 }
9758 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9759}
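// Worked example (illustrative): for "sub x0, x1, #4, lsl #12" (SUBXri) a
// query for x0 yields {x1, -(4 << 12)} = {x1, -16384}; for "add x0, x1, #8"
// (ADDXri, shift 0) it yields {x1, +8}.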
9760
9761/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9762/// the destination register then, if possible, describe the value in terms of
9763/// the source register.
9764static std::optional<ParamLoadedValue>
9766 const TargetInstrInfo *TII,
9767 const TargetRegisterInfo *TRI) {
9768 auto DestSrc = TII->isCopyLikeInstr(MI);
9769 if (!DestSrc)
9770 return std::nullopt;
9771
9772 Register DestReg = DestSrc->Destination->getReg();
9773 Register SrcReg = DestSrc->Source->getReg();
9774
9775 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9776
9777 // If the described register is the destination, just return the source.
9778 if (DestReg == DescribedReg)
9779 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9780
9781 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9782 if (MI.getOpcode() == AArch64::ORRWrs &&
9783 TRI->isSuperRegister(DestReg, DescribedReg))
9784 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9785
9786 // We may need to describe the lower part of a ORRXrs move.
9787 if (MI.getOpcode() == AArch64::ORRXrs &&
9788 TRI->isSubRegister(DestReg, DescribedReg)) {
9789 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9790 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9791 }
9792
9793 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9794 "Unhandled ORR[XW]rs copy case");
9795
9796 return std::nullopt;
9797}
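// Worked examples (illustrative): "orr w0, wzr, w1" (ORRWrs) copies w1 into
// w0 and implicitly zeroes the upper half of x0, so describing either w0 or
// x0 yields w1. For "orr x0, xzr, x1" (ORRXrs), describing the sub-register
// w0 yields w1, the lower half of the source.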
9798
9800 // Functions cannot be split to different sections on AArch64 if they have
9801 // a red zone. This is because relaxing a cross-section branch may require
9802 // incrementing the stack pointer to spill a register, which would overwrite
9803 // the red zone.
9804 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9805 return false;
9806
9808}
9809
9811 const MachineBasicBlock &MBB) const {
9812 // Asm Goto blocks can contain conditional branches to goto labels, which can
9813 // get moved out of range of the branch instruction.
9814 auto isAsmGoto = [](const MachineInstr &MI) {
9815 return MI.getOpcode() == AArch64::INLINEASM_BR;
9816 };
9817 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9818 return false;
9819
9820 // Because jump tables are label-relative instead of table-relative, they all
9821 // must be in the same section or relocation fixup handling will fail.
9822
9823 // Check if MBB is a jump table target
9825 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9826 return llvm::is_contained(JTE.MBBs, &MBB);
9827 };
9828 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9829 return false;
9830
9831 // Check if MBB contains a jump table lookup
9832 for (const MachineInstr &MI : MBB) {
9833 switch (MI.getOpcode()) {
9834 case TargetOpcode::G_BRJT:
9835 case AArch64::JumpTableDest32:
9836 case AArch64::JumpTableDest16:
9837 case AArch64::JumpTableDest8:
9838 return false;
9839 default:
9840 continue;
9841 }
9842 }
9843
9844 // MBB isn't a special case, so it's safe to be split to the cold section.
9845 return true;
9846}
9847
9848std::optional<ParamLoadedValue>
9850 Register Reg) const {
9851 const MachineFunction *MF = MI.getMF();
9853 switch (MI.getOpcode()) {
9854 case AArch64::MOVZWi:
9855 case AArch64::MOVZXi: {
9856 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9857 // 64-bit parameters, so we need to consider super-registers.
9858 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9859 return std::nullopt;
9860
9861 if (!MI.getOperand(1).isImm())
9862 return std::nullopt;
9863 int64_t Immediate = MI.getOperand(1).getImm();
9864 int Shift = MI.getOperand(2).getImm();
9865 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9866 nullptr);
9867 }
9868 case AArch64::ORRWrs:
9869 case AArch64::ORRXrs:
9870 return describeORRLoadedValue(MI, Reg, this, TRI);
9871 }
9872
9874}
9875
9877 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9878 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9879 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9880 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9881
9882 // Anyexts are nops.
9883 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9884 return true;
9885
9886 Register DefReg = ExtMI.getOperand(0).getReg();
9887 if (!MRI.hasOneNonDBGUse(DefReg))
9888 return false;
9889
9890 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9891 // addressing mode.
9892 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9893 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9894}
9895
9897 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9898}
9899
9900bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9901 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9902}
9903
9904bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9905 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9906}
9907
9908unsigned int
9910 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9911}
9912
9913bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9914 unsigned Scale) const {
9915 if (Offset && Scale)
9916 return false;
9917
9918 // Check Reg + Imm
9919 if (!Scale) {
9920 // 9-bit signed offset
9921 if (isInt<9>(Offset))
9922 return true;
9923
9924 // 12-bit unsigned offset
9925 unsigned Shift = Log2_64(NumBytes);
9926 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9927 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9928 (Offset >> Shift) << Shift == Offset)
9929 return true;
9930 return false;
9931 }
9932
9933 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9934 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9935}
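// Editorial sketch (standalone, hypothetical helper): the rules above accept
// either a 9-bit signed unscaled byte offset, a positive offset that is a
// multiple of the access size and fits in a scaled 12-bit unsigned field, or
// a register offset whose scale is 1 or the access size. NumBytes is assumed
// to be a power of two, as in the original.
[[maybe_unused]] static bool isLegalAddressingModeSketch(unsigned NumBytes,
                                                         int64_t Offset,
                                                         unsigned Scale) {
  if (Offset && Scale)
    return false; // Reg + Imm and Reg + Scale * Reg are mutually exclusive.
  if (!Scale) {
    if (Offset >= -256 && Offset <= 255)
      return true; // 9-bit signed (unscaled) immediate.
    return NumBytes && Offset > 0 && Offset % NumBytes == 0 &&
           (Offset / NumBytes) <= 4095; // 12-bit unsigned scaled immediate.
  }
  return Scale == 1 || Scale == NumBytes;
}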
9936
9938 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9939 return AArch64::BLRNoIP;
9940 else
9941 return AArch64::BLR;
9942}
9943
9946 Register TargetReg, bool FrameSetup) const {
9947  assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9948
9950 MachineFunction &MF = *MBB.getParent();
9951 const AArch64InstrInfo *TII =
9952 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9953 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9955
9956 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9957 MachineBasicBlock *LoopTestMBB =
9959 MF.insert(MBBInsertPoint, LoopTestMBB);
9960 MachineBasicBlock *LoopBodyMBB =
9962 MF.insert(MBBInsertPoint, LoopBodyMBB);
9964 MF.insert(MBBInsertPoint, ExitMBB);
9965 MachineInstr::MIFlag Flags =
9967
9968 // LoopTest:
9969 // SUB SP, SP, #ProbeSize
9970 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9971 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9972
9973 // CMP SP, TargetReg
9974 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9975 AArch64::XZR)
9976 .addReg(AArch64::SP)
9977 .addReg(TargetReg)
9979 .setMIFlags(Flags);
9980
9981 // B.<Cond> LoopExit
9982 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9984 .addMBB(ExitMBB)
9985 .setMIFlags(Flags);
9986
9987 // STR XZR, [SP]
9988 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9989 .addReg(AArch64::XZR)
9990 .addReg(AArch64::SP)
9991 .addImm(0)
9992 .setMIFlags(Flags);
9993
9994 // B loop
9995 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9996 .addMBB(LoopTestMBB)
9997 .setMIFlags(Flags);
9998
9999 // LoopExit:
10000 // MOV SP, TargetReg
10001 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10002 .addReg(TargetReg)
10003 .addImm(0)
10005 .setMIFlags(Flags);
10006
10007 // LDR XZR, [SP]
10008 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10009 .addReg(AArch64::XZR, RegState::Define)
10010 .addReg(AArch64::SP)
10011 .addImm(0)
10012 .setMIFlags(Flags);
10013
10014 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10016
10017 LoopTestMBB->addSuccessor(ExitMBB);
10018 LoopTestMBB->addSuccessor(LoopBodyMBB);
10019 LoopBodyMBB->addSuccessor(LoopTestMBB);
10020 MBB.addSuccessor(LoopTestMBB);
10021
10022 // Update liveins.
10023 if (MF.getRegInfo().reservedRegsFrozen())
10024 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10025
10026 return ExitMBB->begin();
10027}
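// Schematic of the probing loop emitted above (illustrative; TargetReg shown
// as x9, and the branch condition is written as <cond> since it is elided
// here):
//   LoopTest:
//     sub  sp, sp, #ProbeSize
//     cmp  sp, x9
//     b.<cond> LoopExit
//   LoopBody:
//     str  xzr, [sp]
//     b    LoopTest
//   LoopExit:
//     mov  sp, x9
//     ldr  xzr, [sp]         // probe the final allocation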
10028
10029namespace {
10030class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10031 MachineFunction *MF;
10032 const TargetInstrInfo *TII;
10033 const TargetRegisterInfo *TRI;
10035
10036 /// The block of the loop
10037 MachineBasicBlock *LoopBB;
10038 /// The conditional branch of the loop
10039 MachineInstr *CondBranch;
10040 /// The compare instruction for loop control
10041 MachineInstr *Comp;
10042 /// The number of the operand of the loop counter value in Comp
10043 unsigned CompCounterOprNum;
10044 /// The instruction that updates the loop counter value
10045 MachineInstr *Update;
10046 /// The number of the operand of the loop counter value in Update
10047 unsigned UpdateCounterOprNum;
10048 /// The initial value of the loop counter
10049 Register Init;
10050 /// True iff Update is a predecessor of Comp
10051 bool IsUpdatePriorComp;
10052
10053 /// The normalized condition used by createTripCountGreaterCondition()
10055
10056public:
10057 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10058 MachineInstr *Comp, unsigned CompCounterOprNum,
10059 MachineInstr *Update, unsigned UpdateCounterOprNum,
10060 Register Init, bool IsUpdatePriorComp,
10062 : MF(Comp->getParent()->getParent()),
10063 TII(MF->getSubtarget().getInstrInfo()),
10064 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10065 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10066 CompCounterOprNum(CompCounterOprNum), Update(Update),
10067 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10068 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10069
10070 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10071 // Make the instructions for loop control be placed in stage 0.
10072 // The predecessors of Comp are considered by the caller.
10073 return MI == Comp;
10074 }
10075
10076 std::optional<bool> createTripCountGreaterCondition(
10077 int TC, MachineBasicBlock &MBB,
10078 SmallVectorImpl<MachineOperand> &CondParam) override {
10079 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10080 // Cond is normalized for such use.
10081 // The predecessors of the branch are assumed to have already been inserted.
10082 CondParam = Cond;
10083 return {};
10084 }
10085
10086 void createRemainingIterationsGreaterCondition(
10088 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10089
10090 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10091
10092 void adjustTripCount(int TripCountAdjust) override {}
10093
10094 void disposed() override {}
10095 bool isMVEExpanderSupported() override { return true; }
10096};
10097} // namespace
10098
10099/// Clone an instruction from MI. The register of the ReplaceOprNum-th
10100/// operand is replaced by ReplaceReg. The output register is newly created.
10101/// The other operands are unchanged from MI.
10102static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10103 Register ReplaceReg, MachineBasicBlock &MBB,
10104 MachineBasicBlock::iterator InsertTo) {
10107 const TargetRegisterInfo *TRI =
10110 Register Result = 0;
10111 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10112 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10113 Result = MRI.createVirtualRegister(
10114 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10115 NewMI->getOperand(I).setReg(Result);
10116 } else if (I == ReplaceOprNum) {
10117 MRI.constrainRegClass(
10118 ReplaceReg,
10119 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
10120 NewMI->getOperand(I).setReg(ReplaceReg);
10121 }
10122 }
10123 MBB.insert(InsertTo, NewMI);
10124 return Result;
10125}
10126
10127void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10130 // Create and accumulate conditions for next TC iterations.
10131 // Example:
10132 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10133 // # iteration of the kernel
10134 //
10135 // # insert the following instructions
10136 // cond = CSINCXr 0, 0, C, implicit $nzcv
10137 // counter = ADDXri counter, 1 # clone from this->Update
10138 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10139 // cond = CSINCXr cond, cond, C, implicit $nzcv
10140 // ... (repeat TC times)
10141 // SUBSXri cond, 0, implicit-def $nzcv
10142
10143 assert(CondBranch->getOpcode() == AArch64::Bcc);
10144 // CondCode to exit the loop
10146 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10147 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10149
10150 // Accumulate conditions to exit the loop
10151 Register AccCond = AArch64::XZR;
10152
10153 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10154 auto AccumulateCond = [&](Register CurCond,
10156 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10157 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10158 .addReg(NewCond, RegState::Define)
10159 .addReg(CurCond)
10160 .addReg(CurCond)
10162 return NewCond;
10163 };
10164
10165 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
10166    // The Update and Comp instructions for I == 0 already exist in MBB
10167    // (MBB is an unrolled kernel).
10168 Register Counter;
10169 for (int I = 0; I <= TC; ++I) {
10170 Register NextCounter;
10171 if (I != 0)
10172 NextCounter =
10173 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10174
10175 AccCond = AccumulateCond(AccCond, CC);
10176
10177 if (I != TC) {
10178 if (I == 0) {
10179 if (Update != Comp && IsUpdatePriorComp) {
10180 Counter =
10181 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10182 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
10183 MBB.end());
10184 } else {
10185          // We can use the already-computed value.
10186 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
10187 }
10188 } else if (Update != Comp) {
10189 NextCounter =
10190 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10191 }
10192 }
10193 Counter = NextCounter;
10194 }
10195 } else {
10196 Register Counter;
10197 if (LastStage0Insts.empty()) {
10198      // Use the initial counter value (testing whether the trip count is
10199      // sufficient for the pipelined code to be executed).
10200 Counter = Init;
10201 if (IsUpdatePriorComp)
10202 Counter =
10203 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10204 } else {
10205 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
10206 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
10207 }
10208
10209 for (int I = 0; I <= TC; ++I) {
10210 Register NextCounter;
10211 NextCounter =
10212 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
10213 AccCond = AccumulateCond(AccCond, CC);
10214 if (I != TC && Update != Comp)
10215 NextCounter =
10216 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
10217 Counter = NextCounter;
10218 }
10219 }
10220
10221 // If AccCond == 0, the remainder is greater than TC.
10222 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
10223 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
10224 .addReg(AccCond)
10225 .addImm(0)
10226 .addImm(0);
10227 Cond.clear();
10229}
10230
10231static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
10232 Register &RegMBB, Register &RegOther) {
10233 assert(Phi.getNumOperands() == 5);
10234 if (Phi.getOperand(2).getMBB() == MBB) {
10235 RegMBB = Phi.getOperand(1).getReg();
10236 RegOther = Phi.getOperand(3).getReg();
10237 } else {
10238 assert(Phi.getOperand(4).getMBB() == MBB);
10239 RegMBB = Phi.getOperand(3).getReg();
10240 RegOther = Phi.getOperand(1).getReg();
10241 }
10242}
10243
10244static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
10245 if (!Reg.isVirtual())
10246 return false;
10247 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
10248 return MRI.getVRegDef(Reg)->getParent() != BB;
10249}
10250
10251/// If Reg is an induction variable, return true and set some parameters
10252static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
10253 MachineInstr *&UpdateInst,
10254 unsigned &UpdateCounterOprNum, Register &InitReg,
10255 bool &IsUpdatePriorComp) {
10256 // Example:
10257 //
10258 // Preheader:
10259 // InitReg = ...
10260 // LoopBB:
10261 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
10262 // Reg = COPY Reg0 ; COPY is ignored.
10263 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
10264 // ; Reg is the value calculated in the previous
10265 // ; iteration, so IsUpdatePriorComp == false.
10266
10267 if (LoopBB->pred_size() != 2)
10268 return false;
10269 if (!Reg.isVirtual())
10270 return false;
10271 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
10272 UpdateInst = nullptr;
10273 UpdateCounterOprNum = 0;
10274 InitReg = 0;
10275 IsUpdatePriorComp = true;
10276 Register CurReg = Reg;
10277 while (true) {
10278 MachineInstr *Def = MRI.getVRegDef(CurReg);
10279 if (Def->getParent() != LoopBB)
10280 return false;
10281 if (Def->isCopy()) {
10282 // Ignore copy instructions unless they contain subregisters
10283 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
10284 return false;
10285 CurReg = Def->getOperand(1).getReg();
10286 } else if (Def->isPHI()) {
10287 if (InitReg != 0)
10288 return false;
10289 if (!UpdateInst)
10290 IsUpdatePriorComp = false;
10291 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
10292 } else {
10293 if (UpdateInst)
10294 return false;
10295 switch (Def->getOpcode()) {
10296 case AArch64::ADDSXri:
10297 case AArch64::ADDSWri:
10298 case AArch64::SUBSXri:
10299 case AArch64::SUBSWri:
10300 case AArch64::ADDXri:
10301 case AArch64::ADDWri:
10302 case AArch64::SUBXri:
10303 case AArch64::SUBWri:
10304 UpdateInst = Def;
10305 UpdateCounterOprNum = 1;
10306 break;
10307 case AArch64::ADDSXrr:
10308 case AArch64::ADDSWrr:
10309 case AArch64::SUBSXrr:
10310 case AArch64::SUBSWrr:
10311 case AArch64::ADDXrr:
10312 case AArch64::ADDWrr:
10313 case AArch64::SUBXrr:
10314 case AArch64::SUBWrr:
10315 UpdateInst = Def;
10316 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
10317 UpdateCounterOprNum = 1;
10318 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
10319 UpdateCounterOprNum = 2;
10320 else
10321 return false;
10322 break;
10323 default:
10324 return false;
10325 }
10326 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
10327 }
10328
10329 if (!CurReg.isVirtual())
10330 return false;
10331 if (Reg == CurReg)
10332 break;
10333 }
10334
10335 if (!UpdateInst)
10336 return false;
10337
10338 return true;
10339}
10340
10341std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
10343 // Accept loops that meet the following conditions
10344 // * The conditional branch is BCC
10345 // * The compare instruction is ADDS/SUBS/WHILEXX
10346 // * One operand of the compare is an induction variable and the other is a
10347 // loop invariant value
10348 // * The induction variable is incremented/decremented by a single instruction
10349 // * Does not contain CALL or instructions which have unmodeled side effects
10350
10351 for (MachineInstr &MI : *LoopBB)
10352 if (MI.isCall() || MI.hasUnmodeledSideEffects())
10353 // This instruction may use NZCV, which interferes with the instruction to
10354 // be inserted for loop control.
10355 return nullptr;
10356
10357 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
10359 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
10360 return nullptr;
10361
10362 // Infinite loops are not supported
10363 if (TBB == LoopBB && FBB == LoopBB)
10364 return nullptr;
10365
10366  // Must be a conditional branch.
10367 if (TBB != LoopBB && FBB == nullptr)
10368 return nullptr;
10369
10370 assert((TBB == LoopBB || FBB == LoopBB) &&
10371 "The Loop must be a single-basic-block loop");
10372
10373 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
10375
10376 if (CondBranch->getOpcode() != AArch64::Bcc)
10377 return nullptr;
10378
10379 // Normalization for createTripCountGreaterCondition()
10380 if (TBB == LoopBB)
10382
10383 MachineInstr *Comp = nullptr;
10384 unsigned CompCounterOprNum = 0;
10385 for (MachineInstr &MI : reverse(*LoopBB)) {
10386 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
10387 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
10388 // operands is a loop invariant value
10389
10390 switch (MI.getOpcode()) {
10391 case AArch64::SUBSXri:
10392 case AArch64::SUBSWri:
10393 case AArch64::ADDSXri:
10394 case AArch64::ADDSWri:
10395 Comp = &MI;
10396 CompCounterOprNum = 1;
10397 break;
10398 case AArch64::ADDSWrr:
10399 case AArch64::ADDSXrr:
10400 case AArch64::SUBSWrr:
10401 case AArch64::SUBSXrr:
10402 Comp = &MI;
10403 break;
10404 default:
10405 if (isWhileOpcode(MI.getOpcode())) {
10406 Comp = &MI;
10407 break;
10408 }
10409 return nullptr;
10410 }
10411
10412 if (CompCounterOprNum == 0) {
10413 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
10414 CompCounterOprNum = 2;
10415 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
10416 CompCounterOprNum = 1;
10417 else
10418 return nullptr;
10419 }
10420 break;
10421 }
10422 }
10423 if (!Comp)
10424 return nullptr;
10425
10426 MachineInstr *Update = nullptr;
10427 Register Init;
10428 bool IsUpdatePriorComp;
10429 unsigned UpdateCounterOprNum;
10430 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
10431 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
10432 return nullptr;
10433
10434 return std::make_unique<AArch64PipelinerLoopInfo>(
10435 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
10436 Init, IsUpdatePriorComp, Cond);
10437}
10438
10439/// verifyInstruction - Perform target specific instruction verification.
10440bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
10441 StringRef &ErrInfo) const {
10442
10443 // Verify that immediate offsets on load/store instructions are within range.
10444 // Stack objects with an FI operand are excluded as they can be fixed up
10445 // during PEI.
10446 TypeSize Scale(0U, false), Width(0U, false);
10447 int64_t MinOffset, MaxOffset;
10448 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
10449 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
10450 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
10451 int64_t Imm = MI.getOperand(ImmIdx).getImm();
10452 if (Imm < MinOffset || Imm > MaxOffset) {
10453 ErrInfo = "Unexpected immediate on load/store instruction";
10454 return false;
10455 }
10456 }
10457 }
10458 return true;
10459}
10460
10461#define GET_INSTRINFO_HELPERS
10462#define GET_INSTRMAP_INFO
10463#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Only emit a branch.
@ MachineOutlinerRegSave
Same as default, but save LR to a register.
@ MachineOutlinerNoLRSave
Emit a call and return.
@ MachineOutlinerThunk
Emit a call and tail-call.
@ MachineOutlinerDefault
Emit a save, restore, call, and return.
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
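A rough illustration of how this query is typically used (a sketch only; the surrounding code and the choice of AArch64::LDRXui are illustrative, not taken from this file):

  // Query the addressing limits of a load/store opcode.
  TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
  int64_t MinOffset = 0, MaxOffset = 0;
  if (AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width,
                                     MinOffset, MaxOffset)) {
    // For LDRXui this reports an 8-byte scale and width with an immediate
    // range of [0, 4095], i.e. byte offsets 0..32760 in steps of 8.
  }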
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that sets flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operand of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operand of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instruction is a SEH instruction used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if its results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:211
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:705
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:702
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
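A minimal sketch of the usual backward-scan idiom built on these three calls (the helper name, the choice of X16, and the use of llvm::reverse are illustrative assumptions, not code from this file):

  // Compute whether X16 is free immediately before MI in its block.
  static bool isX16FreeBefore(const MachineInstr &MI,
                              const TargetRegisterInfo &TRI) {
    const MachineBasicBlock &MBB = *MI.getParent();
    LiveRegUnits Units(TRI);
    Units.addLiveOuts(MBB);                      // seed with block live-outs
    for (const MachineInstr &I : llvm::reverse(MBB)) {
      Units.stepBackward(I);                     // liveness just before I
      if (&I == &MI)
        break;
    }
    return Units.available(AArch64::X16);        // no unit of X16 is live
  }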
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:759
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:558
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:573
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:664
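These factory functions are normally paired with MachineFunction::addFrameInst and a CFI_INSTRUCTION pseudo; a minimal sketch, assuming MF, MBB, MBBI, DL and TII come from the surrounding frame-lowering context:

  // Record ".cfi_def_cfa_offset 16" and attach it to the instruction stream.
  unsigned CFIIndex =
      MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
  BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
      .addCFIIndex(CFIIndex)
      .setMIFlags(MachineInstr::FrameSetup);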
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1542
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same as getFirstTerminator, but it ignores bundles and returns an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:782
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
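A sketch of how these scavenger calls combine to grab a scratch GPR at the end of a block (the helper name and the GPR64 register class choice are assumptions for illustration):

  static Register findScratchGPR(RegScavenger &RS, MachineBasicBlock &MBB) {
    RS.enterBasicBlockEnd(MBB);                  // track liveness from the end
    Register Scratch = RS.FindUnusedReg(&AArch64::GPR64RegClass);
    if (Scratch.isValid())
      RS.setRegUsed(Scratch);                    // reserve it for the caller
    return Scratch;                              // invalid register if none is free
  }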
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:435
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
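For example, a frame offset of 16 fixed bytes plus 32 vscale-scaled bytes (two 128-bit SVE granules) can be built and decomposed like this (a sketch, not code from this file):

  StackOffset Off = StackOffset::getFixed(16) + StackOffset::getScalable(32);
  int64_t FixedPart = Off.getFixed();        // 16
  int64_t ScalablePart = Off.getScalable();  // 32, multiplied by vscale at run time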
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
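A sketch of the shifter-immediate encoding in use, in the style of the emitFrameOffsetAdj helper above (MBB, MBBI, DL, TII, DestReg and SrcReg are assumed to be in scope):

  // Materialize "add DestReg, SrcReg, #1, lsl #12" (i.e. add 4096).
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
      .addReg(SrcReg)
      .addImm(1)
      .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 12));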
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
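A sketch of what a caller sees (the concrete constant is illustrative; the exact split is decided by the expansion logic):

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(0x1234000056780000ULL, 64, Insn);
  // Insn now holds a MOVZXi model for the 0x5678 chunk (lsl #16) followed by
  // a MOVKXi model for the 0x1234 chunk (lsl #48); each entry carries the
  // opcode, the 16-bit chunk and the encoded shift for the expansion pass.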
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:67
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
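A sketch of the usual frame-index-elimination pattern around this check (MI, Offset and the handling of the leftover part are assumed context, not code quoted from this file):

  bool UseUnscaledOp = false;
  unsigned UnscaledOp = 0;
  int64_t EmittableOffset = 0;
  int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                         &UnscaledOp, &EmittableOffset);
  if (Status & AArch64FrameOffsetCanUpdate) {
    // EmittableOffset is the part MI can absorb in its immediate field;
    // whatever remains in Offset must be added to the base register first
    // (or the instruction switched to UnscaledOp if UseUnscaledOp is set).
  }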
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:296
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
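A typical prologue-style use, shown as a sketch (MBB, MBBI, DL and TII are assumed to be the usual frame-lowering context):

  // sub sp, sp, #16, tagged as frame setup.
  emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                  StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup);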
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition: STLExtras.h:336
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2082
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
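Both encoders stream into any raw_ostream; a small sketch of building a DWARF expression fragment the way createDefCFAExpression above does (the particular opcodes and the -16 operand are illustrative, and the Dwarf.h and raw_ostream.h headers are assumed to be included):

  SmallString<64> Expr;
  raw_svector_ostream OS(Expr);
  OS << char(dwarf::DW_OP_consts);
  encodeSLEB128(-16, OS);            // signed operand
  OS << char(dwarf::DW_OP_plus);
  // Expr.str() now holds the raw bytes, e.g. for MCCFIInstruction::createEscape.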
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:581
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.