1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/Module.h"
44#include "llvm/MC/MCAsmInfo.h"
45#include "llvm/MC/MCInst.h"
47#include "llvm/MC/MCInstrDesc.h"
52#include "llvm/Support/LEB128.h"
56#include <cassert>
57#include <cstdint>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define GET_INSTRINFO_CTOR_DTOR
64#include "AArch64GenInstrInfo.inc"
65
67 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
68 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
69
71 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
72 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
73
74static cl::opt<unsigned>
75 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
76 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
77
78static cl::opt<unsigned>
79 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
80 cl::desc("Restrict range of B instructions (DEBUG)"));
81
82AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
83 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
84 AArch64::CATCHRET),
85 RI(STI.getTargetTriple()), Subtarget(STI) {}
86
87/// getInstSizeInBytes - Return the number of bytes of code the specified
88/// instruction may occupy. This returns the maximum number of bytes.
89unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
90 const MachineBasicBlock &MBB = *MI.getParent();
91 const MachineFunction *MF = MBB.getParent();
92 const Function &F = MF->getFunction();
93 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
94
95 {
96 auto Op = MI.getOpcode();
97 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
98 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
99 }
100
101 // Meta-instructions emit no code.
102 if (MI.isMetaInstruction())
103 return 0;
104
105 // FIXME: We currently only handle pseudoinstructions that don't get expanded
106 // before the assembly printer.
107 unsigned NumBytes = 0;
108 const MCInstrDesc &Desc = MI.getDesc();
109
110 // Size should be preferably set in
111 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
112 // Specific cases handle instructions of variable sizes
113 switch (Desc.getOpcode()) {
114 default:
115 if (Desc.getSize())
116 return Desc.getSize();
117
118 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
119 // with fixed constant size but not specified in .td file) is a normal
120 // 4-byte insn.
121 NumBytes = 4;
122 break;
123 case TargetOpcode::STACKMAP:
124 // The upper bound for a stackmap intrinsic is the full length of its shadow
125 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
126 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
127 break;
128 case TargetOpcode::PATCHPOINT:
129 // The size of the patchpoint intrinsic is the number of bytes requested
130 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
131 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
132 break;
133 case TargetOpcode::STATEPOINT:
134 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
135 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
136 // No patch bytes means a normal call inst is emitted
137 if (NumBytes == 0)
138 NumBytes = 4;
139 break;
140 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
141 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
142 // instructions are expanded to the specified number of NOPs. Otherwise,
143 // they are expanded to 36-byte XRay sleds.
144 NumBytes =
145 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
146 break;
147 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
148 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
149 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
150 NumBytes = 36;
151 break;
152 case TargetOpcode::PATCHABLE_EVENT_CALL:
153 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
154 NumBytes = 24;
155 break;
156
157 case AArch64::SPACE:
158 NumBytes = MI.getOperand(1).getImm();
159 break;
160 case TargetOpcode::BUNDLE:
161 NumBytes = getInstBundleLength(MI);
162 break;
163 }
164
165 return NumBytes;
166}
167
168unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
169 unsigned Size = 0;
170 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
171 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
172 while (++I != E && I->isInsideBundle()) {
173 assert(!I->isBundle() && "No nested bundle!");
174 Size += getInstSizeInBytes(*I);
175 }
176 return Size;
177}
178
179static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
180 SmallVectorImpl<MachineOperand> &Cond) {
181 // Block ends with fall-through condbranch.
182 switch (LastInst->getOpcode()) {
183 default:
184 llvm_unreachable("Unknown branch instruction?");
185 case AArch64::Bcc:
186 Target = LastInst->getOperand(1).getMBB();
187 Cond.push_back(LastInst->getOperand(0));
188 break;
189 case AArch64::CBZW:
190 case AArch64::CBZX:
191 case AArch64::CBNZW:
192 case AArch64::CBNZX:
193 Target = LastInst->getOperand(1).getMBB();
194 Cond.push_back(MachineOperand::CreateImm(-1));
195 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
196 Cond.push_back(LastInst->getOperand(0));
197 break;
198 case AArch64::TBZW:
199 case AArch64::TBZX:
200 case AArch64::TBNZW:
201 case AArch64::TBNZX:
202 Target = LastInst->getOperand(2).getMBB();
203 Cond.push_back(MachineOperand::CreateImm(-1));
204 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
205 Cond.push_back(LastInst->getOperand(0));
206 Cond.push_back(LastInst->getOperand(1));
207 }
208}
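// Illustrative sketch of the Cond encoding produced above (operand order
// follows parseCondBranch): a block ending in "b.eq %bb.2" yields
// Cond = { EQ } and Target = %bb.2, while "tbz w8, #3, %bb.2" yields
// Cond = { -1, AArch64::TBZW, w8, 3 }. insertBranch and
// reverseBranchCondition below consume exactly this layout.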
209
210static unsigned getBranchDisplacementBits(unsigned Opc) {
211 switch (Opc) {
212 default:
213 llvm_unreachable("unexpected opcode!");
214 case AArch64::B:
215 return BDisplacementBits;
216 case AArch64::TBNZW:
217 case AArch64::TBZW:
218 case AArch64::TBNZX:
219 case AArch64::TBZX:
220 return TBZDisplacementBits;
221 case AArch64::CBNZW:
222 case AArch64::CBZW:
223 case AArch64::CBNZX:
224 case AArch64::CBZX:
225 return CBZDisplacementBits;
226 case AArch64::Bcc:
227 return BCCDisplacementBits;
228 }
229}
230
231bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
232 int64_t BrOffset) const {
233 unsigned Bits = getBranchDisplacementBits(BranchOp);
234 assert(Bits >= 3 && "max branch displacement must be enough to jump "
235 "over conditional branch expansion");
236 return isIntN(Bits, BrOffset / 4);
237}
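// Worked numbers for the default displacement widths above: the offset is
// checked in 4-byte units, so N bits cover a byte range of roughly
// +/-2^(N+1). B (26 bits) reaches about +/-128 MiB, CB[N]Z and Bcc (19 bits)
// about +/-1 MiB, and TB[N]Z (14 bits) about +/-32 KiB.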
238
239MachineBasicBlock *AArch64InstrInfo::getBranchDestBlock(
240 const MachineInstr &MI) const {
241 switch (MI.getOpcode()) {
242 default:
243 llvm_unreachable("unexpected opcode!");
244 case AArch64::B:
245 return MI.getOperand(0).getMBB();
246 case AArch64::TBZW:
247 case AArch64::TBNZW:
248 case AArch64::TBZX:
249 case AArch64::TBNZX:
250 return MI.getOperand(2).getMBB();
251 case AArch64::CBZW:
252 case AArch64::CBNZW:
253 case AArch64::CBZX:
254 case AArch64::CBNZX:
255 case AArch64::Bcc:
256 return MI.getOperand(1).getMBB();
257 }
258}
259
260void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
261 MachineBasicBlock &NewDestBB,
262 MachineBasicBlock &RestoreBB,
263 const DebugLoc &DL,
264 int64_t BrOffset,
265 RegScavenger *RS) const {
266 assert(RS && "RegScavenger required for long branching");
267 assert(MBB.empty() &&
268 "new block should be inserted for expanding unconditional branch");
269 assert(MBB.pred_size() == 1);
270 assert(RestoreBB.empty() &&
271 "restore block should be inserted for restoring clobbered registers");
272
273 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
274 // Offsets outside of the signed 33-bit range are not supported for ADRP +
275 // ADD.
276 if (!isInt<33>(BrOffset))
278 "Branch offsets outside of the signed 33-bit range not supported");
279
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
281 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
282 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
283 .addReg(Reg)
284 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
285 .addImm(0);
286 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
287 };
288
289 RS->enterBasicBlockEnd(MBB);
290 // If X16 is unused, we can rely on the linker to insert a range extension
291 // thunk if NewDestBB is out of range of a single B instruction.
292 constexpr Register Reg = AArch64::X16;
293 if (!RS->isRegUsed(Reg)) {
294 insertUnconditionalBranch(MBB, &NewDestBB, DL);
295 RS->setRegUsed(Reg);
296 return;
297 }
298
299 // If there's a free register and it's worth inflating the code size,
300 // manually insert the indirect branch.
301 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
302 if (Scavenged != AArch64::NoRegister &&
303 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
304 buildIndirectBranch(Scavenged, NewDestBB);
305 RS->setRegUsed(Scavenged);
306 return;
307 }
308
309 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
310 // with red zones.
311 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
312 if (!AFI || AFI->hasRedZone().value_or(true))
313 report_fatal_error(
314 "Unable to insert indirect branch inside function that has red zone");
315
316 // Otherwise, spill X16 and defer range extension to the linker.
317 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
318 .addReg(AArch64::SP, RegState::Define)
319 .addReg(Reg)
320 .addReg(AArch64::SP)
321 .addImm(-16);
322
323 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
324
325 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
326 .addReg(AArch64::SP, RegState::Define)
327 .addReg(Reg, RegState::Define)
328 .addReg(AArch64::SP)
329 .addImm(16);
330}
331
332// Branch analysis.
333bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
334 MachineBasicBlock *&TBB,
335 MachineBasicBlock *&FBB,
336 SmallVectorImpl<MachineOperand> &Cond,
337 bool AllowModify) const {
338 // If the block has no terminators, it just falls into the block after it.
339 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
340 if (I == MBB.end())
341 return false;
342
343 // Skip over SpeculationBarrierEndBB terminators
344 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
345 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
346 --I;
347 }
348
349 if (!isUnpredicatedTerminator(*I))
350 return false;
351
352 // Get the last instruction in the block.
353 MachineInstr *LastInst = &*I;
354
355 // If there is only one terminator instruction, process it.
356 unsigned LastOpc = LastInst->getOpcode();
357 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
358 if (isUncondBranchOpcode(LastOpc)) {
359 TBB = LastInst->getOperand(0).getMBB();
360 return false;
361 }
362 if (isCondBranchOpcode(LastOpc)) {
363 // Block ends with fall-through condbranch.
364 parseCondBranch(LastInst, TBB, Cond);
365 return false;
366 }
367 return true; // Can't handle indirect branch.
368 }
369
370 // Get the instruction before it if it is a terminator.
371 MachineInstr *SecondLastInst = &*I;
372 unsigned SecondLastOpc = SecondLastInst->getOpcode();
373
374 // If AllowModify is true and the block ends with two or more unconditional
375 // branches, delete all but the first unconditional branch.
376 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
377 while (isUncondBranchOpcode(SecondLastOpc)) {
378 LastInst->eraseFromParent();
379 LastInst = SecondLastInst;
380 LastOpc = LastInst->getOpcode();
381 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
382 // Return now; the only remaining terminator is an unconditional branch.
383 TBB = LastInst->getOperand(0).getMBB();
384 return false;
385 }
386 SecondLastInst = &*I;
387 SecondLastOpc = SecondLastInst->getOpcode();
388 }
389 }
390
391 // If we're allowed to modify and the block ends in an unconditional branch
392 // which could simply fallthrough, remove the branch. (Note: This case only
393 // matters when we can't understand the whole sequence, otherwise it's also
394 // handled by BranchFolding.cpp.)
395 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
396 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
397 LastInst->eraseFromParent();
398 LastInst = SecondLastInst;
399 LastOpc = LastInst->getOpcode();
400 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
401 assert(!isUncondBranchOpcode(LastOpc) &&
402 "unreachable unconditional branches removed above");
403
404 if (isCondBranchOpcode(LastOpc)) {
405 // Block ends with fall-through condbranch.
406 parseCondBranch(LastInst, TBB, Cond);
407 return false;
408 }
409 return true; // Can't handle indirect branch.
410 }
411 SecondLastInst = &*I;
412 SecondLastOpc = SecondLastInst->getOpcode();
413 }
414
415 // If there are three terminators, we don't know what sort of block this is.
416 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
417 return true;
418
419 // If the block ends with a B and a Bcc, handle it.
420 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
421 parseCondBranch(SecondLastInst, TBB, Cond);
422 FBB = LastInst->getOperand(0).getMBB();
423 return false;
424 }
425
426 // If the block ends with two unconditional branches, handle it. The second
427 // one is not executed, so remove it.
428 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
429 TBB = SecondLastInst->getOperand(0).getMBB();
430 I = LastInst;
431 if (AllowModify)
432 I->eraseFromParent();
433 return false;
434 }
435
436 // ...likewise if it ends with an indirect branch followed by an unconditional
437 // branch.
438 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
439 I = LastInst;
440 if (AllowModify)
441 I->eraseFromParent();
442 return true;
443 }
444
445 // Otherwise, can't handle this.
446 return true;
447}
448
449bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
450 MachineBranchPredicate &MBP,
451 bool AllowModify) const {
452 // For the moment, handle only a block which ends with a cb(n)zx followed by
453 // a fallthrough. Why this? Because it is a common form.
454 // TODO: Should we handle b.cc?
455
456 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
457 if (I == MBB.end())
458 return true;
459
460 // Skip over SpeculationBarrierEndBB terminators
461 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
462 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
463 --I;
464 }
465
466 if (!isUnpredicatedTerminator(*I))
467 return true;
468
469 // Get the last instruction in the block.
470 MachineInstr *LastInst = &*I;
471 unsigned LastOpc = LastInst->getOpcode();
472 if (!isCondBranchOpcode(LastOpc))
473 return true;
474
475 switch (LastOpc) {
476 default:
477 return true;
478 case AArch64::CBZW:
479 case AArch64::CBZX:
480 case AArch64::CBNZW:
481 case AArch64::CBNZX:
482 break;
483 };
484
485 MBP.TrueDest = LastInst->getOperand(1).getMBB();
486 assert(MBP.TrueDest && "expected!");
487 MBP.FalseDest = MBB.getNextNode();
488
489 MBP.ConditionDef = nullptr;
490 MBP.SingleUseCondition = false;
491
492 MBP.LHS = LastInst->getOperand(0);
493 MBP.RHS = MachineOperand::CreateImm(0);
494 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
495 : MachineBranchPredicate::PRED_EQ;
496 return false;
497}
498
499bool AArch64InstrInfo::reverseBranchCondition(
500 SmallVectorImpl<MachineOperand> &Cond) const {
501 if (Cond[0].getImm() != -1) {
502 // Regular Bcc
503 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
504 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
505 } else {
506 // Folded compare-and-branch
507 switch (Cond[1].getImm()) {
508 default:
509 llvm_unreachable("Unknown conditional branch!");
510 case AArch64::CBZW:
511 Cond[1].setImm(AArch64::CBNZW);
512 break;
513 case AArch64::CBNZW:
514 Cond[1].setImm(AArch64::CBZW);
515 break;
516 case AArch64::CBZX:
517 Cond[1].setImm(AArch64::CBNZX);
518 break;
519 case AArch64::CBNZX:
520 Cond[1].setImm(AArch64::CBZX);
521 break;
522 case AArch64::TBZW:
523 Cond[1].setImm(AArch64::TBNZW);
524 break;
525 case AArch64::TBNZW:
526 Cond[1].setImm(AArch64::TBZW);
527 break;
528 case AArch64::TBZX:
529 Cond[1].setImm(AArch64::TBNZX);
530 break;
531 case AArch64::TBNZX:
532 Cond[1].setImm(AArch64::TBZX);
533 break;
534 }
535 }
536
537 return false;
538}
539
540unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
541 int *BytesRemoved) const {
542 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
543 if (I == MBB.end())
544 return 0;
545
546 if (!isUncondBranchOpcode(I->getOpcode()) &&
547 !isCondBranchOpcode(I->getOpcode()))
548 return 0;
549
550 // Remove the branch.
551 I->eraseFromParent();
552
553 I = MBB.end();
554
555 if (I == MBB.begin()) {
556 if (BytesRemoved)
557 *BytesRemoved = 4;
558 return 1;
559 }
560 --I;
561 if (!isCondBranchOpcode(I->getOpcode())) {
562 if (BytesRemoved)
563 *BytesRemoved = 4;
564 return 1;
565 }
566
567 // Remove the branch.
568 I->eraseFromParent();
569 if (BytesRemoved)
570 *BytesRemoved = 8;
571
572 return 2;
573}
574
575void AArch64InstrInfo::instantiateCondBranch(
576 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
577 ArrayRef<MachineOperand> Cond) const {
578 if (Cond[0].getImm() != -1) {
579 // Regular Bcc
580 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
581 } else {
582 // Folded compare-and-branch
583 // Note that we use addOperand instead of addReg to keep the flags.
584 const MachineInstrBuilder MIB =
585 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
586 if (Cond.size() > 3)
587 MIB.addImm(Cond[3].getImm());
588 MIB.addMBB(TBB);
589 }
590}
591
592unsigned AArch64InstrInfo::insertBranch(
593 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
594 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
595 // Shouldn't be a fall through.
596 assert(TBB && "insertBranch must not be told to insert a fallthrough");
597
598 if (!FBB) {
599 if (Cond.empty()) // Unconditional branch?
600 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
601 else
602 instantiateCondBranch(MBB, DL, TBB, Cond);
603
604 if (BytesAdded)
605 *BytesAdded = 4;
606
607 return 1;
608 }
609
610 // Two-way conditional branch.
611 instantiateCondBranch(MBB, DL, TBB, Cond);
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
613
614 if (BytesAdded)
615 *BytesAdded = 8;
616
617 return 2;
618}
619
620// Find the original register that VReg is copied from.
621static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
622 while (Register::isVirtualRegister(VReg)) {
623 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
624 if (!DefMI->isFullCopy())
625 return VReg;
626 VReg = DefMI->getOperand(1).getReg();
627 }
628 return VReg;
629}
630
631// Determine if VReg is defined by an instruction that can be folded into a
632// csel instruction. If so, return the folded opcode, and the replacement
633// register.
634static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
635 unsigned *NewVReg = nullptr) {
636 VReg = removeCopies(MRI, VReg);
637 if (!Register::isVirtualRegister(VReg))
638 return 0;
639
640 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
641 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
642 unsigned Opc = 0;
643 unsigned SrcOpNum = 0;
644 switch (DefMI->getOpcode()) {
645 case AArch64::ADDSXri:
646 case AArch64::ADDSWri:
647 // if NZCV is used, do not fold.
648 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
649 true) == -1)
650 return 0;
651 // fall-through to ADDXri and ADDWri.
652 [[fallthrough]];
653 case AArch64::ADDXri:
654 case AArch64::ADDWri:
655 // add x, 1 -> csinc.
656 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
657 DefMI->getOperand(3).getImm() != 0)
658 return 0;
659 SrcOpNum = 1;
660 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
661 break;
662
663 case AArch64::ORNXrr:
664 case AArch64::ORNWrr: {
665 // not x -> csinv, represented as orn dst, xzr, src.
666 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
667 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
668 return 0;
669 SrcOpNum = 2;
670 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
671 break;
672 }
673
674 case AArch64::SUBSXrr:
675 case AArch64::SUBSWrr:
676 // if NZCV is used, do not fold.
677 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
678 true) == -1)
679 return 0;
680 // fall-through to SUBXrr and SUBWrr.
681 [[fallthrough]];
682 case AArch64::SUBXrr:
683 case AArch64::SUBWrr: {
684 // neg x -> csneg, represented as sub dst, xzr, src.
685 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
686 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
687 return 0;
688 SrcOpNum = 2;
689 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
690 break;
691 }
692 default:
693 return 0;
694 }
695 assert(Opc && SrcOpNum && "Missing parameters");
696
697 if (NewVReg)
698 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
699 return Opc;
700}
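// Illustrative sketch of the folding detected above: given
//   %t:gpr32 = ADDWri %a, 1, 0        ; %t = %a + 1
// a select "%d = CSELWr %t, %b, eq" can instead be emitted as
//   %d = CSINCWr %b, %a, ne
// insertSelect below performs the operand swap and condition inversion when
// the folded value comes from TrueReg.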
701
702bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
703 ArrayRef<MachineOperand> Cond,
704 Register DstReg, Register TrueReg,
705 Register FalseReg, int &CondCycles,
706 int &TrueCycles,
707 int &FalseCycles) const {
708 // Check register classes.
709 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
710 const TargetRegisterClass *RC =
711 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
712 if (!RC)
713 return false;
714
715 // Also need to check the dest regclass, in case we're trying to optimize
716 // something like:
717 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
718 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
719 return false;
720
721 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
722 unsigned ExtraCondLat = Cond.size() != 1;
723
724 // GPRs are handled by csel.
725 // FIXME: Fold in x+1, -x, and ~x when applicable.
726 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
727 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
728 // Single-cycle csel, csinc, csinv, and csneg.
729 CondCycles = 1 + ExtraCondLat;
730 TrueCycles = FalseCycles = 1;
731 if (canFoldIntoCSel(MRI, TrueReg))
732 TrueCycles = 0;
733 else if (canFoldIntoCSel(MRI, FalseReg))
734 FalseCycles = 0;
735 return true;
736 }
737
738 // Scalar floating point is handled by fcsel.
739 // FIXME: Form fabs, fmin, and fmax when applicable.
740 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
741 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
742 CondCycles = 5 + ExtraCondLat;
743 TrueCycles = FalseCycles = 2;
744 return true;
745 }
746
747 // Can't do vectors.
748 return false;
749}
750
751void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator I,
753 const DebugLoc &DL, Register DstReg,
754 ArrayRef<MachineOperand> Cond,
755 Register TrueReg, Register FalseReg) const {
756 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
757
758 // Parse the condition code, see parseCondBranch() above.
759 AArch64CC::CondCode CC;
760 switch (Cond.size()) {
761 default:
762 llvm_unreachable("Unknown condition opcode in Cond");
763 case 1: // b.cc
764 CC = AArch64CC::CondCode(Cond[0].getImm());
765 break;
766 case 3: { // cbz/cbnz
767 // We must insert a compare against 0.
768 bool Is64Bit;
769 switch (Cond[1].getImm()) {
770 default:
771 llvm_unreachable("Unknown branch opcode in Cond");
772 case AArch64::CBZW:
773 Is64Bit = false;
774 CC = AArch64CC::EQ;
775 break;
776 case AArch64::CBZX:
777 Is64Bit = true;
778 CC = AArch64CC::EQ;
779 break;
780 case AArch64::CBNZW:
781 Is64Bit = false;
782 CC = AArch64CC::NE;
783 break;
784 case AArch64::CBNZX:
785 Is64Bit = true;
786 CC = AArch64CC::NE;
787 break;
788 }
789 Register SrcReg = Cond[2].getReg();
790 if (Is64Bit) {
791 // cmp reg, #0 is actually subs xzr, reg, #0.
792 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
793 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
794 .addReg(SrcReg)
795 .addImm(0)
796 .addImm(0);
797 } else {
798 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
799 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
800 .addReg(SrcReg)
801 .addImm(0)
802 .addImm(0);
803 }
804 break;
805 }
806 case 4: { // tbz/tbnz
807 // We must insert a tst instruction.
808 switch (Cond[1].getImm()) {
809 default:
810 llvm_unreachable("Unknown branch opcode in Cond");
811 case AArch64::TBZW:
812 case AArch64::TBZX:
813 CC = AArch64CC::EQ;
814 break;
815 case AArch64::TBNZW:
816 case AArch64::TBNZX:
817 CC = AArch64CC::NE;
818 break;
819 }
820 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
821 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
822 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
823 .addReg(Cond[2].getReg())
824 .addImm(
825 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
826 else
827 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
828 .addReg(Cond[2].getReg())
829 .addImm(
830 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
831 break;
832 }
833 }
834
835 unsigned Opc = 0;
836 const TargetRegisterClass *RC = nullptr;
837 bool TryFold = false;
838 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
839 RC = &AArch64::GPR64RegClass;
840 Opc = AArch64::CSELXr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
843 RC = &AArch64::GPR32RegClass;
844 Opc = AArch64::CSELWr;
845 TryFold = true;
846 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
847 RC = &AArch64::FPR64RegClass;
848 Opc = AArch64::FCSELDrrr;
849 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
850 RC = &AArch64::FPR32RegClass;
851 Opc = AArch64::FCSELSrrr;
852 }
853 assert(RC && "Unsupported regclass");
854
855 // Try folding simple instructions into the csel.
856 if (TryFold) {
857 unsigned NewVReg = 0;
858 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
859 if (FoldedOpc) {
860 // The folded opcodes csinc, csinv and csneg apply the operation to
861 // FalseReg, so we need to invert the condition.
862 CC = AArch64CC::getInvertedCondCode(CC);
863 TrueReg = FalseReg;
864 } else
865 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
866
867 // Fold the operation. Leave any dead instructions for DCE to clean up.
868 if (FoldedOpc) {
869 FalseReg = NewVReg;
870 Opc = FoldedOpc;
871 // This extends the live range of NewVReg.
872 MRI.clearKillFlags(NewVReg);
873 }
874 }
875
876 // Pull all virtual registers into the appropriate class.
877 MRI.constrainRegClass(TrueReg, RC);
878 MRI.constrainRegClass(FalseReg, RC);
879
880 // Insert the csel.
881 BuildMI(MBB, I, DL, get(Opc), DstReg)
882 .addReg(TrueReg)
883 .addReg(FalseReg)
884 .addImm(CC);
885}
886
887// Return true if Imm can be loaded into a register by a "cheap" sequence of
888// instructions. For now, "cheap" means at most two instructions.
889static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
890 if (BitSize == 32)
891 return true;
892
893 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
894 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
895 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
896 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
897
898 return Is.size() <= 2;
899}
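// For example, 0x12345678 expands to a MOVZ plus one MOVK (two instructions)
// and is treated as cheap, whereas a constant such as 0x123456789ABCDEF0
// generally needs a MOVZ plus three MOVKs and is not. (Illustrative only;
// the exact sequence is chosen by AArch64_IMM::expandMOVImm.)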
900
901// FIXME: this implementation should be micro-architecture dependent, so a
902// micro-architecture target hook should be introduced here in future.
903bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
904 if (Subtarget.hasExynosCheapAsMoveHandling()) {
905 if (isExynosCheapAsMove(MI))
906 return true;
907 return MI.isAsCheapAsAMove();
908 }
909
910 switch (MI.getOpcode()) {
911 default:
912 return MI.isAsCheapAsAMove();
913
914 case AArch64::ADDWrs:
915 case AArch64::ADDXrs:
916 case AArch64::SUBWrs:
917 case AArch64::SUBXrs:
918 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
919
920 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
921 // ORRXri, it is as cheap as MOV.
922 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
923 case AArch64::MOVi32imm:
924 return isCheapImmediate(MI, 32);
925 case AArch64::MOVi64imm:
926 return isCheapImmediate(MI, 64);
927 }
928}
929
930bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
931 switch (MI.getOpcode()) {
932 default:
933 return false;
934
935 case AArch64::ADDWrs:
936 case AArch64::ADDXrs:
937 case AArch64::ADDSWrs:
938 case AArch64::ADDSXrs: {
939 unsigned Imm = MI.getOperand(3).getImm();
940 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
941 if (ShiftVal == 0)
942 return true;
943 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
944 }
945
946 case AArch64::ADDWrx:
947 case AArch64::ADDXrx:
948 case AArch64::ADDXrx64:
949 case AArch64::ADDSWrx:
950 case AArch64::ADDSXrx:
951 case AArch64::ADDSXrx64: {
952 unsigned Imm = MI.getOperand(3).getImm();
953 switch (AArch64_AM::getArithExtendType(Imm)) {
954 default:
955 return false;
956 case AArch64_AM::UXTB:
957 case AArch64_AM::UXTH:
958 case AArch64_AM::UXTW:
959 case AArch64_AM::UXTX:
960 return AArch64_AM::getArithShiftValue(Imm) <= 4;
961 }
962 }
963
964 case AArch64::SUBWrs:
965 case AArch64::SUBSWrs: {
966 unsigned Imm = MI.getOperand(3).getImm();
967 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
968 return ShiftVal == 0 ||
969 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
970 }
971
972 case AArch64::SUBXrs:
973 case AArch64::SUBSXrs: {
974 unsigned Imm = MI.getOperand(3).getImm();
975 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
976 return ShiftVal == 0 ||
977 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
978 }
979
980 case AArch64::SUBWrx:
981 case AArch64::SUBXrx:
982 case AArch64::SUBXrx64:
983 case AArch64::SUBSWrx:
984 case AArch64::SUBSXrx:
985 case AArch64::SUBSXrx64: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 switch (AArch64_AM::getArithExtendType(Imm)) {
988 default:
989 return false;
990 case AArch64_AM::UXTB:
991 case AArch64_AM::UXTH:
992 case AArch64_AM::UXTW:
993 case AArch64_AM::UXTX:
994 return AArch64_AM::getArithShiftValue(Imm) == 0;
995 }
996 }
997
998 case AArch64::LDRBBroW:
999 case AArch64::LDRBBroX:
1000 case AArch64::LDRBroW:
1001 case AArch64::LDRBroX:
1002 case AArch64::LDRDroW:
1003 case AArch64::LDRDroX:
1004 case AArch64::LDRHHroW:
1005 case AArch64::LDRHHroX:
1006 case AArch64::LDRHroW:
1007 case AArch64::LDRHroX:
1008 case AArch64::LDRQroW:
1009 case AArch64::LDRQroX:
1010 case AArch64::LDRSBWroW:
1011 case AArch64::LDRSBWroX:
1012 case AArch64::LDRSBXroW:
1013 case AArch64::LDRSBXroX:
1014 case AArch64::LDRSHWroW:
1015 case AArch64::LDRSHWroX:
1016 case AArch64::LDRSHXroW:
1017 case AArch64::LDRSHXroX:
1018 case AArch64::LDRSWroW:
1019 case AArch64::LDRSWroX:
1020 case AArch64::LDRSroW:
1021 case AArch64::LDRSroX:
1022 case AArch64::LDRWroW:
1023 case AArch64::LDRWroX:
1024 case AArch64::LDRXroW:
1025 case AArch64::LDRXroX:
1026 case AArch64::PRFMroW:
1027 case AArch64::PRFMroX:
1028 case AArch64::STRBBroW:
1029 case AArch64::STRBBroX:
1030 case AArch64::STRBroW:
1031 case AArch64::STRBroX:
1032 case AArch64::STRDroW:
1033 case AArch64::STRDroX:
1034 case AArch64::STRHHroW:
1035 case AArch64::STRHHroX:
1036 case AArch64::STRHroW:
1037 case AArch64::STRHroX:
1038 case AArch64::STRQroW:
1039 case AArch64::STRQroX:
1040 case AArch64::STRSroW:
1041 case AArch64::STRSroX:
1042 case AArch64::STRWroW:
1043 case AArch64::STRWroX:
1044 case AArch64::STRXroW:
1045 case AArch64::STRXroX: {
1046 unsigned IsSigned = MI.getOperand(3).getImm();
1047 return !IsSigned;
1048 }
1049 }
1050}
1051
1052bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1053 unsigned Opc = MI.getOpcode();
1054 switch (Opc) {
1055 default:
1056 return false;
1057 case AArch64::SEH_StackAlloc:
1058 case AArch64::SEH_SaveFPLR:
1059 case AArch64::SEH_SaveFPLR_X:
1060 case AArch64::SEH_SaveReg:
1061 case AArch64::SEH_SaveReg_X:
1062 case AArch64::SEH_SaveRegP:
1063 case AArch64::SEH_SaveRegP_X:
1064 case AArch64::SEH_SaveFReg:
1065 case AArch64::SEH_SaveFReg_X:
1066 case AArch64::SEH_SaveFRegP:
1067 case AArch64::SEH_SaveFRegP_X:
1068 case AArch64::SEH_SetFP:
1069 case AArch64::SEH_AddFP:
1070 case AArch64::SEH_Nop:
1071 case AArch64::SEH_PrologEnd:
1072 case AArch64::SEH_EpilogStart:
1073 case AArch64::SEH_EpilogEnd:
1074 case AArch64::SEH_PACSignLR:
1075 case AArch64::SEH_SaveAnyRegQP:
1076 case AArch64::SEH_SaveAnyRegQPX:
1077 return true;
1078 }
1079}
1080
1081bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1082 Register &SrcReg, Register &DstReg,
1083 unsigned &SubIdx) const {
1084 switch (MI.getOpcode()) {
1085 default:
1086 return false;
1087 case AArch64::SBFMXri: // aka sxtw
1088 case AArch64::UBFMXri: // aka uxtw
1089 // Check for the 32 -> 64 bit extension case, these instructions can do
1090 // much more.
1091 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1092 return false;
1093 // This is a signed or unsigned 32 -> 64 bit extension.
1094 SrcReg = MI.getOperand(1).getReg();
1095 DstReg = MI.getOperand(0).getReg();
1096 SubIdx = AArch64::sub_32;
1097 return true;
1098 }
1099}
1100
1101bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1102 const MachineInstr &MIa, const MachineInstr &MIb) const {
1103 const TargetRegisterInfo *TRI = &getRegisterInfo();
1104 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1105 int64_t OffsetA = 0, OffsetB = 0;
1106 TypeSize WidthA(0, false), WidthB(0, false);
1107 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1108
1109 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1110 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1111
1112 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1113 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1114 return false;
1115
1116 // Retrieve the base, offset from the base and width. Width
1117 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1118 // base are identical, and the offset of a lower memory access +
1119 // the width doesn't overlap the offset of a higher memory access,
1120 // then the memory accesses are different.
1121 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1122 // are assumed to have the same scale (vscale).
1123 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1124 WidthA, TRI) &&
1125 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1126 WidthB, TRI)) {
1127 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1128 OffsetAIsScalable == OffsetBIsScalable) {
1129 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1130 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1131 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1132 if (LowWidth.isScalable() == OffsetAIsScalable &&
1133 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1134 return true;
1135 }
1136 }
1137 return false;
1138}
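// Illustrative sketch: for
//   STRXui %x, %stack.0, 0   ; stores 8 bytes at offset 0
//   LDRXui %y, %stack.0, 1   ; loads 8 bytes at offset 8 (imm scaled by 8)
// both accesses share the same base and 0 + 8 <= 8, so they are reported as
// trivially disjoint; with equal offsets they would not be.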
1139
1140bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1141 const MachineBasicBlock *MBB,
1142 const MachineFunction &MF) const {
1143 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1144 return true;
1145
1146 // Do not move an instruction that can be recognized as a branch target.
1147 if (hasBTISemantics(MI))
1148 return true;
1149
1150 switch (MI.getOpcode()) {
1151 case AArch64::HINT:
1152 // CSDB hints are scheduling barriers.
1153 if (MI.getOperand(0).getImm() == 0x14)
1154 return true;
1155 break;
1156 case AArch64::DSB:
1157 case AArch64::ISB:
1158 // DSB and ISB also are scheduling barriers.
1159 return true;
1160 case AArch64::MSRpstatesvcrImm1:
1161 // SMSTART and SMSTOP are also scheduling barriers.
1162 return true;
1163 default:;
1164 }
1165 if (isSEHInstruction(MI))
1166 return true;
1167 auto Next = std::next(MI.getIterator());
1168 return Next != MBB->end() && Next->isCFIInstruction();
1169}
1170
1171/// analyzeCompare - For a comparison instruction, return the source registers
1172/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1173/// Return true if the comparison instruction can be analyzed.
1174bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1175 Register &SrcReg2, int64_t &CmpMask,
1176 int64_t &CmpValue) const {
1177 // The first operand can be a frame index where we'd normally expect a
1178 // register.
1179 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1180 if (!MI.getOperand(1).isReg())
1181 return false;
1182
1183 switch (MI.getOpcode()) {
1184 default:
1185 break;
1186 case AArch64::PTEST_PP:
1187 case AArch64::PTEST_PP_ANY:
1188 SrcReg = MI.getOperand(0).getReg();
1189 SrcReg2 = MI.getOperand(1).getReg();
1190 // Not sure about the mask and value for now...
1191 CmpMask = ~0;
1192 CmpValue = 0;
1193 return true;
1194 case AArch64::SUBSWrr:
1195 case AArch64::SUBSWrs:
1196 case AArch64::SUBSWrx:
1197 case AArch64::SUBSXrr:
1198 case AArch64::SUBSXrs:
1199 case AArch64::SUBSXrx:
1200 case AArch64::ADDSWrr:
1201 case AArch64::ADDSWrs:
1202 case AArch64::ADDSWrx:
1203 case AArch64::ADDSXrr:
1204 case AArch64::ADDSXrs:
1205 case AArch64::ADDSXrx:
1206 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1207 SrcReg = MI.getOperand(1).getReg();
1208 SrcReg2 = MI.getOperand(2).getReg();
1209 CmpMask = ~0;
1210 CmpValue = 0;
1211 return true;
1212 case AArch64::SUBSWri:
1213 case AArch64::ADDSWri:
1214 case AArch64::SUBSXri:
1215 case AArch64::ADDSXri:
1216 SrcReg = MI.getOperand(1).getReg();
1217 SrcReg2 = 0;
1218 CmpMask = ~0;
1219 CmpValue = MI.getOperand(2).getImm();
1220 return true;
1221 case AArch64::ANDSWri:
1222 case AArch64::ANDSXri:
1223 // ANDS does not use the same encoding scheme as the others xxxS
1224 // instructions.
1225 SrcReg = MI.getOperand(1).getReg();
1226 SrcReg2 = 0;
1227 CmpMask = ~0;
1228 CmpValue = AArch64_AM::decodeLogicalImmediate(
1229 MI.getOperand(2).getImm(),
1230 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1231 return true;
1232 }
1233
1234 return false;
1235}
1236
1237static bool UpdateOperandRegClass(MachineInstr &Instr) {
1238 MachineBasicBlock *MBB = Instr.getParent();
1239 assert(MBB && "Can't get MachineBasicBlock here");
1240 MachineFunction *MF = MBB->getParent();
1241 assert(MF && "Can't get MachineFunction here");
1242 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1243 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1244 MachineRegisterInfo *MRI = &MF->getRegInfo();
1245
1246 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1247 ++OpIdx) {
1248 MachineOperand &MO = Instr.getOperand(OpIdx);
1249 const TargetRegisterClass *OpRegCstraints =
1250 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1251
1252 // If there's no constraint, there's nothing to do.
1253 if (!OpRegCstraints)
1254 continue;
1255 // If the operand is a frame index, there's nothing to do here.
1256 // A frame index operand will resolve correctly during PEI.
1257 if (MO.isFI())
1258 continue;
1259
1260 assert(MO.isReg() &&
1261 "Operand has register constraints without being a register!");
1262
1263 Register Reg = MO.getReg();
1264 if (Reg.isPhysical()) {
1265 if (!OpRegCstraints->contains(Reg))
1266 return false;
1267 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1268 !MRI->constrainRegClass(Reg, OpRegCstraints))
1269 return false;
1270 }
1271
1272 return true;
1273}
1274
1275/// Return the opcode that does not set flags when possible - otherwise
1276/// return the original opcode. The caller is responsible to do the actual
1277/// substitution and legality checking.
1278unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1279 // Don't convert all compare instructions, because for some the zero register
1280 // encoding becomes the sp register.
1281 bool MIDefinesZeroReg = false;
1282 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1283 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1284 MIDefinesZeroReg = true;
1285
1286 switch (MI.getOpcode()) {
1287 default:
1288 return MI.getOpcode();
1289 case AArch64::ADDSWrr:
1290 return AArch64::ADDWrr;
1291 case AArch64::ADDSWri:
1292 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1293 case AArch64::ADDSWrs:
1294 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1295 case AArch64::ADDSWrx:
1296 return AArch64::ADDWrx;
1297 case AArch64::ADDSXrr:
1298 return AArch64::ADDXrr;
1299 case AArch64::ADDSXri:
1300 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1301 case AArch64::ADDSXrs:
1302 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1303 case AArch64::ADDSXrx:
1304 return AArch64::ADDXrx;
1305 case AArch64::SUBSWrr:
1306 return AArch64::SUBWrr;
1307 case AArch64::SUBSWri:
1308 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1309 case AArch64::SUBSWrs:
1310 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1311 case AArch64::SUBSWrx:
1312 return AArch64::SUBWrx;
1313 case AArch64::SUBSXrr:
1314 return AArch64::SUBXrr;
1315 case AArch64::SUBSXri:
1316 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1317 case AArch64::SUBSXrs:
1318 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1319 case AArch64::SUBSXrx:
1320 return AArch64::SUBXrx;
1321 }
1322}
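// E.g. an ADDSWri whose NZCV result is unused becomes ADDWri (the caller
// drops the dead NZCV operand), but an ADDSWri that writes WZR keeps its
// flag-setting opcode because the non-S encoding would reinterpret the zero
// register as SP.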
1323
1324enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1325
1326/// True when condition flags are accessed (either by writing or reading)
1327/// on the instruction trace starting at From and ending at To.
1328///
1329/// Note: If From and To are from different blocks it's assumed CC are accessed
1330/// on the path.
1331static bool areCFlagsAccessedBetweenInstrs(
1332 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1333 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1334 // Early exit if To is at the beginning of the BB.
1335 if (To == To->getParent()->begin())
1336 return true;
1337
1338 // Check whether the instructions are in the same basic block
1339 // If not, assume the condition flags might get modified somewhere.
1340 if (To->getParent() != From->getParent())
1341 return true;
1342
1343 // From must be above To.
1344 assert(std::any_of(
1345 ++To.getReverse(), To->getParent()->rend(),
1346 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1347
1348 // We iterate backward starting at \p To until we hit \p From.
1349 for (const MachineInstr &Instr :
1350 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1351 if (((AccessToCheck & AK_Write) &&
1352 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1353 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1354 return true;
1355 }
1356 return false;
1357}
1358
1359std::optional<unsigned>
1360AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1361 MachineInstr *Pred,
1362 const MachineRegisterInfo *MRI) const {
1363 unsigned MaskOpcode = Mask->getOpcode();
1364 unsigned PredOpcode = Pred->getOpcode();
1365 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1366 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1367
1368 if (PredIsWhileLike) {
1369 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1370 // instruction and the condition is "any" since WHILEcc does an implicit
1371 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1372 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1373 return PredOpcode;
1374
1375 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1376 // redundant since WHILE performs an implicit PTEST with an all active
1377 // mask.
1378 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1379 getElementSizeForOpcode(MaskOpcode) ==
1380 getElementSizeForOpcode(PredOpcode))
1381 return PredOpcode;
1382
1383 return {};
1384 }
1385
1386 if (PredIsPTestLike) {
1387 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1388 // instruction that sets the flags as PTEST would and the condition is
1389 // "any" since PG is always a subset of the governing predicate of the
1390 // ptest-like instruction.
1391 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1392 return PredOpcode;
1393
1394 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1395 // element size matches and either the PTEST_LIKE instruction uses
1396 // the same all active mask or the condition is "any".
1397 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1398 getElementSizeForOpcode(MaskOpcode) ==
1399 getElementSizeForOpcode(PredOpcode)) {
1400 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1401 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1402 return PredOpcode;
1403 }
1404
1405 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1406 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1407 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1408 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1409 // performed by the compare could consider fewer lanes for these element
1410 // sizes.
1411 //
1412 // For example, consider
1413 //
1414 // ptrue p0.b ; P0=1111-1111-1111-1111
1415 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1416 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1417 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1418 // ; ^ last active
1419 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1420 // ; ^ last active
1421 //
1422 // where the compare generates a canonical all active 32-bit predicate
1423 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1424 // active flag, whereas the PTEST instruction with the same mask doesn't.
1425 // For PTEST_ANY this doesn't apply as the flags in this case would be
1426 // identical regardless of element size.
1427 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1428 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1429 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1430 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1431 return PredOpcode;
1432
1433 return {};
1434 }
1435
1436 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1437 // opcode so the PTEST becomes redundant.
1438 switch (PredOpcode) {
1439 case AArch64::AND_PPzPP:
1440 case AArch64::BIC_PPzPP:
1441 case AArch64::EOR_PPzPP:
1442 case AArch64::NAND_PPzPP:
1443 case AArch64::NOR_PPzPP:
1444 case AArch64::ORN_PPzPP:
1445 case AArch64::ORR_PPzPP:
1446 case AArch64::BRKA_PPzP:
1447 case AArch64::BRKPA_PPzPP:
1448 case AArch64::BRKB_PPzP:
1449 case AArch64::BRKPB_PPzPP:
1450 case AArch64::RDFFR_PPz: {
1451 // Check to see if our mask is the same. If not the resulting flag bits
1452 // may be different and we can't remove the ptest.
1453 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1454 if (Mask != PredMask)
1455 return {};
1456 break;
1457 }
1458 case AArch64::BRKN_PPzP: {
1459 // BRKN uses an all active implicit mask to set flags unlike the other
1460 // flag-setting instructions.
1461 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1462 if ((MaskOpcode != AArch64::PTRUE_B) ||
1463 (Mask->getOperand(1).getImm() != 31))
1464 return {};
1465 break;
1466 }
1467 case AArch64::PTRUE_B:
1468 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1469 break;
1470 default:
1471 // Bail out if we don't recognize the input
1472 return {};
1473 }
1474
1475 return convertToFlagSettingOpc(PredOpcode);
1476}
1477
1478/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1479/// operation which could set the flags in an identical manner
1480bool AArch64InstrInfo::optimizePTestInstr(
1481 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1482 const MachineRegisterInfo *MRI) const {
1483 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1484 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1485 unsigned PredOpcode = Pred->getOpcode();
1486 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1487 if (!NewOp)
1488 return false;
1489
1491
1490 const TargetRegisterInfo *TRI = &getRegisterInfo();
1492 // If another instruction between Pred and PTest accesses flags, don't remove
1493 // the ptest or update the earlier instruction to modify them.
1494 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1495 return false;
1496
1497 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1498 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1499 // operand to be replaced with an equivalent instruction that also sets the
1500 // flags.
1501 PTest->eraseFromParent();
1502 if (*NewOp != PredOpcode) {
1503 Pred->setDesc(get(*NewOp));
1504 bool succeeded = UpdateOperandRegClass(*Pred);
1505 (void)succeeded;
1506 assert(succeeded && "Operands have incompatible register classes!");
1507 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1508 }
1509
1510 // Ensure that the flags def is live.
1511 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1512 unsigned i = 0, e = Pred->getNumOperands();
1513 for (; i != e; ++i) {
1514 MachineOperand &MO = Pred->getOperand(i);
1515 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1516 MO.setIsDead(false);
1517 break;
1518 }
1519 }
1520 }
1521 return true;
1522}
1523
1524/// Try to optimize a compare instruction. A compare instruction is an
1525/// instruction which produces AArch64::NZCV. It is a true compare instruction
1526/// only when there are no uses of its destination register.
1528///
1529/// The following steps are tried in order:
1530/// 1. Convert CmpInstr into an unconditional version.
1531/// 2. Remove CmpInstr if above there is an instruction producing a needed
1532/// condition code or an instruction which can be converted into such an
1533/// instruction.
1534/// Only comparison with zero is supported.
1535bool AArch64InstrInfo::optimizeCompareInstr(
1536 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1537 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1538 assert(CmpInstr.getParent());
1539 assert(MRI);
1540
1541 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1542 int DeadNZCVIdx =
1543 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1544 if (DeadNZCVIdx != -1) {
1545 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1546 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1547 CmpInstr.eraseFromParent();
1548 return true;
1549 }
1550 unsigned Opc = CmpInstr.getOpcode();
1551 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1552 if (NewOpc == Opc)
1553 return false;
1554 const MCInstrDesc &MCID = get(NewOpc);
1555 CmpInstr.setDesc(MCID);
1556 CmpInstr.removeOperand(DeadNZCVIdx);
1557 bool succeeded = UpdateOperandRegClass(CmpInstr);
1558 (void)succeeded;
1559 assert(succeeded && "Some operands reg class are incompatible!");
1560 return true;
1561 }
1562
1563 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1564 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1565 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1566
1567 if (SrcReg2 != 0)
1568 return false;
1569
1570 // CmpInstr is a Compare instruction if destination register is not used.
1571 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1572 return false;
1573
1574 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1575 return true;
1576 return (CmpValue == 0 || CmpValue == 1) &&
1577 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1578}
1579
1580/// Get opcode of S version of Instr.
1581/// If Instr is S version its opcode is returned.
1582/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1583/// or we are not interested in it.
1584static unsigned sForm(MachineInstr &Instr) {
1585 switch (Instr.getOpcode()) {
1586 default:
1587 return AArch64::INSTRUCTION_LIST_END;
1588
1589 case AArch64::ADDSWrr:
1590 case AArch64::ADDSWri:
1591 case AArch64::ADDSXrr:
1592 case AArch64::ADDSXri:
1593 case AArch64::SUBSWrr:
1594 case AArch64::SUBSWri:
1595 case AArch64::SUBSXrr:
1596 case AArch64::SUBSXri:
1597 return Instr.getOpcode();
1598
1599 case AArch64::ADDWrr:
1600 return AArch64::ADDSWrr;
1601 case AArch64::ADDWri:
1602 return AArch64::ADDSWri;
1603 case AArch64::ADDXrr:
1604 return AArch64::ADDSXrr;
1605 case AArch64::ADDXri:
1606 return AArch64::ADDSXri;
1607 case AArch64::ADCWr:
1608 return AArch64::ADCSWr;
1609 case AArch64::ADCXr:
1610 return AArch64::ADCSXr;
1611 case AArch64::SUBWrr:
1612 return AArch64::SUBSWrr;
1613 case AArch64::SUBWri:
1614 return AArch64::SUBSWri;
1615 case AArch64::SUBXrr:
1616 return AArch64::SUBSXrr;
1617 case AArch64::SUBXri:
1618 return AArch64::SUBSXri;
1619 case AArch64::SBCWr:
1620 return AArch64::SBCSWr;
1621 case AArch64::SBCXr:
1622 return AArch64::SBCSXr;
1623 case AArch64::ANDWri:
1624 return AArch64::ANDSWri;
1625 case AArch64::ANDXri:
1626 return AArch64::ANDSXri;
1627 }
1628}
1629
1630/// Check if AArch64::NZCV should be alive in successors of MBB.
1631static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1632 for (auto *BB : MBB->successors())
1633 if (BB->isLiveIn(AArch64::NZCV))
1634 return true;
1635 return false;
1636}
1637
1638/// \returns The condition code operand index for \p Instr if it is a branch
1639/// or select and -1 otherwise.
1640static int
1641findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1642 switch (Instr.getOpcode()) {
1643 default:
1644 return -1;
1645
1646 case AArch64::Bcc: {
1647 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1648 assert(Idx >= 2);
1649 return Idx - 2;
1650 }
1651
1652 case AArch64::CSINVWr:
1653 case AArch64::CSINVXr:
1654 case AArch64::CSINCWr:
1655 case AArch64::CSINCXr:
1656 case AArch64::CSELWr:
1657 case AArch64::CSELXr:
1658 case AArch64::CSNEGWr:
1659 case AArch64::CSNEGXr:
1660 case AArch64::FCSELSrrr:
1661 case AArch64::FCSELDrrr: {
1662 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1663 assert(Idx >= 1);
1664 return Idx - 1;
1665 }
1666 }
1667}
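// For example, for "Bcc <cc>, <target>" the implicit NZCV use is operand 2,
// so the condition code lives at operand 0; for CSELWr
// (dst, true, false, cc, implicit NZCV) it lives at operand 3.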
1668
1669/// Find a condition code used by the instruction.
1670/// Returns AArch64CC::Invalid if either the instruction does not use condition
1671/// codes or we don't optimize CmpInstr in the presence of such instructions.
1672static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1673 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1674 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1675 Instr.getOperand(CCIdx).getImm())
1676 : AArch64CC::Invalid;
1677}
1678
1679static UsedNZCV
1680getUsedNZCV(AArch64CC::CondCode CC) {
1681 UsedNZCV UsedFlags;
1682 switch (CC) {
1683 default:
1684 break;
1685
1686 case AArch64CC::EQ: // Z set
1687 case AArch64CC::NE: // Z clear
1688 UsedFlags.Z = true;
1689 break;
1690
1691 case AArch64CC::HI: // Z clear and C set
1692 case AArch64CC::LS: // Z set or C clear
1693 UsedFlags.Z = true;
1694 [[fallthrough]];
1695 case AArch64CC::HS: // C set
1696 case AArch64CC::LO: // C clear
1697 UsedFlags.C = true;
1698 break;
1699
1700 case AArch64CC::MI: // N set
1701 case AArch64CC::PL: // N clear
1702 UsedFlags.N = true;
1703 break;
1704
1705 case AArch64CC::VS: // V set
1706 case AArch64CC::VC: // V clear
1707 UsedFlags.V = true;
1708 break;
1709
1710 case AArch64CC::GT: // Z clear, N and V the same
1711 case AArch64CC::LE: // Z set, N and V differ
1712 UsedFlags.Z = true;
1713 [[fallthrough]];
1714 case AArch64CC::GE: // N and V the same
1715 case AArch64CC::LT: // N and V differ
1716 UsedFlags.N = true;
1717 UsedFlags.V = true;
1718 break;
1719 }
1720 return UsedFlags;
1721}
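// For example, getUsedNZCV(AArch64CC::GT) reports { Z, N, V } used and
// getUsedNZCV(AArch64CC::HS) reports only { C }, matching the A64 condition
// definitions encoded in the switch above.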
1722
1723/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1724/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1725/// \returns std::nullopt otherwise.
1726///
1727/// Collect instructions using those flags in \p CCUseInstrs if provided.
1728std::optional<UsedNZCV>
1729llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1730 const TargetRegisterInfo &TRI,
1731 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1732 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1733 if (MI.getParent() != CmpParent)
1734 return std::nullopt;
1735
1736 if (areCFlagsAliveInSuccessors(CmpParent))
1737 return std::nullopt;
1738
1739 UsedNZCV NZCVUsedAfterCmp;
1740 for (MachineInstr &Instr : instructionsWithoutDebug(
1741 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1742 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1743 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1744 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1745 return std::nullopt;
1746 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1747 if (CCUseInstrs)
1748 CCUseInstrs->push_back(&Instr);
1749 }
1750 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1751 break;
1752 }
1753 return NZCVUsedAfterCmp;
1754}
1755
1756static bool isADDSRegImm(unsigned Opcode) {
1757 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1758}
1759
1760static bool isSUBSRegImm(unsigned Opcode) {
1761 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1762}
1763
1764/// Check if CmpInstr can be substituted by MI.
1765///
1766/// CmpInstr can be substituted:
1767/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1768/// - and, MI and CmpInstr are from the same MachineBB
1769/// - and, condition flags are not alive in successors of the CmpInstr parent
1770/// - and, if MI opcode is the S form there must be no defs of flags between
1771/// MI and CmpInstr
1772/// or if MI opcode is not the S form there must be neither defs of flags
1773/// nor uses of flags between MI and CmpInstr.
1774/// - and, the C flag is not used after CmpInstr,
1775/// and either the V flag is not used or MI produces a poison value
1776/// when signed overflow occurs.
1777static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1778 const TargetRegisterInfo &TRI) {
1779 // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
1780 // subtraction that may or may not set flags.
1781 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1782
1783 const unsigned CmpOpcode = CmpInstr.getOpcode();
1784 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1785 return false;
1786
1787 assert((CmpInstr.getOperand(2).isImm() &&
1788 CmpInstr.getOperand(2).getImm() == 0) &&
1789 "Caller guarantees that CmpInstr compares with constant 0");
1790
1791 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1792 if (!NZVCUsed || NZVCUsed->C)
1793 return false;
1794
1795 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1796 // '%vreg = add ...' or '%vreg = sub ...'.
1797 // Condition flag V is used to indicate signed overflow.
1798 // 1) MI and CmpInstr set N and V to the same value.
1799 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1800 // signed overflow occurs, so CmpInstr could still be simplified away.
1801 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1802 return false;
1803
1804 AccessKind AccessToCheck = AK_Write;
1805 if (sForm(MI) != MI.getOpcode())
1806 AccessToCheck = AK_All;
1807 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1808}
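// Schematically, when the conditions above hold, substituteCmpToZero below
// can rewrite
//   %w = SUBWrr %a, %b        ; nsw
//   SUBSWri wzr, %w, 0, 0     ; cmp %w, #0
//   b.lt ...
// into
//   %w = SUBSWrr %a, %b       ; the subtract now produces NZCV itself
//   b.lt ...
// (a sketch; register classes and implicit operands are omitted).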
1809
1810/// Substitute an instruction comparing to zero with another instruction
1811/// which produces needed condition flags.
1812///
1813/// Return true on success.
1814bool AArch64InstrInfo::substituteCmpToZero(
1815 MachineInstr &CmpInstr, unsigned SrcReg,
1816 const MachineRegisterInfo &MRI) const {
1817 // Get the unique definition of SrcReg.
1818 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1819 if (!MI)
1820 return false;
1821
1822 const TargetRegisterInfo &TRI = getRegisterInfo();
1823
1824 unsigned NewOpc = sForm(*MI);
1825 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1826 return false;
1827
1828 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1829 return false;
1830
1831 // Update the instruction to set NZCV.
1832 MI->setDesc(get(NewOpc));
1833 CmpInstr.eraseFromParent();
1834 bool succeeded = UpdateOperandRegClass(*MI);
1835 (void)succeeded;
1836 assert(succeeded && "Some operands reg class are incompatible!");
1837 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1838 return true;
1839}
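// For illustration (a minimal sketch; registers are arbitrary):
// substituteCmpToZero rewrites
//   sub  w8, w9, w10
//   cmp  w8, #0
//   b.ne <bb>
// into
//   subs w8, w9, w10
//   b.ne <bb>
// by switching the defining instruction to its flag-setting S form and
// erasing the explicit compare against zero.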
1840
1841/// \returns True if \p CmpInstr can be removed.
1842///
1843/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1844/// codes used in \p CCUseInstrs must be inverted.
1845 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1846                                  int CmpValue, const TargetRegisterInfo &TRI,
1847                                  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1848                                  bool &IsInvertCC) {
1849 assert((CmpValue == 0 || CmpValue == 1) &&
1850 "Only comparisons to 0 or 1 considered for removal!");
1851
1852 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1853 unsigned MIOpc = MI.getOpcode();
1854 if (MIOpc == AArch64::CSINCWr) {
1855 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1856 MI.getOperand(2).getReg() != AArch64::WZR)
1857 return false;
1858 } else if (MIOpc == AArch64::CSINCXr) {
1859 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1860 MI.getOperand(2).getReg() != AArch64::XZR)
1861 return false;
1862 } else {
1863 return false;
1864 }
1865   AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1866   if (MICC == AArch64CC::Invalid)
1867 return false;
1868
1869 // NZCV needs to be defined
1870 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1871 return false;
1872
1873 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1874 const unsigned CmpOpcode = CmpInstr.getOpcode();
1875 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1876 if (CmpValue && !IsSubsRegImm)
1877 return false;
1878 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1879 return false;
1880
1881 // MI conditions allowed: eq, ne, mi, pl
1882 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1883 if (MIUsedNZCV.C || MIUsedNZCV.V)
1884 return false;
1885
1886 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1887 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1888   // Condition flags are not used in the successors of CmpInstr's basic block,
1889   // and only the Z or N flag may be used after CmpInstr within its basic block.
1890 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1891 return false;
1892 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1893 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1894 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1895 return false;
1896   // If CmpInstr is a comparison to zero, the MI condition is limited to eq/ne.
1897 if (MIUsedNZCV.N && !CmpValue)
1898 return false;
1899
1900 // There must be no defs of flags between MI and CmpInstr
1901 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1902 return false;
1903
1904 // Condition code is inverted in the following cases:
1905 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1906 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1907 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1908 (!CmpValue && MICC == AArch64CC::NE);
1909 return true;
1910}
1911
1912/// Remove comparison in csinc-cmp sequence
1913///
1914/// Examples:
1915/// 1. \code
1916/// csinc w9, wzr, wzr, ne
1917/// cmp w9, #0
1918/// b.eq
1919/// \endcode
1920/// to
1921/// \code
1922/// csinc w9, wzr, wzr, ne
1923/// b.ne
1924/// \endcode
1925///
1926/// 2. \code
1927/// csinc x2, xzr, xzr, mi
1928/// cmp x2, #1
1929/// b.pl
1930/// \endcode
1931/// to
1932/// \code
1933/// csinc x2, xzr, xzr, mi
1934/// b.pl
1935/// \endcode
1936///
1937/// \param CmpInstr comparison instruction
1938/// \return True when comparison removed
1939bool AArch64InstrInfo::removeCmpToZeroOrOne(
1940 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1941 const MachineRegisterInfo &MRI) const {
1942 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1943 if (!MI)
1944 return false;
1945   const TargetRegisterInfo &TRI = getRegisterInfo();
1946   SmallVector<MachineInstr *, 4> CCUseInstrs;
1947   bool IsInvertCC = false;
1948 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1949 IsInvertCC))
1950 return false;
1951 // Make transformation
1952 CmpInstr.eraseFromParent();
1953 if (IsInvertCC) {
1954 // Invert condition codes in CmpInstr CC users
1955 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1956       int Idx = CCUseInstr->findRegisterUseOperandIdx(AArch64::NZCV, &TRI);
1957       assert(Idx >= 0 && "Unexpected instruction using CC.");
1958 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1959       AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1960           static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1961 CCOperand.setImm(CCUse);
1962 }
1963 }
1964 return true;
1965}
1966
1967 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1968   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1969 MI.getOpcode() != AArch64::CATCHRET)
1970 return false;
1971
1972 MachineBasicBlock &MBB = *MI.getParent();
1973 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1974 auto TRI = Subtarget.getRegisterInfo();
1975 DebugLoc DL = MI.getDebugLoc();
1976
1977 if (MI.getOpcode() == AArch64::CATCHRET) {
1978 // Skip to the first instruction before the epilog.
1979 const TargetInstrInfo *TII =
1980         MBB.getParent()->getSubtarget().getInstrInfo();
1981     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1982     MachineBasicBlock::iterator MBBI(MI);
1983 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1984 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1985 FirstEpilogSEH != MBB.begin())
1986 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1987 if (FirstEpilogSEH != MBB.begin())
1988 FirstEpilogSEH = std::next(FirstEpilogSEH);
1989 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1990 .addReg(AArch64::X0, RegState::Define)
1991 .addMBB(TargetMBB);
1992 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1993 .addReg(AArch64::X0, RegState::Define)
1994 .addReg(AArch64::X0)
1995 .addMBB(TargetMBB)
1996 .addImm(0);
1997 return true;
1998 }
1999
2000 Register Reg = MI.getOperand(0).getReg();
2001   Module &M = *MBB.getParent()->getFunction().getParent();
2002   if (M.getStackProtectorGuard() == "sysreg") {
2003 const AArch64SysReg::SysReg *SrcReg =
2004 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2005 if (!SrcReg)
2006 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2007
2008 // mrs xN, sysreg
2009 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2010         .addDef(Reg, RegState::Renamable)
2011         .addImm(SrcReg->Encoding);
2012 int Offset = M.getStackProtectorGuardOffset();
2013 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2014 // ldr xN, [xN, #offset]
2015 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2016 .addDef(Reg)
2017 .addUse(Reg, RegState::Kill)
2018 .addImm(Offset / 8);
2019 } else if (Offset >= -256 && Offset <= 255) {
2020 // ldur xN, [xN, #offset]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(Offset);
2025 } else if (Offset >= -4095 && Offset <= 4095) {
2026 if (Offset > 0) {
2027 // add xN, xN, #offset
2028 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset)
2032 .addImm(0);
2033 } else {
2034 // sub xN, xN, #offset
2035 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2036 .addDef(Reg)
2037 .addUse(Reg, RegState::Kill)
2038 .addImm(-Offset)
2039 .addImm(0);
2040 }
2041 // ldr xN, [xN]
2042 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2043 .addDef(Reg)
2044 .addUse(Reg, RegState::Kill)
2045 .addImm(0);
2046 } else {
2047       // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2048       // than 32760.
2049 // It might be nice to use AArch64::MOVi32imm here, which would get
2050 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2051 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2052 // AArch64FrameLowering might help us find such a scratch register
2053 // though. If we failed to find a scratch register, we could emit a
2054 // stream of add instructions to build up the immediate. Or, we could try
2055 // to insert a AArch64::MOVi32imm before register allocation so that we
2056 // didn't need to scavenge for a scratch register.
2057 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2058 }
2059 MBB.erase(MI);
2060 return true;
2061 }
2062
2063 const GlobalValue *GV =
2064 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2065 const TargetMachine &TM = MBB.getParent()->getTarget();
2066 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2067 const unsigned char MO_NC = AArch64II::MO_NC;
2068
2069 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2070 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2071 .addGlobalAddress(GV, 0, OpFlags);
2072 if (Subtarget.isTargetILP32()) {
2073 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2074 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2075 .addDef(Reg32, RegState::Dead)
2076 .addUse(Reg, RegState::Kill)
2077 .addImm(0)
2078 .addMemOperand(*MI.memoperands_begin())
2079           .addDef(Reg, RegState::Implicit);
2080     } else {
2081 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2082 .addReg(Reg, RegState::Kill)
2083 .addImm(0)
2084 .addMemOperand(*MI.memoperands_begin());
2085 }
2086 } else if (TM.getCodeModel() == CodeModel::Large) {
2087 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2088 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2089 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2090 .addImm(0);
2091 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2092 .addReg(Reg, RegState::Kill)
2093 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2094 .addImm(16);
2095 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2096 .addReg(Reg, RegState::Kill)
2097 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2098 .addImm(32);
2099 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2100 .addReg(Reg, RegState::Kill)
2101       .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2102       .addImm(48);
2103 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2104 .addReg(Reg, RegState::Kill)
2105 .addImm(0)
2106 .addMemOperand(*MI.memoperands_begin());
2107 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2108 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2109 .addGlobalAddress(GV, 0, OpFlags);
2110 } else {
2111 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2112 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2113 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2114 if (Subtarget.isTargetILP32()) {
2115 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2117 .addDef(Reg32, RegState::Dead)
2118 .addUse(Reg, RegState::Kill)
2119 .addGlobalAddress(GV, 0, LoFlags)
2120 .addMemOperand(*MI.memoperands_begin())
2121           .addDef(Reg, RegState::Implicit);
2122     } else {
2123 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2124 .addReg(Reg, RegState::Kill)
2125 .addGlobalAddress(GV, 0, LoFlags)
2126 .addMemOperand(*MI.memoperands_begin());
2127 }
2128 }
2129
2130 MBB.erase(MI);
2131
2132 return true;
2133}
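// For illustration (a sketch; xN, <sysreg> and #offset are placeholders):
// when the module selects the "sysreg" stack protector guard with a small,
// 8-byte-aligned offset, LOAD_STACK_GUARD expands to
//   mrs xN, <sysreg>
//   ldr xN, [xN, #offset]
// Otherwise the guard variable (typically __stack_chk_guard) is materialised
// according to the code model (GOT, tiny, large, or adrp + lo12) and loaded
// through.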
2134
2135// Return true if this instruction simply sets its single destination register
2136// to zero. This is equivalent to a register rename of the zero-register.
2137 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2138   switch (MI.getOpcode()) {
2139 default:
2140 break;
2141 case AArch64::MOVZWi:
2142 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2143 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2144 assert(MI.getDesc().getNumOperands() == 3 &&
2145 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2146 return true;
2147 }
2148 break;
2149 case AArch64::ANDWri: // and Rd, Rzr, #imm
2150 return MI.getOperand(1).getReg() == AArch64::WZR;
2151 case AArch64::ANDXri:
2152 return MI.getOperand(1).getReg() == AArch64::XZR;
2153 case TargetOpcode::COPY:
2154 return MI.getOperand(1).getReg() == AArch64::WZR;
2155 }
2156 return false;
2157}
2158
2159// Return true if this instruction simply renames a general register without
2160// modifying bits.
2161 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2162   switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 case TargetOpcode::COPY: {
2166     // GPR32 copies will be lowered to ORRXrs
2167 Register DstReg = MI.getOperand(0).getReg();
2168 return (AArch64::GPR32RegClass.contains(DstReg) ||
2169 AArch64::GPR64RegClass.contains(DstReg));
2170 }
2171 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2172 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2173 assert(MI.getDesc().getNumOperands() == 4 &&
2174 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2175 return true;
2176 }
2177 break;
2178 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2179 if (MI.getOperand(2).getImm() == 0) {
2180 assert(MI.getDesc().getNumOperands() == 4 &&
2181 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189 // Return true if this instruction simply renames an FPR (vector) register
2190 // without modifying bits.
2191 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2192   switch (MI.getOpcode()) {
2193 default:
2194 break;
2195 case TargetOpcode::COPY: {
2196 Register DstReg = MI.getOperand(0).getReg();
2197 return AArch64::FPR128RegClass.contains(DstReg);
2198 }
2199 case AArch64::ORRv16i8:
2200 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2201 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2202 "invalid ORRv16i8 operands");
2203 return true;
2204 }
2205 break;
2206 }
2207 return false;
2208}
2209
2210 Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2211                                                int &FrameIndex) const {
2212 switch (MI.getOpcode()) {
2213 default:
2214 break;
2215 case AArch64::LDRWui:
2216 case AArch64::LDRXui:
2217 case AArch64::LDRBui:
2218 case AArch64::LDRHui:
2219 case AArch64::LDRSui:
2220 case AArch64::LDRDui:
2221 case AArch64::LDRQui:
2222 case AArch64::LDR_PXI:
2223 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2224 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2225 FrameIndex = MI.getOperand(1).getIndex();
2226 return MI.getOperand(0).getReg();
2227 }
2228 break;
2229 }
2230
2231 return 0;
2232}
2233
2234 Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2235                                               int &FrameIndex) const {
2236 switch (MI.getOpcode()) {
2237 default:
2238 break;
2239 case AArch64::STRWui:
2240 case AArch64::STRXui:
2241 case AArch64::STRBui:
2242 case AArch64::STRHui:
2243 case AArch64::STRSui:
2244 case AArch64::STRDui:
2245 case AArch64::STRQui:
2246 case AArch64::STR_PXI:
2247 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2248 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2249 FrameIndex = MI.getOperand(1).getIndex();
2250 return MI.getOperand(0).getReg();
2251 }
2252 break;
2253 }
2254 return 0;
2255}
2256
2257/// Check all MachineMemOperands for a hint to suppress pairing.
2258 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2259   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2260 return MMO->getFlags() & MOSuppressPair;
2261 });
2262}
2263
2264/// Set a flag on the first MachineMemOperand to suppress pairing.
2265 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2266   if (MI.memoperands_empty())
2267 return;
2268 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2269}
2270
2271/// Check all MachineMemOperands for a hint that the load/store is strided.
2272 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2273   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2274 return MMO->getFlags() & MOStridedAccess;
2275 });
2276}
2277
2278 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2279   switch (Opc) {
2280 default:
2281 return false;
2282 case AArch64::STURSi:
2283 case AArch64::STRSpre:
2284 case AArch64::STURDi:
2285 case AArch64::STRDpre:
2286 case AArch64::STURQi:
2287 case AArch64::STRQpre:
2288 case AArch64::STURBBi:
2289 case AArch64::STURHHi:
2290 case AArch64::STURWi:
2291 case AArch64::STRWpre:
2292 case AArch64::STURXi:
2293 case AArch64::STRXpre:
2294 case AArch64::LDURSi:
2295 case AArch64::LDRSpre:
2296 case AArch64::LDURDi:
2297 case AArch64::LDRDpre:
2298 case AArch64::LDURQi:
2299 case AArch64::LDRQpre:
2300 case AArch64::LDURWi:
2301 case AArch64::LDRWpre:
2302 case AArch64::LDURXi:
2303 case AArch64::LDRXpre:
2304 case AArch64::LDRSWpre:
2305 case AArch64::LDURSWi:
2306 case AArch64::LDURHHi:
2307 case AArch64::LDURBBi:
2308 case AArch64::LDURSBWi:
2309 case AArch64::LDURSHWi:
2310 return true;
2311 }
2312}
2313
2314std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2315 switch (Opc) {
2316 default: return {};
2317 case AArch64::PRFMui: return AArch64::PRFUMi;
2318 case AArch64::LDRXui: return AArch64::LDURXi;
2319 case AArch64::LDRWui: return AArch64::LDURWi;
2320 case AArch64::LDRBui: return AArch64::LDURBi;
2321 case AArch64::LDRHui: return AArch64::LDURHi;
2322 case AArch64::LDRSui: return AArch64::LDURSi;
2323 case AArch64::LDRDui: return AArch64::LDURDi;
2324 case AArch64::LDRQui: return AArch64::LDURQi;
2325 case AArch64::LDRBBui: return AArch64::LDURBBi;
2326 case AArch64::LDRHHui: return AArch64::LDURHHi;
2327 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2328 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2329 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2330 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2331 case AArch64::LDRSWui: return AArch64::LDURSWi;
2332 case AArch64::STRXui: return AArch64::STURXi;
2333 case AArch64::STRWui: return AArch64::STURWi;
2334 case AArch64::STRBui: return AArch64::STURBi;
2335 case AArch64::STRHui: return AArch64::STURHi;
2336 case AArch64::STRSui: return AArch64::STURSi;
2337 case AArch64::STRDui: return AArch64::STURDi;
2338 case AArch64::STRQui: return AArch64::STURQi;
2339 case AArch64::STRBBui: return AArch64::STURBBi;
2340 case AArch64::STRHHui: return AArch64::STURHHi;
2341 }
2342}
2343
2344 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2345   switch (Opc) {
2346 default:
2347 return 2;
2348 case AArch64::LDPXi:
2349 case AArch64::LDPDi:
2350 case AArch64::STPXi:
2351 case AArch64::STPDi:
2352 case AArch64::LDNPXi:
2353 case AArch64::LDNPDi:
2354 case AArch64::STNPXi:
2355 case AArch64::STNPDi:
2356 case AArch64::LDPQi:
2357 case AArch64::STPQi:
2358 case AArch64::LDNPQi:
2359 case AArch64::STNPQi:
2360 case AArch64::LDPWi:
2361 case AArch64::LDPSi:
2362 case AArch64::STPWi:
2363 case AArch64::STPSi:
2364 case AArch64::LDNPWi:
2365 case AArch64::LDNPSi:
2366 case AArch64::STNPWi:
2367 case AArch64::STNPSi:
2368 case AArch64::LDG:
2369 case AArch64::STGPi:
2370
2371 case AArch64::LD1B_IMM:
2372 case AArch64::LD1B_H_IMM:
2373 case AArch64::LD1B_S_IMM:
2374 case AArch64::LD1B_D_IMM:
2375 case AArch64::LD1SB_H_IMM:
2376 case AArch64::LD1SB_S_IMM:
2377 case AArch64::LD1SB_D_IMM:
2378 case AArch64::LD1H_IMM:
2379 case AArch64::LD1H_S_IMM:
2380 case AArch64::LD1H_D_IMM:
2381 case AArch64::LD1SH_S_IMM:
2382 case AArch64::LD1SH_D_IMM:
2383 case AArch64::LD1W_IMM:
2384 case AArch64::LD1W_D_IMM:
2385 case AArch64::LD1SW_D_IMM:
2386 case AArch64::LD1D_IMM:
2387
2388 case AArch64::LD2B_IMM:
2389 case AArch64::LD2H_IMM:
2390 case AArch64::LD2W_IMM:
2391 case AArch64::LD2D_IMM:
2392 case AArch64::LD3B_IMM:
2393 case AArch64::LD3H_IMM:
2394 case AArch64::LD3W_IMM:
2395 case AArch64::LD3D_IMM:
2396 case AArch64::LD4B_IMM:
2397 case AArch64::LD4H_IMM:
2398 case AArch64::LD4W_IMM:
2399 case AArch64::LD4D_IMM:
2400
2401 case AArch64::ST1B_IMM:
2402 case AArch64::ST1B_H_IMM:
2403 case AArch64::ST1B_S_IMM:
2404 case AArch64::ST1B_D_IMM:
2405 case AArch64::ST1H_IMM:
2406 case AArch64::ST1H_S_IMM:
2407 case AArch64::ST1H_D_IMM:
2408 case AArch64::ST1W_IMM:
2409 case AArch64::ST1W_D_IMM:
2410 case AArch64::ST1D_IMM:
2411
2412 case AArch64::ST2B_IMM:
2413 case AArch64::ST2H_IMM:
2414 case AArch64::ST2W_IMM:
2415 case AArch64::ST2D_IMM:
2416 case AArch64::ST3B_IMM:
2417 case AArch64::ST3H_IMM:
2418 case AArch64::ST3W_IMM:
2419 case AArch64::ST3D_IMM:
2420 case AArch64::ST4B_IMM:
2421 case AArch64::ST4H_IMM:
2422 case AArch64::ST4W_IMM:
2423 case AArch64::ST4D_IMM:
2424
2425 case AArch64::LD1RB_IMM:
2426 case AArch64::LD1RB_H_IMM:
2427 case AArch64::LD1RB_S_IMM:
2428 case AArch64::LD1RB_D_IMM:
2429 case AArch64::LD1RSB_H_IMM:
2430 case AArch64::LD1RSB_S_IMM:
2431 case AArch64::LD1RSB_D_IMM:
2432 case AArch64::LD1RH_IMM:
2433 case AArch64::LD1RH_S_IMM:
2434 case AArch64::LD1RH_D_IMM:
2435 case AArch64::LD1RSH_S_IMM:
2436 case AArch64::LD1RSH_D_IMM:
2437 case AArch64::LD1RW_IMM:
2438 case AArch64::LD1RW_D_IMM:
2439 case AArch64::LD1RSW_IMM:
2440 case AArch64::LD1RD_IMM:
2441
2442 case AArch64::LDNT1B_ZRI:
2443 case AArch64::LDNT1H_ZRI:
2444 case AArch64::LDNT1W_ZRI:
2445 case AArch64::LDNT1D_ZRI:
2446 case AArch64::STNT1B_ZRI:
2447 case AArch64::STNT1H_ZRI:
2448 case AArch64::STNT1W_ZRI:
2449 case AArch64::STNT1D_ZRI:
2450
2451 case AArch64::LDNF1B_IMM:
2452 case AArch64::LDNF1B_H_IMM:
2453 case AArch64::LDNF1B_S_IMM:
2454 case AArch64::LDNF1B_D_IMM:
2455 case AArch64::LDNF1SB_H_IMM:
2456 case AArch64::LDNF1SB_S_IMM:
2457 case AArch64::LDNF1SB_D_IMM:
2458 case AArch64::LDNF1H_IMM:
2459 case AArch64::LDNF1H_S_IMM:
2460 case AArch64::LDNF1H_D_IMM:
2461 case AArch64::LDNF1SH_S_IMM:
2462 case AArch64::LDNF1SH_D_IMM:
2463 case AArch64::LDNF1W_IMM:
2464 case AArch64::LDNF1W_D_IMM:
2465 case AArch64::LDNF1SW_D_IMM:
2466 case AArch64::LDNF1D_IMM:
2467 return 3;
2468 case AArch64::ADDG:
2469 case AArch64::STGi:
2470 case AArch64::LDR_PXI:
2471 case AArch64::STR_PXI:
2472 return 2;
2473 }
2474}
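// For illustration (a sketch of the operand layout assumed above): for a pair
// such as
//   ldp x0, x1, [x2, #16]
// the MI operands are (x0, x1, x2, imm) and the scaled immediate is operand 3,
// whereas a plain
//   ldr x0, [x2, #16]
// has operands (x0, x2, imm), so the immediate defaults to operand 2.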
2475
2476 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2477   switch (MI.getOpcode()) {
2478 default:
2479 return false;
2480 // Scaled instructions.
2481 case AArch64::STRSui:
2482 case AArch64::STRDui:
2483 case AArch64::STRQui:
2484 case AArch64::STRXui:
2485 case AArch64::STRWui:
2486 case AArch64::LDRSui:
2487 case AArch64::LDRDui:
2488 case AArch64::LDRQui:
2489 case AArch64::LDRXui:
2490 case AArch64::LDRWui:
2491 case AArch64::LDRSWui:
2492 // Unscaled instructions.
2493 case AArch64::STURSi:
2494 case AArch64::STRSpre:
2495 case AArch64::STURDi:
2496 case AArch64::STRDpre:
2497 case AArch64::STURQi:
2498 case AArch64::STRQpre:
2499 case AArch64::STURWi:
2500 case AArch64::STRWpre:
2501 case AArch64::STURXi:
2502 case AArch64::STRXpre:
2503 case AArch64::LDURSi:
2504 case AArch64::LDRSpre:
2505 case AArch64::LDURDi:
2506 case AArch64::LDRDpre:
2507 case AArch64::LDURQi:
2508 case AArch64::LDRQpre:
2509 case AArch64::LDURWi:
2510 case AArch64::LDRWpre:
2511 case AArch64::LDURXi:
2512 case AArch64::LDRXpre:
2513 case AArch64::LDURSWi:
2514 case AArch64::LDRSWpre:
2515 return true;
2516 }
2517}
2518
2519 bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2520   switch (MI.getOpcode()) {
2521 default:
2522 assert((!MI.isCall() || !MI.isReturn()) &&
2523 "Unexpected instruction - was a new tail call opcode introduced?");
2524 return false;
2525 case AArch64::TCRETURNdi:
2526 case AArch64::TCRETURNri:
2527 case AArch64::TCRETURNrix16x17:
2528 case AArch64::TCRETURNrix17:
2529 case AArch64::TCRETURNrinotx16:
2530 case AArch64::TCRETURNriALL:
2531 case AArch64::AUTH_TCRETURN:
2532 case AArch64::AUTH_TCRETURN_BTI:
2533 return true;
2534 }
2535}
2536
2537 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2538   switch (Opc) {
2539 default:
2540 llvm_unreachable("Opcode has no flag setting equivalent!");
2541 // 32-bit cases:
2542 case AArch64::ADDWri:
2543 return AArch64::ADDSWri;
2544 case AArch64::ADDWrr:
2545 return AArch64::ADDSWrr;
2546 case AArch64::ADDWrs:
2547 return AArch64::ADDSWrs;
2548 case AArch64::ADDWrx:
2549 return AArch64::ADDSWrx;
2550 case AArch64::ANDWri:
2551 return AArch64::ANDSWri;
2552 case AArch64::ANDWrr:
2553 return AArch64::ANDSWrr;
2554 case AArch64::ANDWrs:
2555 return AArch64::ANDSWrs;
2556 case AArch64::BICWrr:
2557 return AArch64::BICSWrr;
2558 case AArch64::BICWrs:
2559 return AArch64::BICSWrs;
2560 case AArch64::SUBWri:
2561 return AArch64::SUBSWri;
2562 case AArch64::SUBWrr:
2563 return AArch64::SUBSWrr;
2564 case AArch64::SUBWrs:
2565 return AArch64::SUBSWrs;
2566 case AArch64::SUBWrx:
2567 return AArch64::SUBSWrx;
2568 // 64-bit cases:
2569 case AArch64::ADDXri:
2570 return AArch64::ADDSXri;
2571 case AArch64::ADDXrr:
2572 return AArch64::ADDSXrr;
2573 case AArch64::ADDXrs:
2574 return AArch64::ADDSXrs;
2575 case AArch64::ADDXrx:
2576 return AArch64::ADDSXrx;
2577 case AArch64::ANDXri:
2578 return AArch64::ANDSXri;
2579 case AArch64::ANDXrr:
2580 return AArch64::ANDSXrr;
2581 case AArch64::ANDXrs:
2582 return AArch64::ANDSXrs;
2583 case AArch64::BICXrr:
2584 return AArch64::BICSXrr;
2585 case AArch64::BICXrs:
2586 return AArch64::BICSXrs;
2587 case AArch64::SUBXri:
2588 return AArch64::SUBSXri;
2589 case AArch64::SUBXrr:
2590 return AArch64::SUBSXrr;
2591 case AArch64::SUBXrs:
2592 return AArch64::SUBSXrs;
2593 case AArch64::SUBXrx:
2594 return AArch64::SUBSXrx;
2595 // SVE instructions:
2596 case AArch64::AND_PPzPP:
2597 return AArch64::ANDS_PPzPP;
2598 case AArch64::BIC_PPzPP:
2599 return AArch64::BICS_PPzPP;
2600 case AArch64::EOR_PPzPP:
2601 return AArch64::EORS_PPzPP;
2602 case AArch64::NAND_PPzPP:
2603 return AArch64::NANDS_PPzPP;
2604 case AArch64::NOR_PPzPP:
2605 return AArch64::NORS_PPzPP;
2606 case AArch64::ORN_PPzPP:
2607 return AArch64::ORNS_PPzPP;
2608 case AArch64::ORR_PPzPP:
2609 return AArch64::ORRS_PPzPP;
2610 case AArch64::BRKA_PPzP:
2611 return AArch64::BRKAS_PPzP;
2612 case AArch64::BRKPA_PPzPP:
2613 return AArch64::BRKPAS_PPzPP;
2614 case AArch64::BRKB_PPzP:
2615 return AArch64::BRKBS_PPzP;
2616 case AArch64::BRKPB_PPzPP:
2617 return AArch64::BRKPBS_PPzPP;
2618 case AArch64::BRKN_PPzP:
2619 return AArch64::BRKNS_PPzP;
2620 case AArch64::RDFFR_PPz:
2621 return AArch64::RDFFRS_PPz;
2622 case AArch64::PTRUE_B:
2623 return AArch64::PTRUES_B;
2624 }
2625}
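// For illustration: convertToFlagSettingOpc(AArch64::ADDWri) returns
// AArch64::ADDSWri, i.e. in assembly
//   add  w0, w1, #1
// becomes the NZCV-defining
//   adds w0, w1, #1
// Opcodes without a flag-setting equivalent must not be passed in; they hit
// the llvm_unreachable default above.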
2626
2627// Is this a candidate for ld/st merging or pairing? For example, we don't
2628// touch volatiles or load/stores that have a hint to avoid pair formation.
2629 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2630 
2631 bool IsPreLdSt = isPreLdSt(MI);
2632
2633 // If this is a volatile load/store, don't mess with it.
2634 if (MI.hasOrderedMemoryRef())
2635 return false;
2636
2637 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2638 // For Pre-inc LD/ST, the operand is shifted by one.
2639 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2640 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2641 "Expected a reg or frame index operand.");
2642
2643 // For Pre-indexed addressing quadword instructions, the third operand is the
2644 // immediate value.
2645 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2646
2647 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2648 return false;
2649
2650 // Can't merge/pair if the instruction modifies the base register.
2651 // e.g., ldr x0, [x0]
2652 // This case will never occur with an FI base.
2653 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2654 // STR<S,D,Q,W,X>pre, it can be merged.
2655 // For example:
2656 // ldr q0, [x11, #32]!
2657 // ldr q1, [x11, #16]
2658 // to
2659 // ldp q0, q1, [x11, #32]!
2660 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2661 Register BaseReg = MI.getOperand(1).getReg();
2662     const TargetRegisterInfo *TRI = &getRegisterInfo();
2663     if (MI.modifiesRegister(BaseReg, TRI))
2664 return false;
2665 }
2666
2667 // Check if this load/store has a hint to avoid pair formation.
2668 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2669   if (isLdStPairSuppressed(MI))
2670     return false;
2671
2672 // Do not pair any callee-save store/reload instructions in the
2673 // prologue/epilogue if the CFI information encoded the operations as separate
2674 // instructions, as that will cause the size of the actual prologue to mismatch
2675 // with the prologue size recorded in the Windows CFI.
2676 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2677 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2678 MI.getMF()->getFunction().needsUnwindTableEntry();
2679 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2680                       MI.getFlag(MachineInstr::FrameDestroy)))
2681     return false;
2682
2683 // On some CPUs quad load/store pairs are slower than two single load/stores.
2684 if (Subtarget.isPaired128Slow()) {
2685 switch (MI.getOpcode()) {
2686 default:
2687 break;
2688 case AArch64::LDURQi:
2689 case AArch64::STURQi:
2690 case AArch64::LDRQui:
2691 case AArch64::STRQui:
2692 return false;
2693 }
2694 }
2695
2696 return true;
2697}
2698
2699 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2700     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2701     int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2702 const TargetRegisterInfo *TRI) const {
2703 if (!LdSt.mayLoadOrStore())
2704 return false;
2705
2706 const MachineOperand *BaseOp;
2707 TypeSize WidthN(0, false);
2708 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2709 WidthN, TRI))
2710 return false;
2711 // The maximum vscale is 16 under AArch64, return the maximal extent for the
2712 // vector.
2713 Width = LocationSize::precise(WidthN);
2714 BaseOps.push_back(BaseOp);
2715 return true;
2716}
2717
2718std::optional<ExtAddrMode>
2719 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2720                                           const TargetRegisterInfo *TRI) const {
2721 const MachineOperand *Base; // Filled with the base operand of MI.
2722 int64_t Offset; // Filled with the offset of MI.
2723 bool OffsetIsScalable;
2724 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2725 return std::nullopt;
2726
2727 if (!Base->isReg())
2728 return std::nullopt;
2729 ExtAddrMode AM;
2730 AM.BaseReg = Base->getReg();
2731 AM.Displacement = Offset;
2732 AM.ScaledReg = 0;
2733 AM.Scale = 0;
2734 return AM;
2735}
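// For illustration (a sketch): for
//   ldr x1, [x0, #16]
// getAddrModeFromMemoryOp fills ExtAddrMode with BaseReg = x0,
// Displacement = 16 and ScaledReg = Scale = 0, since only the
// base-register-plus-immediate form is extracted here.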
2736
2737 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2738                                            Register Reg,
2739 const MachineInstr &AddrI,
2740 ExtAddrMode &AM) const {
2741 // Filter out instructions into which we cannot fold.
2742 unsigned NumBytes;
2743 int64_t OffsetScale = 1;
2744 switch (MemI.getOpcode()) {
2745 default:
2746 return false;
2747
2748 case AArch64::LDURQi:
2749 case AArch64::STURQi:
2750 NumBytes = 16;
2751 break;
2752
2753 case AArch64::LDURDi:
2754 case AArch64::STURDi:
2755 case AArch64::LDURXi:
2756 case AArch64::STURXi:
2757 NumBytes = 8;
2758 break;
2759
2760 case AArch64::LDURWi:
2761 case AArch64::LDURSWi:
2762 case AArch64::STURWi:
2763 NumBytes = 4;
2764 break;
2765
2766 case AArch64::LDURHi:
2767 case AArch64::STURHi:
2768 case AArch64::LDURHHi:
2769 case AArch64::STURHHi:
2770 case AArch64::LDURSHXi:
2771 case AArch64::LDURSHWi:
2772 NumBytes = 2;
2773 break;
2774
2775 case AArch64::LDRBroX:
2776 case AArch64::LDRBBroX:
2777 case AArch64::LDRSBXroX:
2778 case AArch64::LDRSBWroX:
2779 case AArch64::STRBroX:
2780 case AArch64::STRBBroX:
2781 case AArch64::LDURBi:
2782 case AArch64::LDURBBi:
2783 case AArch64::LDURSBXi:
2784 case AArch64::LDURSBWi:
2785 case AArch64::STURBi:
2786 case AArch64::STURBBi:
2787 case AArch64::LDRBui:
2788 case AArch64::LDRBBui:
2789 case AArch64::LDRSBXui:
2790 case AArch64::LDRSBWui:
2791 case AArch64::STRBui:
2792 case AArch64::STRBBui:
2793 NumBytes = 1;
2794 break;
2795
2796 case AArch64::LDRQroX:
2797 case AArch64::STRQroX:
2798 case AArch64::LDRQui:
2799 case AArch64::STRQui:
2800 NumBytes = 16;
2801 OffsetScale = 16;
2802 break;
2803
2804 case AArch64::LDRDroX:
2805 case AArch64::STRDroX:
2806 case AArch64::LDRXroX:
2807 case AArch64::STRXroX:
2808 case AArch64::LDRDui:
2809 case AArch64::STRDui:
2810 case AArch64::LDRXui:
2811 case AArch64::STRXui:
2812 NumBytes = 8;
2813 OffsetScale = 8;
2814 break;
2815
2816 case AArch64::LDRWroX:
2817 case AArch64::LDRSWroX:
2818 case AArch64::STRWroX:
2819 case AArch64::LDRWui:
2820 case AArch64::LDRSWui:
2821 case AArch64::STRWui:
2822 NumBytes = 4;
2823 OffsetScale = 4;
2824 break;
2825
2826 case AArch64::LDRHroX:
2827 case AArch64::STRHroX:
2828 case AArch64::LDRHHroX:
2829 case AArch64::STRHHroX:
2830 case AArch64::LDRSHXroX:
2831 case AArch64::LDRSHWroX:
2832 case AArch64::LDRHui:
2833 case AArch64::STRHui:
2834 case AArch64::LDRHHui:
2835 case AArch64::STRHHui:
2836 case AArch64::LDRSHXui:
2837 case AArch64::LDRSHWui:
2838 NumBytes = 2;
2839 OffsetScale = 2;
2840 break;
2841 }
2842
2843 // Check the fold operand is not the loaded/stored value.
2844 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2845 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2846 return false;
2847
2848 // Handle memory instructions with a [Reg, Reg] addressing mode.
2849 if (MemI.getOperand(2).isReg()) {
2850 // Bail if the addressing mode already includes extension of the offset
2851 // register.
2852 if (MemI.getOperand(3).getImm())
2853 return false;
2854
2855 // Check if we actually have a scaled offset.
2856 if (MemI.getOperand(4).getImm() == 0)
2857 OffsetScale = 1;
2858
2859     // If the address instruction is folded into the base register, then the
2860 // addressing mode must not have a scale. Then we can swap the base and the
2861 // scaled registers.
2862 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2863 return false;
2864
2865 switch (AddrI.getOpcode()) {
2866 default:
2867 return false;
2868
2869 case AArch64::SBFMXri:
2870 // sxtw Xa, Wm
2871 // ldr Xd, [Xn, Xa, lsl #N]
2872 // ->
2873 // ldr Xd, [Xn, Wm, sxtw #N]
2874 if (AddrI.getOperand(2).getImm() != 0 ||
2875 AddrI.getOperand(3).getImm() != 31)
2876 return false;
2877
2878 AM.BaseReg = MemI.getOperand(1).getReg();
2879 if (AM.BaseReg == Reg)
2880 AM.BaseReg = MemI.getOperand(2).getReg();
2881 AM.ScaledReg = AddrI.getOperand(1).getReg();
2882 AM.Scale = OffsetScale;
2883 AM.Displacement = 0;
2884       AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2885       return true;
2886
2887 case TargetOpcode::SUBREG_TO_REG: {
2888 // mov Wa, Wm
2889 // ldr Xd, [Xn, Xa, lsl #N]
2890 // ->
2891 // ldr Xd, [Xn, Wm, uxtw #N]
2892
2893 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2894 if (AddrI.getOperand(1).getImm() != 0 ||
2895 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2896 return false;
2897
2898 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2899 Register OffsetReg = AddrI.getOperand(2).getReg();
2900 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2901 return false;
2902
2903 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2904 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2905 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2906 DefMI.getOperand(3).getImm() != 0)
2907 return false;
2908
2909 AM.BaseReg = MemI.getOperand(1).getReg();
2910 if (AM.BaseReg == Reg)
2911 AM.BaseReg = MemI.getOperand(2).getReg();
2912 AM.ScaledReg = DefMI.getOperand(2).getReg();
2913 AM.Scale = OffsetScale;
2914 AM.Displacement = 0;
2915       AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2916       return true;
2917 }
2918 }
2919 }
2920
2921 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2922
2923 // Check we are not breaking a potential conversion to an LDP.
2924 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2925 int64_t NewOffset) -> bool {
2926 int64_t MinOffset, MaxOffset;
2927 switch (NumBytes) {
2928 default:
2929 return true;
2930 case 4:
2931 MinOffset = -256;
2932 MaxOffset = 252;
2933 break;
2934 case 8:
2935 MinOffset = -512;
2936 MaxOffset = 504;
2937 break;
2938 case 16:
2939 MinOffset = -1024;
2940 MaxOffset = 1008;
2941 break;
2942 }
2943 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2944 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2945 };
2946 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2947 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2948 int64_t NewOffset = OldOffset + Disp;
2949 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2950 return false;
2951 // If the old offset would fit into an LDP, but the new offset wouldn't,
2952 // bail out.
2953 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2954 return false;
2955 AM.BaseReg = AddrI.getOperand(1).getReg();
2956 AM.ScaledReg = 0;
2957 AM.Scale = 0;
2958 AM.Displacement = NewOffset;
2959     AM.Form = ExtAddrMode::Formula::Basic;
2960     return true;
2961 };
2962
2963 auto canFoldAddRegIntoAddrMode =
2964       [&](int64_t Scale,
2965           ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2966 if (MemI.getOperand(2).getImm() != 0)
2967 return false;
2968 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2969 return false;
2970 AM.BaseReg = AddrI.getOperand(1).getReg();
2971 AM.ScaledReg = AddrI.getOperand(2).getReg();
2972 AM.Scale = Scale;
2973 AM.Displacement = 0;
2974 AM.Form = Form;
2975 return true;
2976 };
2977
2978 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2979 unsigned Opcode = MemI.getOpcode();
2980 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2981 Subtarget.isSTRQroSlow();
2982 };
2983
2984 int64_t Disp = 0;
2985 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2986 switch (AddrI.getOpcode()) {
2987 default:
2988 return false;
2989
2990 case AArch64::ADDXri:
2991 // add Xa, Xn, #N
2992 // ldr Xd, [Xa, #M]
2993 // ->
2994 // ldr Xd, [Xn, #N'+M]
2995 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2996 return canFoldAddSubImmIntoAddrMode(Disp);
2997
2998 case AArch64::SUBXri:
2999 // sub Xa, Xn, #N
3000 // ldr Xd, [Xa, #M]
3001 // ->
3002 // ldr Xd, [Xn, #N'+M]
3003 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3004 return canFoldAddSubImmIntoAddrMode(-Disp);
3005
3006 case AArch64::ADDXrs: {
3007 // add Xa, Xn, Xm, lsl #N
3008 // ldr Xd, [Xa]
3009 // ->
3010 // ldr Xd, [Xn, Xm, lsl #N]
3011
3012 // Don't fold the add if the result would be slower, unless optimising for
3013 // size.
3014 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3015     if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3016       return false;
3017 Shift = AArch64_AM::getShiftValue(Shift);
3018 if (!OptSize) {
3019 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3020 return false;
3021 if (avoidSlowSTRQ(MemI))
3022 return false;
3023 }
3024 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3025 }
3026
3027 case AArch64::ADDXrr:
3028 // add Xa, Xn, Xm
3029 // ldr Xd, [Xa]
3030 // ->
3031 // ldr Xd, [Xn, Xm, lsl #0]
3032
3033 // Don't fold the add if the result would be slower, unless optimising for
3034 // size.
3035 if (!OptSize && avoidSlowSTRQ(MemI))
3036 return false;
3037 return canFoldAddRegIntoAddrMode(1);
3038
3039 case AArch64::ADDXrx:
3040 // add Xa, Xn, Wm, {s,u}xtw #N
3041 // ldr Xd, [Xa]
3042 // ->
3043 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3044
3045 // Don't fold the add if the result would be slower, unless optimising for
3046 // size.
3047 if (!OptSize && avoidSlowSTRQ(MemI))
3048 return false;
3049
3050 // Can fold only sign-/zero-extend of a word.
3051 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3052     AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3053     if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3054 return false;
3055
3056 return canFoldAddRegIntoAddrMode(
3057 1ULL << AArch64_AM::getArithShiftValue(Imm),
3058         (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3059                                      : ExtAddrMode::Formula::ZExtScaledReg);
3060   }
3061}
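// For illustration (a sketch; the legality and LDP-friendliness checks above
// must succeed): canFoldIntoAddrMode allows
//   add x8, x0, #16
//   ldr x1, [x8, #8]
// to be folded into ldr x1, [x0, #24], and
//   add x8, x0, x2, lsl #3
//   ldr x1, [x8]
// to be folded into ldr x1, [x0, x2, lsl #3].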
3062
3063// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3064// return the opcode of an instruction performing the same operation, but using
3065// the [Reg, Reg] addressing mode.
3066static unsigned regOffsetOpcode(unsigned Opcode) {
3067 switch (Opcode) {
3068 default:
3069 llvm_unreachable("Address folding not implemented for instruction");
3070
3071 case AArch64::LDURQi:
3072 case AArch64::LDRQui:
3073 return AArch64::LDRQroX;
3074 case AArch64::STURQi:
3075 case AArch64::STRQui:
3076 return AArch64::STRQroX;
3077 case AArch64::LDURDi:
3078 case AArch64::LDRDui:
3079 return AArch64::LDRDroX;
3080 case AArch64::STURDi:
3081 case AArch64::STRDui:
3082 return AArch64::STRDroX;
3083 case AArch64::LDURXi:
3084 case AArch64::LDRXui:
3085 return AArch64::LDRXroX;
3086 case AArch64::STURXi:
3087 case AArch64::STRXui:
3088 return AArch64::STRXroX;
3089 case AArch64::LDURWi:
3090 case AArch64::LDRWui:
3091 return AArch64::LDRWroX;
3092 case AArch64::LDURSWi:
3093 case AArch64::LDRSWui:
3094 return AArch64::LDRSWroX;
3095 case AArch64::STURWi:
3096 case AArch64::STRWui:
3097 return AArch64::STRWroX;
3098 case AArch64::LDURHi:
3099 case AArch64::LDRHui:
3100 return AArch64::LDRHroX;
3101 case AArch64::STURHi:
3102 case AArch64::STRHui:
3103 return AArch64::STRHroX;
3104 case AArch64::LDURHHi:
3105 case AArch64::LDRHHui:
3106 return AArch64::LDRHHroX;
3107 case AArch64::STURHHi:
3108 case AArch64::STRHHui:
3109 return AArch64::STRHHroX;
3110 case AArch64::LDURSHXi:
3111 case AArch64::LDRSHXui:
3112 return AArch64::LDRSHXroX;
3113 case AArch64::LDURSHWi:
3114 case AArch64::LDRSHWui:
3115 return AArch64::LDRSHWroX;
3116 case AArch64::LDURBi:
3117 case AArch64::LDRBui:
3118 return AArch64::LDRBroX;
3119 case AArch64::LDURBBi:
3120 case AArch64::LDRBBui:
3121 return AArch64::LDRBBroX;
3122 case AArch64::LDURSBXi:
3123 case AArch64::LDRSBXui:
3124 return AArch64::LDRSBXroX;
3125 case AArch64::LDURSBWi:
3126 case AArch64::LDRSBWui:
3127 return AArch64::LDRSBWroX;
3128 case AArch64::STURBi:
3129 case AArch64::STRBui:
3130 return AArch64::STRBroX;
3131 case AArch64::STURBBi:
3132 case AArch64::STRBBui:
3133 return AArch64::STRBBroX;
3134 }
3135}
3136
3137// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3138// the opcode of an instruction performing the same operation, but using the
3139// [Reg, #Imm] addressing mode with scaled offset.
3140unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3141 switch (Opcode) {
3142 default:
3143 llvm_unreachable("Address folding not implemented for instruction");
3144
3145 case AArch64::LDURQi:
3146 Scale = 16;
3147 return AArch64::LDRQui;
3148 case AArch64::STURQi:
3149 Scale = 16;
3150 return AArch64::STRQui;
3151 case AArch64::LDURDi:
3152 Scale = 8;
3153 return AArch64::LDRDui;
3154 case AArch64::STURDi:
3155 Scale = 8;
3156 return AArch64::STRDui;
3157 case AArch64::LDURXi:
3158 Scale = 8;
3159 return AArch64::LDRXui;
3160 case AArch64::STURXi:
3161 Scale = 8;
3162 return AArch64::STRXui;
3163 case AArch64::LDURWi:
3164 Scale = 4;
3165 return AArch64::LDRWui;
3166 case AArch64::LDURSWi:
3167 Scale = 4;
3168 return AArch64::LDRSWui;
3169 case AArch64::STURWi:
3170 Scale = 4;
3171 return AArch64::STRWui;
3172 case AArch64::LDURHi:
3173 Scale = 2;
3174 return AArch64::LDRHui;
3175 case AArch64::STURHi:
3176 Scale = 2;
3177 return AArch64::STRHui;
3178 case AArch64::LDURHHi:
3179 Scale = 2;
3180 return AArch64::LDRHHui;
3181 case AArch64::STURHHi:
3182 Scale = 2;
3183 return AArch64::STRHHui;
3184 case AArch64::LDURSHXi:
3185 Scale = 2;
3186 return AArch64::LDRSHXui;
3187 case AArch64::LDURSHWi:
3188 Scale = 2;
3189 return AArch64::LDRSHWui;
3190 case AArch64::LDURBi:
3191 Scale = 1;
3192 return AArch64::LDRBui;
3193 case AArch64::LDURBBi:
3194 Scale = 1;
3195 return AArch64::LDRBBui;
3196 case AArch64::LDURSBXi:
3197 Scale = 1;
3198 return AArch64::LDRSBXui;
3199 case AArch64::LDURSBWi:
3200 Scale = 1;
3201 return AArch64::LDRSBWui;
3202 case AArch64::STURBi:
3203 Scale = 1;
3204 return AArch64::STRBui;
3205 case AArch64::STURBBi:
3206 Scale = 1;
3207 return AArch64::STRBBui;
3208 case AArch64::LDRQui:
3209 case AArch64::STRQui:
3210 Scale = 16;
3211 return Opcode;
3212 case AArch64::LDRDui:
3213 case AArch64::STRDui:
3214 case AArch64::LDRXui:
3215 case AArch64::STRXui:
3216 Scale = 8;
3217 return Opcode;
3218 case AArch64::LDRWui:
3219 case AArch64::LDRSWui:
3220 case AArch64::STRWui:
3221 Scale = 4;
3222 return Opcode;
3223 case AArch64::LDRHui:
3224 case AArch64::STRHui:
3225 case AArch64::LDRHHui:
3226 case AArch64::STRHHui:
3227 case AArch64::LDRSHXui:
3228 case AArch64::LDRSHWui:
3229 Scale = 2;
3230 return Opcode;
3231 case AArch64::LDRBui:
3232 case AArch64::LDRBBui:
3233 case AArch64::LDRSBXui:
3234 case AArch64::LDRSBWui:
3235 case AArch64::STRBui:
3236 case AArch64::STRBBui:
3237 Scale = 1;
3238 return Opcode;
3239 }
3240}
3241
3242// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3243// the opcode of an instruction performing the same operation, but using the
3244// [Reg, #Imm] addressing mode with unscaled offset.
3245unsigned unscaledOffsetOpcode(unsigned Opcode) {
3246 switch (Opcode) {
3247 default:
3248 llvm_unreachable("Address folding not implemented for instruction");
3249
3250 case AArch64::LDURQi:
3251 case AArch64::STURQi:
3252 case AArch64::LDURDi:
3253 case AArch64::STURDi:
3254 case AArch64::LDURXi:
3255 case AArch64::STURXi:
3256 case AArch64::LDURWi:
3257 case AArch64::LDURSWi:
3258 case AArch64::STURWi:
3259 case AArch64::LDURHi:
3260 case AArch64::STURHi:
3261 case AArch64::LDURHHi:
3262 case AArch64::STURHHi:
3263 case AArch64::LDURSHXi:
3264 case AArch64::LDURSHWi:
3265 case AArch64::LDURBi:
3266 case AArch64::STURBi:
3267 case AArch64::LDURBBi:
3268 case AArch64::STURBBi:
3269 case AArch64::LDURSBWi:
3270 case AArch64::LDURSBXi:
3271 return Opcode;
3272 case AArch64::LDRQui:
3273 return AArch64::LDURQi;
3274 case AArch64::STRQui:
3275 return AArch64::STURQi;
3276 case AArch64::LDRDui:
3277 return AArch64::LDURDi;
3278 case AArch64::STRDui:
3279 return AArch64::STURDi;
3280 case AArch64::LDRXui:
3281 return AArch64::LDURXi;
3282 case AArch64::STRXui:
3283 return AArch64::STURXi;
3284 case AArch64::LDRWui:
3285 return AArch64::LDURWi;
3286 case AArch64::LDRSWui:
3287 return AArch64::LDURSWi;
3288 case AArch64::STRWui:
3289 return AArch64::STURWi;
3290 case AArch64::LDRHui:
3291 return AArch64::LDURHi;
3292 case AArch64::STRHui:
3293 return AArch64::STURHi;
3294 case AArch64::LDRHHui:
3295 return AArch64::LDURHHi;
3296 case AArch64::STRHHui:
3297 return AArch64::STURHHi;
3298 case AArch64::LDRSHXui:
3299 return AArch64::LDURSHXi;
3300 case AArch64::LDRSHWui:
3301 return AArch64::LDURSHWi;
3302 case AArch64::LDRBBui:
3303 return AArch64::LDURBBi;
3304 case AArch64::LDRBui:
3305 return AArch64::LDURBi;
3306 case AArch64::STRBBui:
3307 return AArch64::STURBBi;
3308 case AArch64::STRBui:
3309 return AArch64::STURBi;
3310 case AArch64::LDRSBWui:
3311 return AArch64::LDURSBWi;
3312 case AArch64::LDRSBXui:
3313 return AArch64::LDURSBXi;
3314 }
3315}
3316
3317// Given the opcode of a memory load/store instruction, return the opcode of an
3318// instruction performing the same operation, but using
3319// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3320// offset register.
3321static unsigned offsetExtendOpcode(unsigned Opcode) {
3322 switch (Opcode) {
3323 default:
3324 llvm_unreachable("Address folding not implemented for instruction");
3325
3326 case AArch64::LDRQroX:
3327 case AArch64::LDURQi:
3328 case AArch64::LDRQui:
3329 return AArch64::LDRQroW;
3330 case AArch64::STRQroX:
3331 case AArch64::STURQi:
3332 case AArch64::STRQui:
3333 return AArch64::STRQroW;
3334 case AArch64::LDRDroX:
3335 case AArch64::LDURDi:
3336 case AArch64::LDRDui:
3337 return AArch64::LDRDroW;
3338 case AArch64::STRDroX:
3339 case AArch64::STURDi:
3340 case AArch64::STRDui:
3341 return AArch64::STRDroW;
3342 case AArch64::LDRXroX:
3343 case AArch64::LDURXi:
3344 case AArch64::LDRXui:
3345 return AArch64::LDRXroW;
3346 case AArch64::STRXroX:
3347 case AArch64::STURXi:
3348 case AArch64::STRXui:
3349 return AArch64::STRXroW;
3350 case AArch64::LDRWroX:
3351 case AArch64::LDURWi:
3352 case AArch64::LDRWui:
3353 return AArch64::LDRWroW;
3354 case AArch64::LDRSWroX:
3355 case AArch64::LDURSWi:
3356 case AArch64::LDRSWui:
3357 return AArch64::LDRSWroW;
3358 case AArch64::STRWroX:
3359 case AArch64::STURWi:
3360 case AArch64::STRWui:
3361 return AArch64::STRWroW;
3362 case AArch64::LDRHroX:
3363 case AArch64::LDURHi:
3364 case AArch64::LDRHui:
3365 return AArch64::LDRHroW;
3366 case AArch64::STRHroX:
3367 case AArch64::STURHi:
3368 case AArch64::STRHui:
3369 return AArch64::STRHroW;
3370 case AArch64::LDRHHroX:
3371 case AArch64::LDURHHi:
3372 case AArch64::LDRHHui:
3373 return AArch64::LDRHHroW;
3374 case AArch64::STRHHroX:
3375 case AArch64::STURHHi:
3376 case AArch64::STRHHui:
3377 return AArch64::STRHHroW;
3378 case AArch64::LDRSHXroX:
3379 case AArch64::LDURSHXi:
3380 case AArch64::LDRSHXui:
3381 return AArch64::LDRSHXroW;
3382 case AArch64::LDRSHWroX:
3383 case AArch64::LDURSHWi:
3384 case AArch64::LDRSHWui:
3385 return AArch64::LDRSHWroW;
3386 case AArch64::LDRBroX:
3387 case AArch64::LDURBi:
3388 case AArch64::LDRBui:
3389 return AArch64::LDRBroW;
3390 case AArch64::LDRBBroX:
3391 case AArch64::LDURBBi:
3392 case AArch64::LDRBBui:
3393 return AArch64::LDRBBroW;
3394 case AArch64::LDRSBXroX:
3395 case AArch64::LDURSBXi:
3396 case AArch64::LDRSBXui:
3397 return AArch64::LDRSBXroW;
3398 case AArch64::LDRSBWroX:
3399 case AArch64::LDURSBWi:
3400 case AArch64::LDRSBWui:
3401 return AArch64::LDRSBWroW;
3402 case AArch64::STRBroX:
3403 case AArch64::STURBi:
3404 case AArch64::STRBui:
3405 return AArch64::STRBroW;
3406 case AArch64::STRBBroX:
3407 case AArch64::STURBBi:
3408 case AArch64::STRBBui:
3409 return AArch64::STRBBroW;
3410 }
3411}
3412
3412 
3413 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3414                                                  const ExtAddrMode &AM) const {
3415
3416 const DebugLoc &DL = MemI.getDebugLoc();
3417 MachineBasicBlock &MBB = *MemI.getParent();
3418   MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3419 
3420   if (AM.Form == ExtAddrMode::Formula::Basic) {
3421 if (AM.ScaledReg) {
3422 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3423 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3424 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3425 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3426 .addReg(MemI.getOperand(0).getReg(),
3427 MemI.mayLoad() ? RegState::Define : 0)
3428 .addReg(AM.BaseReg)
3429 .addReg(AM.ScaledReg)
3430 .addImm(0)
3431 .addImm(AM.Scale > 1)
3432 .setMemRefs(MemI.memoperands())
3433 .setMIFlags(MemI.getFlags());
3434 return B.getInstr();
3435 }
3436
3437 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3438 "Addressing mode not supported for folding");
3439
3440 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3441 unsigned Scale = 1;
3442 unsigned Opcode = MemI.getOpcode();
3443 if (isInt<9>(AM.Displacement))
3444 Opcode = unscaledOffsetOpcode(Opcode);
3445 else
3446 Opcode = scaledOffsetOpcode(Opcode, Scale);
3447
3448 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3449 .addReg(MemI.getOperand(0).getReg(),
3450 MemI.mayLoad() ? RegState::Define : 0)
3451 .addReg(AM.BaseReg)
3452 .addImm(AM.Displacement / Scale)
3453 .setMemRefs(MemI.memoperands())
3454 .setMIFlags(MemI.getFlags());
3455 return B.getInstr();
3456 }
3457
3458   if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3459       AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3460     // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3461 assert(AM.ScaledReg && !AM.Displacement &&
3462 "Address offset can be a register or an immediate, but not both");
3463 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3464 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3465 // Make sure the offset register is in the correct register class.
3466 Register OffsetReg = AM.ScaledReg;
3467 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3468 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3469 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3470 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3471 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3472 }
3473 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3474 .addReg(MemI.getOperand(0).getReg(),
3475 MemI.mayLoad() ? RegState::Define : 0)
3476 .addReg(AM.BaseReg)
3477 .addReg(OffsetReg)
3478                  .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3479                  .addImm(AM.Scale != 1)
3480 .setMemRefs(MemI.memoperands())
3481 .setMIFlags(MemI.getFlags());
3482
3483 return B.getInstr();
3484 }
3485
3486   llvm_unreachable(
3487       "Function must not be called with an addressing mode it can't handle");
3488}
3489
3490 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3491     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3492 bool &OffsetIsScalable, TypeSize &Width,
3493 const TargetRegisterInfo *TRI) const {
3494 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3495 // Handle only loads/stores with base register followed by immediate offset.
3496 if (LdSt.getNumExplicitOperands() == 3) {
3497 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3498 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3499 !LdSt.getOperand(2).isImm())
3500 return false;
3501 } else if (LdSt.getNumExplicitOperands() == 4) {
3502 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3503 if (!LdSt.getOperand(1).isReg() ||
3504 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3505 !LdSt.getOperand(3).isImm())
3506 return false;
3507 } else
3508 return false;
3509
3510 // Get the scaling factor for the instruction and set the width for the
3511 // instruction.
3512 TypeSize Scale(0U, false);
3513 int64_t Dummy1, Dummy2;
3514
3515 // If this returns false, then it's an instruction we don't want to handle.
3516 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3517 return false;
3518
3519 // Compute the offset. Offset is calculated as the immediate operand
3520 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3521 // set to 1.
3522 if (LdSt.getNumExplicitOperands() == 3) {
3523 BaseOp = &LdSt.getOperand(1);
3524 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3525 } else {
3526 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3527 BaseOp = &LdSt.getOperand(2);
3528 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3529 }
3530 OffsetIsScalable = Scale.isScalable();
3531
3532 if (!BaseOp->isReg() && !BaseOp->isFI())
3533 return false;
3534
3535 return true;
3536}
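// For illustration (a sketch): for
//   ldr q0, [x0, #32]        ; LDRQui, immediate operand is 2
// getMemOperandWithOffsetWidth reports BaseOp = x0, Offset = 2 * 16 = 32,
// Width = 16 bytes and OffsetIsScalable = false, while for an SVE fill such as
//   ldr z0, [x0, #1, mul vl] ; LDR_ZXI
// the scale is scalable, so OffsetIsScalable is true.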
3537
3538 MachineOperand &
3539 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
3540   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3541 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3542 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3543 return OfsOp;
3544}
3545
3546bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3547 TypeSize &Width, int64_t &MinOffset,
3548 int64_t &MaxOffset) {
3549 switch (Opcode) {
3550 // Not a memory operation or something we want to handle.
3551 default:
3552 Scale = TypeSize::getFixed(0);
3553 Width = TypeSize::getFixed(0);
3554 MinOffset = MaxOffset = 0;
3555 return false;
3556 // LDR / STR
3557 case AArch64::LDRQui:
3558 case AArch64::STRQui:
3559 Scale = TypeSize::getFixed(16);
3560 Width = TypeSize::getFixed(16);
3561 MinOffset = 0;
3562 MaxOffset = 4095;
3563 break;
3564 case AArch64::LDRXui:
3565 case AArch64::LDRDui:
3566 case AArch64::STRXui:
3567 case AArch64::STRDui:
3568 case AArch64::PRFMui:
3569 Scale = TypeSize::getFixed(8);
3570 Width = TypeSize::getFixed(8);
3571 MinOffset = 0;
3572 MaxOffset = 4095;
3573 break;
3574 case AArch64::LDRWui:
3575 case AArch64::LDRSui:
3576 case AArch64::LDRSWui:
3577 case AArch64::STRWui:
3578 case AArch64::STRSui:
3579 Scale = TypeSize::getFixed(4);
3580 Width = TypeSize::getFixed(4);
3581 MinOffset = 0;
3582 MaxOffset = 4095;
3583 break;
3584 case AArch64::LDRHui:
3585 case AArch64::LDRHHui:
3586 case AArch64::LDRSHWui:
3587 case AArch64::LDRSHXui:
3588 case AArch64::STRHui:
3589 case AArch64::STRHHui:
3590 Scale = TypeSize::getFixed(2);
3591 Width = TypeSize::getFixed(2);
3592 MinOffset = 0;
3593 MaxOffset = 4095;
3594 break;
3595 case AArch64::LDRBui:
3596 case AArch64::LDRBBui:
3597 case AArch64::LDRSBWui:
3598 case AArch64::LDRSBXui:
3599 case AArch64::STRBui:
3600 case AArch64::STRBBui:
3601 Scale = TypeSize::getFixed(1);
3602 Width = TypeSize::getFixed(1);
3603 MinOffset = 0;
3604 MaxOffset = 4095;
3605 break;
3606 // post/pre inc
3607 case AArch64::STRQpre:
3608 case AArch64::LDRQpost:
3609 Scale = TypeSize::getFixed(1);
3610 Width = TypeSize::getFixed(16);
3611 MinOffset = -256;
3612 MaxOffset = 255;
3613 break;
3614 case AArch64::STRXpre:
3615 case AArch64::STRDpre:
3616 case AArch64::LDRXpost:
3617 case AArch64::LDRDpost:
3618 Scale = TypeSize::getFixed(1);
3619 Width = TypeSize::getFixed(8);
3620 MinOffset = -256;
3621 MaxOffset = 255;
3622 break;
3623 case AArch64::STRWpost:
3624 case AArch64::LDRWpost:
3625 Scale = TypeSize::getFixed(4);
3626 Width = TypeSize::getFixed(32);
3627 MinOffset = -256;
3628 MaxOffset = 255;
3629 break;
3630 // Unscaled
3631 case AArch64::LDURQi:
3632 case AArch64::STURQi:
3633 Scale = TypeSize::getFixed(1);
3634 Width = TypeSize::getFixed(16);
3635 MinOffset = -256;
3636 MaxOffset = 255;
3637 break;
3638 case AArch64::LDURXi:
3639 case AArch64::LDURDi:
3640 case AArch64::LDAPURXi:
3641 case AArch64::STURXi:
3642 case AArch64::STURDi:
3643 case AArch64::STLURXi:
3644 case AArch64::PRFUMi:
3645 Scale = TypeSize::getFixed(1);
3646 Width = TypeSize::getFixed(8);
3647 MinOffset = -256;
3648 MaxOffset = 255;
3649 break;
3650 case AArch64::LDURWi:
3651 case AArch64::LDURSi:
3652 case AArch64::LDURSWi:
3653 case AArch64::LDAPURi:
3654 case AArch64::LDAPURSWi:
3655 case AArch64::STURWi:
3656 case AArch64::STURSi:
3657 case AArch64::STLURWi:
3658 Scale = TypeSize::getFixed(1);
3659 Width = TypeSize::getFixed(4);
3660 MinOffset = -256;
3661 MaxOffset = 255;
3662 break;
3663 case AArch64::LDURHi:
3664 case AArch64::LDURHHi:
3665 case AArch64::LDURSHXi:
3666 case AArch64::LDURSHWi:
3667 case AArch64::LDAPURHi:
3668 case AArch64::LDAPURSHWi:
3669 case AArch64::LDAPURSHXi:
3670 case AArch64::STURHi:
3671 case AArch64::STURHHi:
3672 case AArch64::STLURHi:
3673 Scale = TypeSize::getFixed(1);
3674 Width = TypeSize::getFixed(2);
3675 MinOffset = -256;
3676 MaxOffset = 255;
3677 break;
3678 case AArch64::LDURBi:
3679 case AArch64::LDURBBi:
3680 case AArch64::LDURSBXi:
3681 case AArch64::LDURSBWi:
3682 case AArch64::LDAPURBi:
3683 case AArch64::LDAPURSBWi:
3684 case AArch64::LDAPURSBXi:
3685 case AArch64::STURBi:
3686 case AArch64::STURBBi:
3687 case AArch64::STLURBi:
3688 Scale = TypeSize::getFixed(1);
3689 Width = TypeSize::getFixed(1);
3690 MinOffset = -256;
3691 MaxOffset = 255;
3692 break;
3693 // LDP / STP
3694 case AArch64::LDPQi:
3695 case AArch64::LDNPQi:
3696 case AArch64::STPQi:
3697 case AArch64::STNPQi:
3698 Scale = TypeSize::getFixed(16);
3699 Width = TypeSize::getFixed(32);
3700 MinOffset = -64;
3701 MaxOffset = 63;
3702 break;
3703 case AArch64::LDPXi:
3704 case AArch64::LDPDi:
3705 case AArch64::LDNPXi:
3706 case AArch64::LDNPDi:
3707 case AArch64::STPXi:
3708 case AArch64::STPDi:
3709 case AArch64::STNPXi:
3710 case AArch64::STNPDi:
3711 Scale = TypeSize::getFixed(8);
3712 Width = TypeSize::getFixed(16);
3713 MinOffset = -64;
3714 MaxOffset = 63;
3715 break;
3716 case AArch64::LDPWi:
3717 case AArch64::LDPSi:
3718 case AArch64::LDNPWi:
3719 case AArch64::LDNPSi:
3720 case AArch64::STPWi:
3721 case AArch64::STPSi:
3722 case AArch64::STNPWi:
3723 case AArch64::STNPSi:
3724 Scale = TypeSize::getFixed(4);
3725 Width = TypeSize::getFixed(8);
3726 MinOffset = -64;
3727 MaxOffset = 63;
3728 break;
3729 // pre/post inc
3730 case AArch64::STPQpre:
3731 case AArch64::LDPQpost:
3732 Scale = TypeSize::getFixed(16);
3733 Width = TypeSize::getFixed(16);
3734 MinOffset = -1024;
3735 MaxOffset = 1008;
3736 break;
3737 case AArch64::STPXpre:
3738 case AArch64::LDPXpost:
3739 case AArch64::STPDpre:
3740 case AArch64::LDPDpost:
3741 Scale = TypeSize::getFixed(8);
3742 Width = TypeSize::getFixed(8);
3743 MinOffset = -512;
3744 MaxOffset = 504;
3745 break;
3746 case AArch64::StoreSwiftAsyncContext:
3747 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3748 Scale = TypeSize::getFixed(1);
3749 Width = TypeSize::getFixed(8);
3750 MinOffset = 0;
3751 MaxOffset = 4095;
3752 break;
3753 case AArch64::ADDG:
3754 Scale = TypeSize::getFixed(16);
3755 Width = TypeSize::getFixed(0);
3756 MinOffset = 0;
3757 MaxOffset = 63;
3758 break;
3759 case AArch64::TAGPstack:
3760 Scale = TypeSize::getFixed(16);
3761 Width = TypeSize::getFixed(0);
3762 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3763 // of 63 (not 64!).
3764 MinOffset = -63;
3765 MaxOffset = 63;
3766 break;
3767 case AArch64::LDG:
3768 case AArch64::STGi:
3769 case AArch64::STZGi:
3770 Scale = TypeSize::getFixed(16);
3771 Width = TypeSize::getFixed(16);
3772 MinOffset = -256;
3773 MaxOffset = 255;
3774 break;
3775 // SVE
3776 case AArch64::STR_ZZZZXI:
3777 case AArch64::LDR_ZZZZXI:
3778 Scale = TypeSize::getScalable(16);
3779 Width = TypeSize::getScalable(16 * 4);
3780 MinOffset = -256;
3781 MaxOffset = 252;
3782 break;
3783 case AArch64::STR_ZZZXI:
3784 case AArch64::LDR_ZZZXI:
3785 Scale = TypeSize::getScalable(16);
3786 Width = TypeSize::getScalable(16 * 3);
3787 MinOffset = -256;
3788 MaxOffset = 253;
3789 break;
3790 case AArch64::STR_ZZXI:
3791 case AArch64::LDR_ZZXI:
3792 Scale = TypeSize::getScalable(16);
3793 Width = TypeSize::getScalable(16 * 2);
3794 MinOffset = -256;
3795 MaxOffset = 254;
3796 break;
3797 case AArch64::LDR_PXI:
3798 case AArch64::STR_PXI:
3799 Scale = TypeSize::getScalable(2);
3800 Width = TypeSize::getScalable(2);
3801 MinOffset = -256;
3802 MaxOffset = 255;
3803 break;
3804 case AArch64::LDR_PPXI:
3805 case AArch64::STR_PPXI:
3806 Scale = TypeSize::getScalable(2);
3807 Width = TypeSize::getScalable(2 * 2);
3808 MinOffset = -256;
3809 MaxOffset = 254;
3810 break;
3811 case AArch64::LDR_ZXI:
3812 case AArch64::STR_ZXI:
3813 Scale = TypeSize::getScalable(16);
3814 Width = TypeSize::getScalable(16);
3815 MinOffset = -256;
3816 MaxOffset = 255;
3817 break;
3818 case AArch64::LD1B_IMM:
3819 case AArch64::LD1H_IMM:
3820 case AArch64::LD1W_IMM:
3821 case AArch64::LD1D_IMM:
3822 case AArch64::LDNT1B_ZRI:
3823 case AArch64::LDNT1H_ZRI:
3824 case AArch64::LDNT1W_ZRI:
3825 case AArch64::LDNT1D_ZRI:
3826 case AArch64::ST1B_IMM:
3827 case AArch64::ST1H_IMM:
3828 case AArch64::ST1W_IMM:
3829 case AArch64::ST1D_IMM:
3830 case AArch64::STNT1B_ZRI:
3831 case AArch64::STNT1H_ZRI:
3832 case AArch64::STNT1W_ZRI:
3833 case AArch64::STNT1D_ZRI:
3834 case AArch64::LDNF1B_IMM:
3835 case AArch64::LDNF1H_IMM:
3836 case AArch64::LDNF1W_IMM:
3837 case AArch64::LDNF1D_IMM:
3838 // A full vector's worth of data
3839 // Width = mbytes * elements
3840 Scale = TypeSize::getScalable(16);
3841 Width = TypeSize::getScalable(16);
3842 MinOffset = -8;
3843 MaxOffset = 7;
3844 break;
3845 case AArch64::LD2B_IMM:
3846 case AArch64::LD2H_IMM:
3847 case AArch64::LD2W_IMM:
3848 case AArch64::LD2D_IMM:
3849 case AArch64::ST2B_IMM:
3850 case AArch64::ST2H_IMM:
3851 case AArch64::ST2W_IMM:
3852 case AArch64::ST2D_IMM:
3853 Scale = TypeSize::getScalable(32);
3854 Width = TypeSize::getScalable(16 * 2);
3855 MinOffset = -8;
3856 MaxOffset = 7;
3857 break;
3858 case AArch64::LD3B_IMM:
3859 case AArch64::LD3H_IMM:
3860 case AArch64::LD3W_IMM:
3861 case AArch64::LD3D_IMM:
3862 case AArch64::ST3B_IMM:
3863 case AArch64::ST3H_IMM:
3864 case AArch64::ST3W_IMM:
3865 case AArch64::ST3D_IMM:
3866 Scale = TypeSize::getScalable(48);
3867 Width = TypeSize::getScalable(16 * 3);
3868 MinOffset = -8;
3869 MaxOffset = 7;
3870 break;
3871 case AArch64::LD4B_IMM:
3872 case AArch64::LD4H_IMM:
3873 case AArch64::LD4W_IMM:
3874 case AArch64::LD4D_IMM:
3875 case AArch64::ST4B_IMM:
3876 case AArch64::ST4H_IMM:
3877 case AArch64::ST4W_IMM:
3878 case AArch64::ST4D_IMM:
3879 Scale = TypeSize::getScalable(64);
3880 Width = TypeSize::getScalable(16 * 4);
3881 MinOffset = -8;
3882 MaxOffset = 7;
3883 break;
3884 case AArch64::LD1B_H_IMM:
3885 case AArch64::LD1SB_H_IMM:
3886 case AArch64::LD1H_S_IMM:
3887 case AArch64::LD1SH_S_IMM:
3888 case AArch64::LD1W_D_IMM:
3889 case AArch64::LD1SW_D_IMM:
3890 case AArch64::ST1B_H_IMM:
3891 case AArch64::ST1H_S_IMM:
3892 case AArch64::ST1W_D_IMM:
3893 case AArch64::LDNF1B_H_IMM:
3894 case AArch64::LDNF1SB_H_IMM:
3895 case AArch64::LDNF1H_S_IMM:
3896 case AArch64::LDNF1SH_S_IMM:
3897 case AArch64::LDNF1W_D_IMM:
3898 case AArch64::LDNF1SW_D_IMM:
3899 // A half vector's worth of data
3900 // Width = mbytes * elements
3901 Scale = TypeSize::getScalable(8);
3902 Width = TypeSize::getScalable(8);
3903 MinOffset = -8;
3904 MaxOffset = 7;
3905 break;
3906 case AArch64::LD1B_S_IMM:
3907 case AArch64::LD1SB_S_IMM:
3908 case AArch64::LD1H_D_IMM:
3909 case AArch64::LD1SH_D_IMM:
3910 case AArch64::ST1B_S_IMM:
3911 case AArch64::ST1H_D_IMM:
3912 case AArch64::LDNF1B_S_IMM:
3913 case AArch64::LDNF1SB_S_IMM:
3914 case AArch64::LDNF1H_D_IMM:
3915 case AArch64::LDNF1SH_D_IMM:
3916 // A quarter vector's worth of data
3917 // Width = mbytes * elements
3918 Scale = TypeSize::getScalable(4);
3919 Width = TypeSize::getScalable(4);
3920 MinOffset = -8;
3921 MaxOffset = 7;
3922 break;
3923 case AArch64::LD1B_D_IMM:
3924 case AArch64::LD1SB_D_IMM:
3925 case AArch64::ST1B_D_IMM:
3926 case AArch64::LDNF1B_D_IMM:
3927 case AArch64::LDNF1SB_D_IMM:
3928 // An eighth vector's worth of data
3929 // Width = mbytes * elements
3930 Scale = TypeSize::getScalable(2);
3931 Width = TypeSize::getScalable(2);
3932 MinOffset = -8;
3933 MaxOffset = 7;
3934 break;
3935 case AArch64::ST2Gi:
3936 case AArch64::STZ2Gi:
3937 Scale = TypeSize::getFixed(16);
3938 Width = TypeSize::getFixed(32);
3939 MinOffset = -256;
3940 MaxOffset = 255;
3941 break;
3942 case AArch64::STGPi:
3943 Scale = TypeSize::getFixed(16);
3944 Width = TypeSize::getFixed(16);
3945 MinOffset = -64;
3946 MaxOffset = 63;
3947 break;
3948 case AArch64::LD1RB_IMM:
3949 case AArch64::LD1RB_H_IMM:
3950 case AArch64::LD1RB_S_IMM:
3951 case AArch64::LD1RB_D_IMM:
3952 case AArch64::LD1RSB_H_IMM:
3953 case AArch64::LD1RSB_S_IMM:
3954 case AArch64::LD1RSB_D_IMM:
3955 Scale = TypeSize::getFixed(1);
3956 Width = TypeSize::getFixed(1);
3957 MinOffset = 0;
3958 MaxOffset = 63;
3959 break;
3960 case AArch64::LD1RH_IMM:
3961 case AArch64::LD1RH_S_IMM:
3962 case AArch64::LD1RH_D_IMM:
3963 case AArch64::LD1RSH_S_IMM:
3964 case AArch64::LD1RSH_D_IMM:
3965 Scale = TypeSize::getFixed(2);
3966 Width = TypeSize::getFixed(2);
3967 MinOffset = 0;
3968 MaxOffset = 63;
3969 break;
3970 case AArch64::LD1RW_IMM:
3971 case AArch64::LD1RW_D_IMM:
3972 case AArch64::LD1RSW_IMM:
3973 Scale = TypeSize::getFixed(4);
3974 Width = TypeSize::getFixed(4);
3975 MinOffset = 0;
3976 MaxOffset = 63;
3977 break;
3978 case AArch64::LD1RD_IMM:
3979 Scale = TypeSize::getFixed(8);
3980 Width = TypeSize::getFixed(8);
3981 MinOffset = 0;
3982 MaxOffset = 63;
3983 break;
3984 }
3985
3986 return true;
3987}
3988
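The switch above reports, per opcode, an immediate Scale, an access Width, and the raw immediate range [MinOffset, MaxOffset]; the addressable byte range is that immediate range multiplied by the scale. A minimal standalone sketch (not part of the source file; the names simply mirror the outputs above) showing the arithmetic for a scaled pair access such as LDPXi/STPXi:

#include <cassert>
#include <cstdint>

int main() {
  // Values as reported above for LDPXi / STPXi: 8-byte elements,
  // 7-bit signed element immediate.
  const int64_t Scale = 8, MinOffset = -64, MaxOffset = 63;

  // The byte offsets such an instruction can encode.
  const int64_t MinBytes = MinOffset * Scale; // -512
  const int64_t MaxBytes = MaxOffset * Scale; //  504
  assert(MinBytes == -512 && MaxBytes == 504);

  // A byte offset is encodable only if it is a multiple of the scale and the
  // scaled value fits in the immediate range.
  auto IsEncodable = [&](int64_t Bytes) {
    return Bytes % Scale == 0 && Bytes / Scale >= MinOffset &&
           Bytes / Scale <= MaxOffset;
  };
  assert(IsEncodable(496) && !IsEncodable(500) && !IsEncodable(512));
  return 0;
}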
3989// Scaling factor for unscaled load or store.
3990int AArch64InstrInfo::getMemScale(unsigned Opc) {
3991 switch (Opc) {
3992 default:
3993 llvm_unreachable("Opcode has unknown scale!");
3994 case AArch64::LDRBBui:
3995 case AArch64::LDURBBi:
3996 case AArch64::LDRSBWui:
3997 case AArch64::LDURSBWi:
3998 case AArch64::STRBBui:
3999 case AArch64::STURBBi:
4000 return 1;
4001 case AArch64::LDRHHui:
4002 case AArch64::LDURHHi:
4003 case AArch64::LDRSHWui:
4004 case AArch64::LDURSHWi:
4005 case AArch64::STRHHui:
4006 case AArch64::STURHHi:
4007 return 2;
4008 case AArch64::LDRSui:
4009 case AArch64::LDURSi:
4010 case AArch64::LDRSpre:
4011 case AArch64::LDRSWui:
4012 case AArch64::LDURSWi:
4013 case AArch64::LDRSWpre:
4014 case AArch64::LDRWpre:
4015 case AArch64::LDRWui:
4016 case AArch64::LDURWi:
4017 case AArch64::STRSui:
4018 case AArch64::STURSi:
4019 case AArch64::STRSpre:
4020 case AArch64::STRWui:
4021 case AArch64::STURWi:
4022 case AArch64::STRWpre:
4023 case AArch64::LDPSi:
4024 case AArch64::LDPSWi:
4025 case AArch64::LDPWi:
4026 case AArch64::STPSi:
4027 case AArch64::STPWi:
4028 return 4;
4029 case AArch64::LDRDui:
4030 case AArch64::LDURDi:
4031 case AArch64::LDRDpre:
4032 case AArch64::LDRXui:
4033 case AArch64::LDURXi:
4034 case AArch64::LDRXpre:
4035 case AArch64::STRDui:
4036 case AArch64::STURDi:
4037 case AArch64::STRDpre:
4038 case AArch64::STRXui:
4039 case AArch64::STURXi:
4040 case AArch64::STRXpre:
4041 case AArch64::LDPDi:
4042 case AArch64::LDPXi:
4043 case AArch64::STPDi:
4044 case AArch64::STPXi:
4045 return 8;
4046 case AArch64::LDRQui:
4047 case AArch64::LDURQi:
4048 case AArch64::STRQui:
4049 case AArch64::STURQi:
4050 case AArch64::STRQpre:
4051 case AArch64::LDPQi:
4052 case AArch64::LDRQpre:
4053 case AArch64::STPQi:
4054 case AArch64::STGi:
4055 case AArch64::STZGi:
4056 case AArch64::ST2Gi:
4057 case AArch64::STZ2Gi:
4058 case AArch64::STGPi:
4059 return 16;
4060 }
4061}
4062
4063bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4064 switch (MI.getOpcode()) {
4065 default:
4066 return false;
4067 case AArch64::LDRWpre:
4068 case AArch64::LDRXpre:
4069 case AArch64::LDRSWpre:
4070 case AArch64::LDRSpre:
4071 case AArch64::LDRDpre:
4072 case AArch64::LDRQpre:
4073 return true;
4074 }
4075}
4076
4077bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4078 switch (MI.getOpcode()) {
4079 default:
4080 return false;
4081 case AArch64::STRWpre:
4082 case AArch64::STRXpre:
4083 case AArch64::STRSpre:
4084 case AArch64::STRDpre:
4085 case AArch64::STRQpre:
4086 return true;
4087 }
4088}
4089
4090bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4091 return isPreLd(MI) || isPreSt(MI);
4092}
4093
4094bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4095 switch (MI.getOpcode()) {
4096 default:
4097 return false;
4098 case AArch64::LDPSi:
4099 case AArch64::LDPSWi:
4100 case AArch64::LDPDi:
4101 case AArch64::LDPQi:
4102 case AArch64::LDPWi:
4103 case AArch64::LDPXi:
4104 case AArch64::STPSi:
4105 case AArch64::STPDi:
4106 case AArch64::STPQi:
4107 case AArch64::STPWi:
4108 case AArch64::STPXi:
4109 case AArch64::STGPi:
4110 return true;
4111 }
4112}
4113
4115 unsigned Idx =
4117 : 1;
4118 return MI.getOperand(Idx);
4119}
4120
4121const MachineOperand &
4123 unsigned Idx =
4125 : 2;
4126 return MI.getOperand(Idx);
4127}
4128
4130 Register Reg) {
4131 if (MI.getParent() == nullptr)
4132 return nullptr;
4133 const MachineFunction *MF = MI.getParent()->getParent();
4134 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4135}
4136
4137bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
4138 auto IsHFPR = [&](const MachineOperand &Op) {
4139 if (!Op.isReg())
4140 return false;
4141 auto Reg = Op.getReg();
4142 if (Reg.isPhysical())
4143 return AArch64::FPR16RegClass.contains(Reg);
4144 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4145 return TRC == &AArch64::FPR16RegClass ||
4146 TRC == &AArch64::FPR16_loRegClass;
4147 };
4148 return llvm::any_of(MI.operands(), IsHFPR);
4149}
4150
4151bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
4152 auto IsQFPR = [&](const MachineOperand &Op) {
4153 if (!Op.isReg())
4154 return false;
4155 auto Reg = Op.getReg();
4156 if (Reg.isPhysical())
4157 return AArch64::FPR128RegClass.contains(Reg);
4158 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4159 return TRC == &AArch64::FPR128RegClass ||
4160 TRC == &AArch64::FPR128_loRegClass;
4161 };
4162 return llvm::any_of(MI.operands(), IsQFPR);
4163}
4164
4165bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
4166 switch (MI.getOpcode()) {
4167 case AArch64::BRK:
4168 case AArch64::HLT:
4169 case AArch64::PACIASP:
4170 case AArch64::PACIBSP:
4171 // Implicit BTI behavior.
4172 return true;
4173 case AArch64::PAUTH_PROLOGUE:
4174 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4175 return true;
4176 case AArch64::HINT: {
4177 unsigned Imm = MI.getOperand(0).getImm();
4178 // Explicit BTI instruction.
4179 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4180 return true;
4181 // PACI(A|B)SP instructions.
4182 if (Imm == 25 || Imm == 27)
4183 return true;
4184 return false;
4185 }
4186 default:
4187 return false;
4188 }
4189}
4190
4191bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4192 if (Reg == 0)
4193 return false;
4194 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4195 return AArch64::FPR128RegClass.contains(Reg) ||
4196 AArch64::FPR64RegClass.contains(Reg) ||
4197 AArch64::FPR32RegClass.contains(Reg) ||
4198 AArch64::FPR16RegClass.contains(Reg) ||
4199 AArch64::FPR8RegClass.contains(Reg);
4200}
4201
4202bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
4203 auto IsFPR = [&](const MachineOperand &Op) {
4204 if (!Op.isReg())
4205 return false;
4206 auto Reg = Op.getReg();
4207 if (Reg.isPhysical())
4208 return isFpOrNEON(Reg);
4209
4210 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4211 return TRC == &AArch64::FPR128RegClass ||
4212 TRC == &AArch64::FPR128_loRegClass ||
4213 TRC == &AArch64::FPR64RegClass ||
4214 TRC == &AArch64::FPR64_loRegClass ||
4215 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4216 TRC == &AArch64::FPR8RegClass;
4217 };
4218 return llvm::any_of(MI.operands(), IsFPR);
4219}
4220
4221// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4222// scaled.
4223static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4224 int Scale = AArch64InstrInfo::getMemScale(Opc);
4225
4226 // If the byte-offset isn't a multiple of the stride, we can't scale this
4227 // offset.
4228 if (Offset % Scale != 0)
4229 return false;
4230
4231 // Convert the byte-offset used by unscaled into an "element" offset used
4232 // by the scaled pair load/store instructions.
4233 Offset /= Scale;
4234 return true;
4235}
4236
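A standalone illustration of the conversion scaleOffset performs; the opcode-to-scale mapping is reduced here to a plain integer, whereas in the function above it comes from getMemScale:

#include <cassert>
#include <cstdint>

// Mirror of the conversion above: byte offset -> element offset, failing when
// the byte offset is not a multiple of the element size.
static bool ScaleOffsetSketch(int64_t Scale, int64_t &Offset) {
  if (Offset % Scale != 0)
    return false;
  Offset /= Scale;
  return true;
}

int main() {
  int64_t Off = 24;
  assert(ScaleOffsetSketch(8, Off) && Off == 3); // e.g. an unscaled 8-byte store
  Off = 20;
  assert(!ScaleOffsetSketch(8, Off)); // not a multiple of 8: cannot pair
  return 0;
}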
4237static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4238 if (FirstOpc == SecondOpc)
4239 return true;
4240 // We can also pair sign-ext and zero-ext instructions.
4241 switch (FirstOpc) {
4242 default:
4243 return false;
4244 case AArch64::STRSui:
4245 case AArch64::STURSi:
4246 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4247 case AArch64::STRDui:
4248 case AArch64::STURDi:
4249 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4250 case AArch64::STRQui:
4251 case AArch64::STURQi:
4252 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4253 case AArch64::STRWui:
4254 case AArch64::STURWi:
4255 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4256 case AArch64::STRXui:
4257 case AArch64::STURXi:
4258 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4259 case AArch64::LDRSui:
4260 case AArch64::LDURSi:
4261 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4262 case AArch64::LDRDui:
4263 case AArch64::LDURDi:
4264 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4265 case AArch64::LDRQui:
4266 case AArch64::LDURQi:
4267 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4268 case AArch64::LDRWui:
4269 case AArch64::LDURWi:
4270 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4271 case AArch64::LDRSWui:
4272 case AArch64::LDURSWi:
4273 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4274 case AArch64::LDRXui:
4275 case AArch64::LDURXi:
4276 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4277 }
4278 // These instructions can't be paired based on their opcodes.
4279 return false;
4280}
4281
4282static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4283 int64_t Offset1, unsigned Opcode1, int FI2,
4284 int64_t Offset2, unsigned Opcode2) {
4285 // Accesses through fixed stack object frame indices may access a different
4286 // fixed stack slot. Check that the object offsets + offsets match.
4287 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4288 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4289 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4290 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4291 // Convert to scaled object offsets.
4292 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4293 if (ObjectOffset1 % Scale1 != 0)
4294 return false;
4295 ObjectOffset1 /= Scale1;
4296 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4297 if (ObjectOffset2 % Scale2 != 0)
4298 return false;
4299 ObjectOffset2 /= Scale2;
4300 ObjectOffset1 += Offset1;
4301 ObjectOffset2 += Offset2;
4302 return ObjectOffset1 + 1 == ObjectOffset2;
4303 }
4304
4305 return FI1 == FI2;
4306}
4307
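For fixed stack objects the check above effectively asks whether the two accesses land on adjacent scaled slots. A small standalone rework of that arithmetic; the object offsets and scales are made-up example values:

#include <cassert>
#include <cstdint>

// Same arithmetic as above: fold each object offset into elements of the
// access size, add the instruction's own element offset, and require the two
// results to be consecutive.
static bool AdjacentFixedSlots(int64_t ObjOff1, int64_t Off1, int64_t Scale1,
                               int64_t ObjOff2, int64_t Off2, int64_t Scale2) {
  if (ObjOff1 % Scale1 != 0 || ObjOff2 % Scale2 != 0)
    return false;
  return ObjOff1 / Scale1 + Off1 + 1 == ObjOff2 / Scale2 + Off2;
}

int main() {
  // Two 8-byte accesses to fixed objects at byte offsets 16 and 24, both with
  // a zero instruction offset: slots 2 and 3 are adjacent, so cluster them.
  assert(AdjacentFixedSlots(16, 0, 8, 24, 0, 8));
  // Same objects but the second access is a slot further away: reject.
  assert(!AdjacentFixedSlots(16, 0, 8, 32, 0, 8));
  return 0;
}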
4308/// Detect opportunities for ldp/stp formation.
4309///
4310/// Only called for LdSt for which getMemOperandWithOffset returns true.
4311bool AArch64InstrInfo::shouldClusterMemOps(
4312 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4313 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4314 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4315 unsigned NumBytes) const {
4316 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4317 const MachineOperand &BaseOp1 = *BaseOps1.front();
4318 const MachineOperand &BaseOp2 = *BaseOps2.front();
4319 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4320 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4321 if (BaseOp1.getType() != BaseOp2.getType())
4322 return false;
4323
4324 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4325 "Only base registers and frame indices are supported.");
4326
4327 // Check for both base regs and base FI.
4328 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4329 return false;
4330
4331 // Only cluster up to a single pair.
4332 if (ClusterSize > 2)
4333 return false;
4334
4335 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4336 return false;
4337
4338 // Can we pair these instructions based on their opcodes?
4339 unsigned FirstOpc = FirstLdSt.getOpcode();
4340 unsigned SecondOpc = SecondLdSt.getOpcode();
4341 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4342 return false;
4343
4344 // Can't merge volatiles or load/stores that have a hint to avoid pair
4345 // formation, for example.
4346 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4347 !isCandidateToMergeOrPair(SecondLdSt))
4348 return false;
4349
4350 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4351 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4352 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4353 return false;
4354
4355 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4356 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4357 return false;
4358
4359 // Pairwise instructions have a 7-bit signed offset field.
4360 if (Offset1 > 63 || Offset1 < -64)
4361 return false;
4362
4363 // The caller should already have ordered First/SecondLdSt by offset.
4364 // Note: except for non-equal frame index bases
4365 if (BaseOp1.isFI()) {
4366 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4367 "Caller should have ordered offsets.");
4368
4369 const MachineFrameInfo &MFI =
4370 FirstLdSt.getParent()->getParent()->getFrameInfo();
4371 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4372 BaseOp2.getIndex(), Offset2, SecondOpc);
4373 }
4374
4375 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4376
4377 return Offset1 + 1 == Offset2;
4378}
4379
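The final tests in the routine above reduce to plain integer checks: the first element offset must fit the 7-bit signed LDP/STP field and the second must follow immediately. A minimal sketch of just that tail (it deliberately omits the opcode, volatility, and frame-index checks handled earlier in the function):

#include <cassert>
#include <cstdint>

static bool Pairable(int64_t Offset1, int64_t Offset2) {
  if (Offset1 > 63 || Offset1 < -64)
    return false; // does not fit the pairwise immediate
  return Offset1 + 1 == Offset2;
}

int main() {
  assert(Pairable(0, 1));    // e.g. two adjacent spill slots
  assert(!Pairable(5, 7));   // a gap in between: no ldp/stp
  assert(!Pairable(64, 65)); // first offset does not fit the immediate
  return 0;
}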
4380static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
4381 unsigned Reg, unsigned SubIdx,
4382 unsigned State,
4383 const TargetRegisterInfo *TRI) {
4384 if (!SubIdx)
4385 return MIB.addReg(Reg, State);
4386
4387 if (Register::isPhysicalRegister(Reg))
4388 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4389 return MIB.addReg(Reg, State, SubIdx);
4390}
4391
4392static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4393 unsigned NumRegs) {
4394 // We really want the positive remainder mod 32 here, which happens to be
4395 // easily obtainable with a mask.
4396 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4397}
4398
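A worked instance of the modular-distance test above, detached from any register classes (the encodings are illustrative; 0x1f masks to the 32-register bank):

#include <cassert>

// Mirror of the test above: a forward sub-register copy clobbers the source
// tuple when the destination starts within NumRegs registers after the
// source, counting modulo 32.
static bool Clobbers(unsigned DestEnc, unsigned SrcEnc, unsigned NumRegs) {
  return ((DestEnc - SrcEnc) & 0x1f) < NumRegs;
}

int main() {
  // Copying a 2-register tuple starting at encoding 0 into one starting at
  // encoding 1: copying forward would overwrite the second source register
  // before it is read, so the copy must be emitted backwards.
  assert(Clobbers(/*DestEnc=*/1, /*SrcEnc=*/0, /*NumRegs=*/2));
  // Destination starting at encoding 4: no overlap, forward order is fine.
  assert(!Clobbers(4, 0, 2));
  return 0;
}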
4399void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4400 MachineBasicBlock::iterator I,
4401 const DebugLoc &DL, MCRegister DestReg,
4402 MCRegister SrcReg, bool KillSrc,
4403 unsigned Opcode,
4404 ArrayRef<unsigned> Indices) const {
4405 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4407 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4408 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4409 unsigned NumRegs = Indices.size();
4410
4411 int SubReg = 0, End = NumRegs, Incr = 1;
4412 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4413 SubReg = NumRegs - 1;
4414 End = -1;
4415 Incr = -1;
4416 }
4417
4418 for (; SubReg != End; SubReg += Incr) {
4419 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4420 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4421 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4422 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4423 }
4424}
4425
4426void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4427 MachineBasicBlock::iterator I,
4428 DebugLoc DL, unsigned DestReg,
4429 unsigned SrcReg, bool KillSrc,
4430 unsigned Opcode, unsigned ZeroReg,
4431 llvm::ArrayRef<unsigned> Indices) const {
4433 unsigned NumRegs = Indices.size();
4434
4435#ifndef NDEBUG
4436 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4437 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4438 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4439 "GPR reg sequences should not be able to overlap");
4440#endif
4441
4442 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4443 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4444 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4445 MIB.addReg(ZeroReg);
4446 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4447 MIB.addImm(0);
4448 }
4449}
4450
4451void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4452 MachineBasicBlock::iterator I,
4453 const DebugLoc &DL, MCRegister DestReg,
4454 MCRegister SrcReg, bool KillSrc) const {
4455 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4456 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4458
4459 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4460 // If either operand is WSP, expand to ADD #0.
4461 if (Subtarget.hasZeroCycleRegMove()) {
4462 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4463 MCRegister DestRegX = TRI->getMatchingSuperReg(
4464 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4465 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4466 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4467 // This instruction is reading and writing X registers. This may upset
4468 // the register scavenger and machine verifier, so we need to indicate
4469 // that we are reading an undefined value from SrcRegX, but a proper
4470 // value from SrcReg.
4471 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4472 .addReg(SrcRegX, RegState::Undef)
4473 .addImm(0)
4475 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4476 } else {
4477 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4478 .addReg(SrcReg, getKillRegState(KillSrc))
4479 .addImm(0)
4481 }
4482 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4483 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4484 .addImm(0)
4486 } else {
4487 if (Subtarget.hasZeroCycleRegMove()) {
4488 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4489 MCRegister DestRegX = TRI->getMatchingSuperReg(
4490 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4491 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4492 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4493 // This instruction is reading and writing X registers. This may upset
4494 // the register scavenger and machine verifier, so we need to indicate
4495 // that we are reading an undefined value from SrcRegX, but a proper
4496 // value from SrcReg.
4497 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4498 .addReg(AArch64::XZR)
4499 .addReg(SrcRegX, RegState::Undef)
4500 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4501 } else {
4502 // Otherwise, expand to ORR WZR.
4503 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4504 .addReg(AArch64::WZR)
4505 .addReg(SrcReg, getKillRegState(KillSrc));
4506 }
4507 }
4508 return;
4509 }
4510
4511 // Copy a Predicate register by ORRing with itself.
4512 if (AArch64::PPRRegClass.contains(DestReg) &&
4513 AArch64::PPRRegClass.contains(SrcReg)) {
4515 "Unexpected SVE register.");
4516 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4517 .addReg(SrcReg) // Pg
4518 .addReg(SrcReg)
4519 .addReg(SrcReg, getKillRegState(KillSrc));
4520 return;
4521 }
4522
4523 // Copy a predicate-as-counter register by ORRing with itself as if it
4524 // were a regular predicate (mask) register.
4525 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4526 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4527 if (DestIsPNR || SrcIsPNR) {
4528 auto ToPPR = [](MCRegister R) -> MCRegister {
4529 return (R - AArch64::PN0) + AArch64::P0;
4530 };
4531 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4532 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4533
4534 if (PPRSrcReg != PPRDestReg) {
4535 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4536 .addReg(PPRSrcReg) // Pg
4537 .addReg(PPRSrcReg)
4538 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4539 if (DestIsPNR)
4540 NewMI.addDef(DestReg, RegState::Implicit);
4541 }
4542 return;
4543 }
4544
4545 // Copy a Z register by ORRing with itself.
4546 if (AArch64::ZPRRegClass.contains(DestReg) &&
4547 AArch64::ZPRRegClass.contains(SrcReg)) {
4549 "Unexpected SVE register.");
4550 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4551 .addReg(SrcReg)
4552 .addReg(SrcReg, getKillRegState(KillSrc));
4553 return;
4554 }
4555
4556 // Copy a Z register pair by copying the individual sub-registers.
4557 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4558 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4559 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4560 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4562 "Unexpected SVE register.");
4563 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4564 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4565 Indices);
4566 return;
4567 }
4568
4569 // Copy a Z register triple by copying the individual sub-registers.
4570 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4571 AArch64::ZPR3RegClass.contains(SrcReg)) {
4573 "Unexpected SVE register.");
4574 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4575 AArch64::zsub2};
4576 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4577 Indices);
4578 return;
4579 }
4580
4581 // Copy a Z register quad by copying the individual sub-registers.
4582 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4583 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4584 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4585 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4587 "Unexpected SVE register.");
4588 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4589 AArch64::zsub2, AArch64::zsub3};
4590 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4591 Indices);
4592 return;
4593 }
4594
4595 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4596 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4597 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4598 // If either operand is SP, expand to ADD #0.
4599 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4600 .addReg(SrcReg, getKillRegState(KillSrc))
4601 .addImm(0)
4603 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4604 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4605 .addImm(0)
4607 } else {
4608 // Otherwise, expand to ORR XZR.
4609 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4610 .addReg(AArch64::XZR)
4611 .addReg(SrcReg, getKillRegState(KillSrc));
4612 }
4613 return;
4614 }
4615
4616 // Copy a DDDD register quad by copying the individual sub-registers.
4617 if (AArch64::DDDDRegClass.contains(DestReg) &&
4618 AArch64::DDDDRegClass.contains(SrcReg)) {
4619 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4620 AArch64::dsub2, AArch64::dsub3};
4621 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4622 Indices);
4623 return;
4624 }
4625
4626 // Copy a DDD register triple by copying the individual sub-registers.
4627 if (AArch64::DDDRegClass.contains(DestReg) &&
4628 AArch64::DDDRegClass.contains(SrcReg)) {
4629 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4630 AArch64::dsub2};
4631 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4632 Indices);
4633 return;
4634 }
4635
4636 // Copy a DD register pair by copying the individual sub-registers.
4637 if (AArch64::DDRegClass.contains(DestReg) &&
4638 AArch64::DDRegClass.contains(SrcReg)) {
4639 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4640 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4641 Indices);
4642 return;
4643 }
4644
4645 // Copy a QQQQ register quad by copying the individual sub-registers.
4646 if (AArch64::QQQQRegClass.contains(DestReg) &&
4647 AArch64::QQQQRegClass.contains(SrcReg)) {
4648 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4649 AArch64::qsub2, AArch64::qsub3};
4650 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4651 Indices);
4652 return;
4653 }
4654
4655 // Copy a QQQ register triple by copying the individual sub-registers.
4656 if (AArch64::QQQRegClass.contains(DestReg) &&
4657 AArch64::QQQRegClass.contains(SrcReg)) {
4658 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4659 AArch64::qsub2};
4660 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4661 Indices);
4662 return;
4663 }
4664
4665 // Copy a QQ register pair by copying the individual sub-registers.
4666 if (AArch64::QQRegClass.contains(DestReg) &&
4667 AArch64::QQRegClass.contains(SrcReg)) {
4668 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4669 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4670 Indices);
4671 return;
4672 }
4673
4674 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4675 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4676 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4677 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4678 AArch64::XZR, Indices);
4679 return;
4680 }
4681
4682 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4683 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4684 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4685 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4686 AArch64::WZR, Indices);
4687 return;
4688 }
4689
4690 if (AArch64::FPR128RegClass.contains(DestReg) &&
4691 AArch64::FPR128RegClass.contains(SrcReg)) {
4692 if (Subtarget.isSVEorStreamingSVEAvailable() &&
4693 !Subtarget.isNeonAvailable())
4694 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4695 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4696 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4697 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4698 else if (Subtarget.isNeonAvailable())
4699 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4700 .addReg(SrcReg)
4701 .addReg(SrcReg, getKillRegState(KillSrc));
4702 else {
4703 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4704 .addReg(AArch64::SP, RegState::Define)
4705 .addReg(SrcReg, getKillRegState(KillSrc))
4706 .addReg(AArch64::SP)
4707 .addImm(-16);
4708 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
4709 .addReg(AArch64::SP, RegState::Define)
4710 .addReg(DestReg, RegState::Define)
4711 .addReg(AArch64::SP)
4712 .addImm(16);
4713 }
4714 return;
4715 }
4716
4717 if (AArch64::FPR64RegClass.contains(DestReg) &&
4718 AArch64::FPR64RegClass.contains(SrcReg)) {
4719 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4720 .addReg(SrcReg, getKillRegState(KillSrc));
4721 return;
4722 }
4723
4724 if (AArch64::FPR32RegClass.contains(DestReg) &&
4725 AArch64::FPR32RegClass.contains(SrcReg)) {
4726 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4727 .addReg(SrcReg, getKillRegState(KillSrc));
4728 return;
4729 }
4730
4731 if (AArch64::FPR16RegClass.contains(DestReg) &&
4732 AArch64::FPR16RegClass.contains(SrcReg)) {
4733 DestReg =
4734 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4735 SrcReg =
4736 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4737 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4738 .addReg(SrcReg, getKillRegState(KillSrc));
4739 return;
4740 }
4741
4742 if (AArch64::FPR8RegClass.contains(DestReg) &&
4743 AArch64::FPR8RegClass.contains(SrcReg)) {
4744 DestReg =
4745 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4746 SrcReg =
4747 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4748 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4749 .addReg(SrcReg, getKillRegState(KillSrc));
4750 return;
4751 }
4752
4753 // Copies between GPR64 and FPR64.
4754 if (AArch64::FPR64RegClass.contains(DestReg) &&
4755 AArch64::GPR64RegClass.contains(SrcReg)) {
4756 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4757 .addReg(SrcReg, getKillRegState(KillSrc));
4758 return;
4759 }
4760 if (AArch64::GPR64RegClass.contains(DestReg) &&
4761 AArch64::FPR64RegClass.contains(SrcReg)) {
4762 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4763 .addReg(SrcReg, getKillRegState(KillSrc));
4764 return;
4765 }
4766 // Copies between GPR32 and FPR32.
4767 if (AArch64::FPR32RegClass.contains(DestReg) &&
4768 AArch64::GPR32RegClass.contains(SrcReg)) {
4769 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4770 .addReg(SrcReg, getKillRegState(KillSrc));
4771 return;
4772 }
4773 if (AArch64::GPR32RegClass.contains(DestReg) &&
4774 AArch64::FPR32RegClass.contains(SrcReg)) {
4775 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4776 .addReg(SrcReg, getKillRegState(KillSrc));
4777 return;
4778 }
4779
4780 if (DestReg == AArch64::NZCV) {
4781 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4782 BuildMI(MBB, I, DL, get(AArch64::MSR))
4783 .addImm(AArch64SysReg::NZCV)
4784 .addReg(SrcReg, getKillRegState(KillSrc))
4785 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4786 return;
4787 }
4788
4789 if (SrcReg == AArch64::NZCV) {
4790 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4791 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4792 .addImm(AArch64SysReg::NZCV)
4793 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4794 return;
4795 }
4796
4797#ifndef NDEBUG
4799 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4800 << TRI.getRegAsmName(SrcReg) << "\n";
4801#endif
4802 llvm_unreachable("unimplemented reg-to-reg copy");
4803}
4804
4807 MachineBasicBlock::iterator InsertBefore,
4808 const MCInstrDesc &MCID,
4809 Register SrcReg, bool IsKill,
4810 unsigned SubIdx0, unsigned SubIdx1, int FI,
4811 MachineMemOperand *MMO) {
4812 Register SrcReg0 = SrcReg;
4813 Register SrcReg1 = SrcReg;
4814 if (SrcReg.isPhysical()) {
4815 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4816 SubIdx0 = 0;
4817 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4818 SubIdx1 = 0;
4819 }
4820 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4821 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4822 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4823 .addFrameIndex(FI)
4824 .addImm(0)
4825 .addMemOperand(MMO);
4826}
4827
4828void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4829 MachineBasicBlock::iterator MBBI,
4830 Register SrcReg, bool isKill, int FI,
4831 const TargetRegisterClass *RC,
4832 const TargetRegisterInfo *TRI,
4833 Register VReg) const {
4834 MachineFunction &MF = *MBB.getParent();
4835 MachineFrameInfo &MFI = MF.getFrameInfo();
4836
4838 MachineMemOperand *MMO =
4840 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4841 unsigned Opc = 0;
4842 bool Offset = true;
4844 unsigned StackID = TargetStackID::Default;
4845 switch (TRI->getSpillSize(*RC)) {
4846 case 1:
4847 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4848 Opc = AArch64::STRBui;
4849 break;
4850 case 2: {
4851 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4852 Opc = AArch64::STRHui;
4853 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
4854 AArch64::PPRRegClass.hasSubClassEq(RC)) {
4856 "Unexpected register store without SVE store instructions");
4857 Opc = AArch64::STR_PXI;
4859 }
4860 break;
4861 }
4862 case 4:
4863 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4864 Opc = AArch64::STRWui;
4865 if (SrcReg.isVirtual())
4866 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4867 else
4868 assert(SrcReg != AArch64::WSP);
4869 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4870 Opc = AArch64::STRSui;
4871 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4872 Opc = AArch64::STR_PPXI;
4874 }
4875 break;
4876 case 8:
4877 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4878 Opc = AArch64::STRXui;
4879 if (SrcReg.isVirtual())
4880 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4881 else
4882 assert(SrcReg != AArch64::SP);
4883 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4884 Opc = AArch64::STRDui;
4885 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4887 get(AArch64::STPWi), SrcReg, isKill,
4888 AArch64::sube32, AArch64::subo32, FI, MMO);
4889 return;
4890 }
4891 break;
4892 case 16:
4893 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4894 Opc = AArch64::STRQui;
4895 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4896 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4897 Opc = AArch64::ST1Twov1d;
4898 Offset = false;
4899 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4901 get(AArch64::STPXi), SrcReg, isKill,
4902 AArch64::sube64, AArch64::subo64, FI, MMO);
4903 return;
4904 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4906 "Unexpected register store without SVE store instructions");
4907 Opc = AArch64::STR_ZXI;
4909 }
4910 break;
4911 case 24:
4912 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4913 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4914 Opc = AArch64::ST1Threev1d;
4915 Offset = false;
4916 }
4917 break;
4918 case 32:
4919 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4920 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4921 Opc = AArch64::ST1Fourv1d;
4922 Offset = false;
4923 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4924 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4925 Opc = AArch64::ST1Twov2d;
4926 Offset = false;
4927 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4928 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4930 "Unexpected register store without SVE store instructions");
4931 Opc = AArch64::STR_ZZXI;
4933 }
4934 break;
4935 case 48:
4936 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4937 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4938 Opc = AArch64::ST1Threev2d;
4939 Offset = false;
4940 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4942 "Unexpected register store without SVE store instructions");
4943 Opc = AArch64::STR_ZZZXI;
4945 }
4946 break;
4947 case 64:
4948 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4949 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4950 Opc = AArch64::ST1Fourv2d;
4951 Offset = false;
4952 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4953 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4955 "Unexpected register store without SVE store instructions");
4956 Opc = AArch64::STR_ZZZZXI;
4958 }
4959 break;
4960 }
4961 assert(Opc && "Unknown register class");
4962 MFI.setStackID(FI, StackID);
4963
4964 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4965 .addReg(SrcReg, getKillRegState(isKill))
4966 .addFrameIndex(FI);
4967
4968 if (Offset)
4969 MI.addImm(0);
4970 if (PNRReg.isValid())
4971 MI.addDef(PNRReg, RegState::Implicit);
4972 MI.addMemOperand(MMO);
4973}
4974
4977 MachineBasicBlock::iterator InsertBefore,
4978 const MCInstrDesc &MCID,
4979 Register DestReg, unsigned SubIdx0,
4980 unsigned SubIdx1, int FI,
4981 MachineMemOperand *MMO) {
4982 Register DestReg0 = DestReg;
4983 Register DestReg1 = DestReg;
4984 bool IsUndef = true;
4985 if (DestReg.isPhysical()) {
4986 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4987 SubIdx0 = 0;
4988 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4989 SubIdx1 = 0;
4990 IsUndef = false;
4991 }
4992 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4993 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4994 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4995 .addFrameIndex(FI)
4996 .addImm(0)
4997 .addMemOperand(MMO);
4998}
4999
5000void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
5001 MachineBasicBlock::iterator MBBI,
5002 Register DestReg, int FI,
5003 const TargetRegisterClass *RC,
5004 const TargetRegisterInfo *TRI,
5005 Register VReg) const {
5006 MachineFunction &MF = *MBB.getParent();
5007 MachineFrameInfo &MFI = MF.getFrameInfo();
5009 MachineMemOperand *MMO =
5011 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5012
5013 unsigned Opc = 0;
5014 bool Offset = true;
5015 unsigned StackID = TargetStackID::Default;
5017 switch (TRI->getSpillSize(*RC)) {
5018 case 1:
5019 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5020 Opc = AArch64::LDRBui;
5021 break;
5022 case 2: {
5023 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5024 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5025 Opc = AArch64::LDRHui;
5026 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5028 "Unexpected register load without SVE load instructions");
5029 if (IsPNR)
5030 PNRReg = DestReg;
5031 Opc = AArch64::LDR_PXI;
5033 }
5034 break;
5035 }
5036 case 4:
5037 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5038 Opc = AArch64::LDRWui;
5039 if (DestReg.isVirtual())
5040 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5041 else
5042 assert(DestReg != AArch64::WSP);
5043 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5044 Opc = AArch64::LDRSui;
5045 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5046 Opc = AArch64::LDR_PPXI;
5048 }
5049 break;
5050 case 8:
5051 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5052 Opc = AArch64::LDRXui;
5053 if (DestReg.isVirtual())
5054 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5055 else
5056 assert(DestReg != AArch64::SP);
5057 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5058 Opc = AArch64::LDRDui;
5059 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5061 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5062 AArch64::subo32, FI, MMO);
5063 return;
5064 }
5065 break;
5066 case 16:
5067 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5068 Opc = AArch64::LDRQui;
5069 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5070 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5071 Opc = AArch64::LD1Twov1d;
5072 Offset = false;
5073 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5075 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5076 AArch64::subo64, FI, MMO);
5077 return;
5078 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5080 "Unexpected register load without SVE load instructions");
5081 Opc = AArch64::LDR_ZXI;
5083 }
5084 break;
5085 case 24:
5086 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5087 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5088 Opc = AArch64::LD1Threev1d;
5089 Offset = false;
5090 }
5091 break;
5092 case 32:
5093 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5094 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5095 Opc = AArch64::LD1Fourv1d;
5096 Offset = false;
5097 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5098 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5099 Opc = AArch64::LD1Twov2d;
5100 Offset = false;
5101 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5102 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5104 "Unexpected register load without SVE load instructions");
5105 Opc = AArch64::LDR_ZZXI;
5107 }
5108 break;
5109 case 48:
5110 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5111 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5112 Opc = AArch64::LD1Threev2d;
5113 Offset = false;
5114 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5116 "Unexpected register load without SVE load instructions");
5117 Opc = AArch64::LDR_ZZZXI;
5119 }
5120 break;
5121 case 64:
5122 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5123 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5124 Opc = AArch64::LD1Fourv2d;
5125 Offset = false;
5126 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5127 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5129 "Unexpected register load without SVE load instructions");
5130 Opc = AArch64::LDR_ZZZZXI;
5132 }
5133 break;
5134 }
5135
5136 assert(Opc && "Unknown register class");
5137 MFI.setStackID(FI, StackID);
5138
5139 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5140 .addReg(DestReg, getDefRegState(true))
5141 .addFrameIndex(FI);
5142 if (Offset)
5143 MI.addImm(0);
5144 if (PNRReg.isValid() && !PNRReg.isVirtual())
5145 MI.addDef(PNRReg, RegState::Implicit);
5146 MI.addMemOperand(MMO);
5147
5148 if (PNRReg.isValid() && PNRReg.isVirtual())
5149 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5150 .addReg(DestReg);
5151}
5152
5154 const MachineInstr &UseMI,
5155 const TargetRegisterInfo *TRI) {
5156 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5157 UseMI.getIterator()),
5158 [TRI](const MachineInstr &I) {
5159 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5160 I.readsRegister(AArch64::NZCV, TRI);
5161 });
5162}
5163
5164void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5165 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5166 // The smallest scalable elements supported by scaled SVE addressing
5167 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5168 // byte offset must always be a multiple of 2.
5169 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5170
5171 // VGSized offsets are divided by '2', because the VG register is the
5172 // number of 64-bit granules as opposed to 128-bit vector chunks,
5173 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5174 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5175 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5176 ByteSized = Offset.getFixed();
5177 VGSized = Offset.getScalable() / 2;
5178}
5179
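A worked instance of the split above, with illustrative values: an offset of 32 fixed bytes plus 16 scalable bytes (one full SVE data vector) becomes 32 bytes plus 8 VG-scaled units, because VG counts 64-bit granules:

#include <cassert>
#include <cstdint>

int main() {
  // Offset = 32 fixed bytes + 16 scalable bytes.
  const int64_t Fixed = 32, Scalable = 16;
  assert(Scalable % 2 == 0 && "scalable part must be a multiple of 2");

  // Same split as above: the DWARF expression scales by VG, which counts
  // 64-bit granules, so the scalable byte count is halved.
  const int64_t ByteSized = Fixed;      // 32
  const int64_t VGSized = Scalable / 2; // 8, i.e. "+ 8 * VG" in DWARF terms
  assert(ByteSized == 32 && VGSized == 8);
  return 0;
}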
5180/// Returns the offset in parts to which this frame offset can be
5181/// decomposed for the purpose of describing a frame offset.
5182/// For non-scalable offsets this is simply its byte size.
5183void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5184 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5185 int64_t &NumDataVectors) {
5186 // The smallest scalable elements supported by scaled SVE addressing
5187 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5188 // byte offset must always be a multiple of 2.
5189 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5190
5191 NumBytes = Offset.getFixed();
5192 NumDataVectors = 0;
5193 NumPredicateVectors = Offset.getScalable() / 2;
5194 // This method is used to get the offsets to adjust the frame offset.
5195 // If the function requires ADDPL to be used and needs more than two ADDPL
5196 // instructions, part of the offset is folded into NumDataVectors so that it
5197 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5198 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5199 NumPredicateVectors > 62) {
5200 NumDataVectors = NumPredicateVectors / 8;
5201 NumPredicateVectors -= NumDataVectors * 8;
5202 }
5203}
5204
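A standalone rerun of the decomposition above on two example offsets: the first keeps a pure predicate-vector (ADDPL) count, the second folds a multiple of eight predicate-sized units into whole data vectors (ADDVL):

#include <cassert>
#include <cstdint>

// Same folding rule as above, on plain integers.
static void Decompose(int64_t Scalable, int64_t &Preds, int64_t &DataVecs) {
  DataVecs = 0;
  Preds = Scalable / 2;
  if (Preds % 8 == 0 || Preds < -64 || Preds > 62) {
    DataVecs = Preds / 8;
    Preds -= DataVecs * 8;
  }
}

int main() {
  int64_t Preds, DataVecs;
  Decompose(/*Scalable=*/6, Preds, DataVecs); // 3 predicate-sized units
  assert(Preds == 3 && DataVecs == 0);        // one ADDPL #3

  Decompose(/*Scalable=*/48, Preds, DataVecs); // 24 units, a multiple of 8
  assert(Preds == 0 && DataVecs == 3);         // one ADDVL #3 instead
  return 0;
}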
5205// Convenience function to create a DWARF expression for
5206// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5207static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5208 int NumVGScaledBytes, unsigned VG,
5209 llvm::raw_string_ostream &Comment) {
5210 uint8_t buffer[16];
5211
5212 if (NumBytes) {
5213 Expr.push_back(dwarf::DW_OP_consts);
5214 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5215 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5216 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5217 }
5218
5219 if (NumVGScaledBytes) {
5220 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5221 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5222
5223 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5224 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5225 Expr.push_back(0);
5226
5227 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5228 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5229
5230 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5231 << std::abs(NumVGScaledBytes) << " * VG";
5232 }
5233}
5234
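The postfix DWARF expression built above for `Reg + NumBytes + NumVGScaledBytes * VG` can be read off the pushes directly. A symbolic sketch of the operator sequence for "fp + 16 + 8 * VG" (operator names only; the real code emits the numeric opcodes plus (S)LEB128-encoded operands, and fp is DWARF breg 29 on AArch64):

#include <cassert>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Expr = {
      "DW_OP_breg29 0",   // base register (fp) plus 0
      "DW_OP_consts 16",  // push NumBytes
      "DW_OP_plus",       //   ...and add it
      "DW_OP_consts 8",   // push NumVGScaledBytes
      "DW_OP_bregx VG 0", // push the runtime value of VG
      "DW_OP_mul",        // 8 * VG
      "DW_OP_plus",       // fp + 16 + 8 * VG
  };
  assert(Expr.size() == 7);
  return 0;
}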
5235// Creates an MCCFIInstruction:
5236// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5238 unsigned Reg,
5239 const StackOffset &Offset) {
5240 int64_t NumBytes, NumVGScaledBytes;
5242 NumVGScaledBytes);
5243 std::string CommentBuffer;
5244 llvm::raw_string_ostream Comment(CommentBuffer);
5245
5246 if (Reg == AArch64::SP)
5247 Comment << "sp";
5248 else if (Reg == AArch64::FP)
5249 Comment << "fp";
5250 else
5251 Comment << printReg(Reg, &TRI);
5252
5253 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5254 SmallString<64> Expr;
5255 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5256 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5257 Expr.push_back(0);
5258 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5259 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5260
5261 // Wrap this into DW_CFA_def_cfa.
5262 SmallString<64> DefCfaExpr;
5263 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5264 uint8_t buffer[16];
5265 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5266 DefCfaExpr.append(Expr.str());
5267 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5268 Comment.str());
5269}
5270
5272 unsigned FrameReg, unsigned Reg,
5273 const StackOffset &Offset,
5274 bool LastAdjustmentWasScalable) {
5275 if (Offset.getScalable())
5276 return createDefCFAExpression(TRI, Reg, Offset);
5277
5278 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5279 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5280
5281 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5282 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5283}
5284
5286 unsigned Reg,
5287 const StackOffset &OffsetFromDefCFA) {
5288 int64_t NumBytes, NumVGScaledBytes;
5290 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5291
5292 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5293
5294 // Non-scalable offsets can use DW_CFA_offset directly.
5295 if (!NumVGScaledBytes)
5296 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5297
5298 std::string CommentBuffer;
5299 llvm::raw_string_ostream Comment(CommentBuffer);
5300 Comment << printReg(Reg, &TRI) << " @ cfa";
5301
5302 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5303 SmallString<64> OffsetExpr;
5304 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5305 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5306
5307 // Wrap this into DW_CFA_expression
5308 SmallString<64> CfaExpr;
5309 CfaExpr.push_back(dwarf::DW_CFA_expression);
5310 uint8_t buffer[16];
5311 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5312 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5313 CfaExpr.append(OffsetExpr.str());
5314
5315 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5316 Comment.str());
5317}
5318
5319// Helper function to emit a frame offset adjustment from a given
5320// pointer (SrcReg), stored into DestReg. This function is explicit
5321// in that it requires the opcode.
5324 const DebugLoc &DL, unsigned DestReg,
5325 unsigned SrcReg, int64_t Offset, unsigned Opc,
5326 const TargetInstrInfo *TII,
5327 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5328 bool *HasWinCFI, bool EmitCFAOffset,
5329 StackOffset CFAOffset, unsigned FrameReg) {
5330 int Sign = 1;
5331 unsigned MaxEncoding, ShiftSize;
5332 switch (Opc) {
5333 case AArch64::ADDXri:
5334 case AArch64::ADDSXri:
5335 case AArch64::SUBXri:
5336 case AArch64::SUBSXri:
5337 MaxEncoding = 0xfff;
5338 ShiftSize = 12;
5339 break;
5340 case AArch64::ADDVL_XXI:
5341 case AArch64::ADDPL_XXI:
5342 case AArch64::ADDSVL_XXI:
5343 case AArch64::ADDSPL_XXI:
5344 MaxEncoding = 31;
5345 ShiftSize = 0;
5346 if (Offset < 0) {
5347 MaxEncoding = 32;
5348 Sign = -1;
5349 Offset = -Offset;
5350 }
5351 break;
5352 default:
5353 llvm_unreachable("Unsupported opcode");
5354 }
5355
5356 // `Offset` can be in bytes or in "scalable bytes".
5357 int VScale = 1;
5358 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5359 VScale = 16;
5360 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5361 VScale = 2;
5362
5363 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5364 // scratch register. If DestReg is a virtual register, use it as the
5365 // scratch register; otherwise, create a new virtual register (to be
5366 // replaced by the scavenger at the end of PEI). That case can be optimized
5367 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5368 // register can be loaded with offset%8 and the add/sub can use an extending
5369 // instruction with LSL#3.
5370 // Currently the function handles any offsets but generates a poor sequence
5371 // of code.
5372 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5373
5374 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5375 Register TmpReg = DestReg;
5376 if (TmpReg == AArch64::XZR)
5378 &AArch64::GPR64RegClass);
5379 do {
5380 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5381 unsigned LocalShiftSize = 0;
5382 if (ThisVal > MaxEncoding) {
5383 ThisVal = ThisVal >> ShiftSize;
5384 LocalShiftSize = ShiftSize;
5385 }
5386 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5387 "Encoding cannot handle value that big");
5388
5389 Offset -= ThisVal << LocalShiftSize;
5390 if (Offset == 0)
5391 TmpReg = DestReg;
5392 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5393 .addReg(SrcReg)
5394 .addImm(Sign * (int)ThisVal);
5395 if (ShiftSize)
5396 MBI = MBI.addImm(
5398 MBI = MBI.setMIFlag(Flag);
5399
5400 auto Change =
5401 VScale == 1
5402 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5403 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5404 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5405 CFAOffset += Change;
5406 else
5407 CFAOffset -= Change;
5408 if (EmitCFAOffset && DestReg == TmpReg) {
5409 MachineFunction &MF = *MBB.getParent();
5410 const TargetSubtargetInfo &STI = MF.getSubtarget();
5411 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5412
5413 unsigned CFIIndex = MF.addFrameInst(
5414 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5415 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5416 .addCFIIndex(CFIIndex)
5417 .setMIFlags(Flag);
5418 }
5419
5420 if (NeedsWinCFI) {
5421 assert(Sign == 1 && "SEH directives should always have a positive sign");
5422 int Imm = (int)(ThisVal << LocalShiftSize);
5423 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5424 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5425 if (HasWinCFI)
5426 *HasWinCFI = true;
5427 if (Imm == 0)
5428 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5429 else
5430 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5431 .addImm(Imm)
5432 .setMIFlag(Flag);
5433 assert(Offset == 0 && "Expected remaining offset to be zero to "
5434 "emit a single SEH directive");
5435 } else if (DestReg == AArch64::SP) {
5436 if (HasWinCFI)
5437 *HasWinCFI = true;
5438 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5439 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5440 .addImm(Imm)
5441 .setMIFlag(Flag);
5442 }
5443 }
5444
5445 SrcReg = TmpReg;
5446 } while (Offset);
5447}
5448
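The loop above emits the offset in immediate-sized chunks, preferring the 12-bit shifted form. A standalone trace of that chunking for an ADDXri-style adjustment (MaxEncoding = 0xfff, ShiftSize = 12; the offset value is made up):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <utility>
#include <vector>

int main() {
  const uint64_t MaxEncoding = 0xfff, ShiftSize = 12;
  const uint64_t MaxEncodableValue = MaxEncoding << ShiftSize;

  uint64_t Offset = 0x101008; // bytes to add
  std::vector<std::pair<uint64_t, unsigned>> Insns; // (imm, lsl amount)
  while (Offset) {
    uint64_t ThisVal = std::min(Offset, MaxEncodableValue);
    unsigned LocalShift = 0;
    if (ThisVal > MaxEncoding) {
      ThisVal >>= ShiftSize; // use the "imm, lsl #12" form
      LocalShift = ShiftSize;
    }
    Offset -= ThisVal << LocalShift;
    Insns.push_back({ThisVal, LocalShift});
  }

  // Two instructions: add #0x101, lsl #12, then add #0x8.
  assert(Insns.size() == 2);
  assert(Insns[0].first == 0x101 && Insns[0].second == 12);
  assert(Insns[1].first == 0x8 && Insns[1].second == 0);
  return 0;
}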
5451 unsigned DestReg, unsigned SrcReg,
5453 MachineInstr::MIFlag Flag, bool SetNZCV,
5454 bool NeedsWinCFI, bool *HasWinCFI,
5455 bool EmitCFAOffset, StackOffset CFAOffset,
5456 unsigned FrameReg) {
5457 // If a function is marked as arm_locally_streaming, then the runtime value of
5458 // vscale in the prologue/epilogue is different from the runtime value of vscale
5459 // in the function's body. To avoid having to consider multiple vscales,
5460 // we can use `addsvl` to allocate any scalable stack-slots, which under
5461 // most circumstances will be only locals, not callee-save slots.
5462 const Function &F = MBB.getParent()->getFunction();
5463 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5464
5465 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5467 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5468
5469 // First emit non-scalable frame offsets, or a simple 'mov'.
5470 if (Bytes || (!Offset && SrcReg != DestReg)) {
5471 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5472 "SP increment/decrement not 8-byte aligned");
5473 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5474 if (Bytes < 0) {
5475 Bytes = -Bytes;
5476 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5477 }
5478 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5479 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5480 FrameReg);
5481 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5482 ? StackOffset::getFixed(-Bytes)
5483 : StackOffset::getFixed(Bytes);
5484 SrcReg = DestReg;
5485 FrameReg = DestReg;
5486 }
5487
5488 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5489 "SetNZCV not supported with SVE vectors");
5490 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5491 "WinCFI not supported with SVE vectors");
5492
5493 if (NumDataVectors) {
5494 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5495 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5496 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5497 CFAOffset, FrameReg);
5498 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5499 SrcReg = DestReg;
5500 }
5501
5502 if (NumPredicateVectors) {
5503 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5504 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5505 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5506 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5507 CFAOffset, FrameReg);
5508 }
5509}
5510
5511 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5512 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5513 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5514 LiveIntervals *LIS, VirtRegMap *VRM) const {
5515 // This is a bit of a hack. Consider this instruction:
5516 //
5517 // %0 = COPY %sp; GPR64all:%0
5518 //
5519 // We explicitly chose GPR64all for the virtual register so such a copy might
5520 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5521 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5522 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5523 //
5524 // To prevent that, we are going to constrain the %0 register class here.
5525 if (MI.isFullCopy()) {
5526 Register DstReg = MI.getOperand(0).getReg();
5527 Register SrcReg = MI.getOperand(1).getReg();
5528 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5529 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5530 return nullptr;
5531 }
5532 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5533 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5534 return nullptr;
5535 }
5536 // Nothing can be folded with a copy from/to NZCV.
5537 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5538 return nullptr;
5539 }
5540
5541 // Handle the case where a copy is being spilled or filled but the source
5542 // and destination register class don't match. For example:
5543 //
5544 // %0 = COPY %xzr; GPR64common:%0
5545 //
5546 // In this case we can still safely fold away the COPY and generate the
5547 // following spill code:
5548 //
5549 // STRXui %xzr, %stack.0
5550 //
5551 // This also eliminates spilled cross register class COPYs (e.g. between x and
5552 // d regs) of the same size. For example:
5553 //
5554 // %0 = COPY %1; GPR64:%0, FPR64:%1
5555 //
5556 // will be filled as
5557 //
5558 // LDRDui %0, fi<#0>
5559 //
5560 // instead of
5561 //
5562 // LDRXui %Temp, fi<#0>
5563 // %0 = FMOV %Temp
5564 //
5565 if (MI.isCopy() && Ops.size() == 1 &&
5566 // Make sure we're only folding the explicit COPY defs/uses.
5567 (Ops[0] == 0 || Ops[0] == 1)) {
5568 bool IsSpill = Ops[0] == 0;
5569 bool IsFill = !IsSpill;
5570 const TargetRegisterInfo &TRI = getRegisterInfo();
5571 const MachineRegisterInfo &MRI = MF.getRegInfo();
5572 MachineBasicBlock &MBB = *MI.getParent();
5573 const MachineOperand &DstMO = MI.getOperand(0);
5574 const MachineOperand &SrcMO = MI.getOperand(1);
5575 Register DstReg = DstMO.getReg();
5576 Register SrcReg = SrcMO.getReg();
5577 // This is slightly expensive to compute for physical regs since
5578 // getMinimalPhysRegClass is slow.
5579 auto getRegClass = [&](unsigned Reg) {
5580 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5581 : TRI.getMinimalPhysRegClass(Reg);
5582 };
5583
5584 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5585 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5586 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5587 "Mismatched register size in non subreg COPY");
5588 if (IsSpill)
5589 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5590 getRegClass(SrcReg), &TRI, Register());
5591 else
5592 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5593 getRegClass(DstReg), &TRI, Register());
5594 return &*--InsertPt;
5595 }
5596
5597 // Handle cases like spilling def of:
5598 //
5599 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5600 //
5601 // where the physical register source can be widened and stored to the full
5602 // virtual reg destination stack slot, in this case producing:
5603 //
5604 // STRXui %xzr, %stack.0
5605 //
5606 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5607 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5608 assert(SrcMO.getSubReg() == 0 &&
5609 "Unexpected subreg on physical register");
5610 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5611 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5612 Register());
5613 return &*--InsertPt;
5614 }
5615
5616 // Handle cases like filling use of:
5617 //
5618 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5619 //
5620 // where we can load the full virtual reg source stack slot, into the subreg
5621 // destination, in this case producing:
5622 //
5623 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5624 //
5625 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5626 const TargetRegisterClass *FillRC;
5627 switch (DstMO.getSubReg()) {
5628 default:
5629 FillRC = nullptr;
5630 break;
5631 case AArch64::sub_32:
5632 FillRC = &AArch64::GPR32RegClass;
5633 break;
5634 case AArch64::ssub:
5635 FillRC = &AArch64::FPR32RegClass;
5636 break;
5637 case AArch64::dsub:
5638 FillRC = &AArch64::FPR64RegClass;
5639 break;
5640 }
5641
5642 if (FillRC) {
5643 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5644 TRI.getRegSizeInBits(*FillRC) &&
5645 "Mismatched regclass size on folded subreg COPY");
5646 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5647 Register());
5648 MachineInstr &LoadMI = *--InsertPt;
5649 MachineOperand &LoadDst = LoadMI.getOperand(0);
5650 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5651 LoadDst.setSubReg(DstMO.getSubReg());
5652 LoadDst.setIsUndef();
5653 return &LoadMI;
5654 }
5655 }
5656 }
5657
5658 // Cannot fold.
5659 return nullptr;
5660}
5661
5662 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5663 StackOffset &SOffset,
5664 bool *OutUseUnscaledOp,
5665 unsigned *OutUnscaledOp,
5666 int64_t *EmittableOffset) {
5667 // Set output values in case of early exit.
5668 if (EmittableOffset)
5669 *EmittableOffset = 0;
5670 if (OutUseUnscaledOp)
5671 *OutUseUnscaledOp = false;
5672 if (OutUnscaledOp)
5673 *OutUnscaledOp = 0;
5674
5675 // Exit early for structured vector spills/fills as they can't take an
5676 // immediate offset.
5677 switch (MI.getOpcode()) {
5678 default:
5679 break;
5680 case AArch64::LD1Rv1d:
5681 case AArch64::LD1Rv2s:
5682 case AArch64::LD1Rv2d:
5683 case AArch64::LD1Rv4h:
5684 case AArch64::LD1Rv4s:
5685 case AArch64::LD1Rv8b:
5686 case AArch64::LD1Rv8h:
5687 case AArch64::LD1Rv16b:
5688 case AArch64::LD1Twov2d:
5689 case AArch64::LD1Threev2d:
5690 case AArch64::LD1Fourv2d:
5691 case AArch64::LD1Twov1d:
5692 case AArch64::LD1Threev1d:
5693 case AArch64::LD1Fourv1d:
5694 case AArch64::ST1Twov2d:
5695 case AArch64::ST1Threev2d:
5696 case AArch64::ST1Fourv2d:
5697 case AArch64::ST1Twov1d:
5698 case AArch64::ST1Threev1d:
5699 case AArch64::ST1Fourv1d:
5700 case AArch64::ST1i8:
5701 case AArch64::ST1i16:
5702 case AArch64::ST1i32:
5703 case AArch64::ST1i64:
5704 case AArch64::IRG:
5705 case AArch64::IRGstack:
5706 case AArch64::STGloop:
5707 case AArch64::STZGloop:
5708 return AArch64FrameOffsetCannotUpdate;
5709 }
5710
5711 // Get the min/max offset and the scale.
5712 TypeSize ScaleValue(0U, false), Width(0U, false);
5713 int64_t MinOff, MaxOff;
5714 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5715 MaxOff))
5716 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5717
5718 // Construct the complete offset.
5719 bool IsMulVL = ScaleValue.isScalable();
5720 unsigned Scale = ScaleValue.getKnownMinValue();
5721 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5722
5723 const MachineOperand &ImmOpnd =
5724 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5725 Offset += ImmOpnd.getImm() * Scale;
5726
5727 // If the offset doesn't match the scale, we rewrite the instruction to
5728 // use the unscaled instruction instead. Likewise, if we have a negative
5729 // offset and there is an unscaled op to use.
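 // A rough illustration (not from the original source): LDRXui scales its
 // unsigned immediate by 8, so a byte offset of 12 cannot be encoded directly
 // and the access is switched to the unscaled form, which takes a signed
 // 9-bit byte offset:
 //   ldr  x0, [sp, #12]   // not encodable as LDRXui (12 is not a multiple of 8)
 //   ldur x0, [sp, #12]   // encodable as LDURXi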
5730 std::optional<unsigned> UnscaledOp =
5731 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5732 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5733 if (useUnscaledOp &&
5734 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5735 MaxOff))
5736 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5737
5738 Scale = ScaleValue.getKnownMinValue();
5739 assert(IsMulVL == ScaleValue.isScalable() &&
5740 "Unscaled opcode has different value for scalable");
5741
5742 int64_t Remainder = Offset % Scale;
5743 assert(!(Remainder && useUnscaledOp) &&
5744 "Cannot have remainder when using unscaled op");
5745
5746 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5747 int64_t NewOffset = Offset / Scale;
5748 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5749 Offset = Remainder;
5750 else {
5751 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5752 Offset = Offset - (NewOffset * Scale);
5753 }
5754
5755 if (EmittableOffset)
5756 *EmittableOffset = NewOffset;
5757 if (OutUseUnscaledOp)
5758 *OutUseUnscaledOp = useUnscaledOp;
5759 if (OutUnscaledOp && UnscaledOp)
5760 *OutUnscaledOp = *UnscaledOp;
5761
5762 if (IsMulVL)
5763 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5764 else
5765 SOffset = StackOffset::get(Offset, SOffset.getScalable());
5766 return AArch64FrameOffsetCanUpdate |
5767 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5768}
5769
5770 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5771 unsigned FrameReg, StackOffset &Offset,
5772 const AArch64InstrInfo *TII) {
5773 unsigned Opcode = MI.getOpcode();
5774 unsigned ImmIdx = FrameRegIdx + 1;
5775
5776 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5777 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5778 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5779 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5780 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5781 MI.eraseFromParent();
5782 Offset = StackOffset();
5783 return true;
5784 }
5785
5786 int64_t NewOffset;
5787 unsigned UnscaledOp;
5788 bool UseUnscaledOp;
5789 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5790 &UnscaledOp, &NewOffset);
5791 if (Status & AArch64FrameOffsetCanUpdate) {
5792 if (Status & AArch64FrameOffsetIsLegal)
5793 // Replace the FrameIndex with FrameReg.
5794 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5795 if (UseUnscaledOp)
5796 MI.setDesc(TII->get(UnscaledOp));
5797
5798 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5799 return !Offset;
5800 }
5801
5802 return false;
5803}
5804
5805 void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5806 MachineBasicBlock::iterator MI) const {
5807 DebugLoc DL;
5808 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5809}
5810
5811 MCInst AArch64InstrInfo::getNop() const {
5812 return MCInstBuilder(AArch64::HINT).addImm(0);
5813}
5814
5815// AArch64 supports MachineCombiner.
5816bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5817
5818// True when Opc sets flag
5819static bool isCombineInstrSettingFlag(unsigned Opc) {
5820 switch (Opc) {
5821 case AArch64::ADDSWrr:
5822 case AArch64::ADDSWri:
5823 case AArch64::ADDSXrr:
5824 case AArch64::ADDSXri:
5825 case AArch64::SUBSWrr:
5826 case AArch64::SUBSXrr:
5827 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5828 case AArch64::SUBSWri:
5829 case AArch64::SUBSXri:
5830 return true;
5831 default:
5832 break;
5833 }
5834 return false;
5835}
5836
5837// 32b Opcodes that can be combined with a MUL
5838static bool isCombineInstrCandidate32(unsigned Opc) {
5839 switch (Opc) {
5840 case AArch64::ADDWrr:
5841 case AArch64::ADDWri:
5842 case AArch64::SUBWrr:
5843 case AArch64::ADDSWrr:
5844 case AArch64::ADDSWri:
5845 case AArch64::SUBSWrr:
5846 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5847 case AArch64::SUBWri:
5848 case AArch64::SUBSWri:
5849 return true;
5850 default:
5851 break;
5852 }
5853 return false;
5854}
5855
5856// 64b Opcodes that can be combined with a MUL
5857static bool isCombineInstrCandidate64(unsigned Opc) {
5858 switch (Opc) {
5859 case AArch64::ADDXrr:
5860 case AArch64::ADDXri:
5861 case AArch64::SUBXrr:
5862 case AArch64::ADDSXrr:
5863 case AArch64::ADDSXri:
5864 case AArch64::SUBSXrr:
5865 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5866 case AArch64::SUBXri:
5867 case AArch64::SUBSXri:
5868 case AArch64::ADDv8i8:
5869 case AArch64::ADDv16i8:
5870 case AArch64::ADDv4i16:
5871 case AArch64::ADDv8i16:
5872 case AArch64::ADDv2i32:
5873 case AArch64::ADDv4i32:
5874 case AArch64::SUBv8i8:
5875 case AArch64::SUBv16i8:
5876 case AArch64::SUBv4i16:
5877 case AArch64::SUBv8i16:
5878 case AArch64::SUBv2i32:
5879 case AArch64::SUBv4i32:
5880 return true;
5881 default:
5882 break;
5883 }
5884 return false;
5885}
5886
5887// FP Opcodes that can be combined with a FMUL.
5888static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5889 switch (Inst.getOpcode()) {
5890 default:
5891 break;
5892 case AArch64::FADDHrr:
5893 case AArch64::FADDSrr:
5894 case AArch64::FADDDrr:
5895 case AArch64::FADDv4f16:
5896 case AArch64::FADDv8f16:
5897 case AArch64::FADDv2f32:
5898 case AArch64::FADDv2f64:
5899 case AArch64::FADDv4f32:
5900 case AArch64::FSUBHrr:
5901 case AArch64::FSUBSrr:
5902 case AArch64::FSUBDrr:
5903 case AArch64::FSUBv4f16:
5904 case AArch64::FSUBv8f16:
5905 case AArch64::FSUBv2f32:
5906 case AArch64::FSUBv2f64:
5907 case AArch64::FSUBv4f32:
5908 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5909 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5910 // the target options or if FADD/FSUB has the contract fast-math flag.
5911 return Options.UnsafeFPMath ||
5912 Options.AllowFPOpFusion == FPOpFusion::Fast ||
5913 Inst.getFlag(MachineInstr::FmContract);
5914 return true;
5915 }
5916 return false;
5917}
5918
5919// Opcodes that can be combined with a MUL
5920static bool isCombineInstrCandidate(unsigned Opc) {
5921 return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
5922}
5923
5924//
5925// Utility routine that checks if \param MO is defined by an
5926// \param CombineOpc instruction in the basic block \param MBB
5927 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5928 unsigned CombineOpc, unsigned ZeroReg = 0,
5929 bool CheckZeroReg = false) {
5930 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5931 MachineInstr *MI = nullptr;
5932
5933 if (MO.isReg() && MO.getReg().isVirtual())
5934 MI = MRI.getUniqueVRegDef(MO.getReg());
5935 // And it needs to be in the trace (otherwise, it won't have a depth).
5936 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5937 return false;
5938 // It must only be used by the user we combine with.
5939 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5940 return false;
5941
5942 if (CheckZeroReg) {
5943 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5944 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5945 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5946 // The third input reg must be zero.
5947 if (MI->getOperand(3).getReg() != ZeroReg)
5948 return false;
5949 }
5950
5951 if (isCombineInstrSettingFlag(CombineOpc) &&
5952 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
5953 return false;
5954
5955 return true;
5956}
5957
5958//
5959// Is \param MO defined by an integer multiply and can be combined?
5960 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5961 unsigned MulOpc, unsigned ZeroReg) {
5962 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5963}
5964
5965//
5966// Is \param MO defined by a floating-point multiply and can be combined?
5967 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5968 unsigned MulOpc) {
5969 return canCombine(MBB, MO, MulOpc);
5970}
5971
5972// TODO: There are many more machine instruction opcodes to match:
5973// 1. Other data types (integer, vectors)
5974// 2. Other math / logic operations (xor, or)
5975// 3. Other forms of the same operation (intrinsics and other variants)
5976 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5977 bool Invert) const {
5978 if (Invert)
5979 return false;
5980 switch (Inst.getOpcode()) {
5981 // == Floating-point types ==
5982 // -- Floating-point instructions --
5983 case AArch64::FADDHrr:
5984 case AArch64::FADDSrr:
5985 case AArch64::FADDDrr:
5986 case AArch64::FMULHrr:
5987 case AArch64::FMULSrr:
5988 case AArch64::FMULDrr:
5989 case AArch64::FMULX16:
5990 case AArch64::FMULX32:
5991 case AArch64::FMULX64:
5992 // -- Advanced SIMD instructions --
5993 case AArch64::FADDv4f16:
5994 case AArch64::FADDv8f16:
5995 case AArch64::FADDv2f32:
5996 case AArch64::FADDv4f32:
5997 case AArch64::FADDv2f64:
5998 case AArch64::FMULv4f16:
5999 case AArch64::FMULv8f16:
6000 case AArch64::FMULv2f32:
6001 case AArch64::FMULv4f32:
6002 case AArch64::FMULv2f64:
6003 case AArch64::FMULXv4f16:
6004 case AArch64::FMULXv8f16:
6005 case AArch64::FMULXv2f32:
6006 case AArch64::FMULXv4f32:
6007 case AArch64::FMULXv2f64:
6008 // -- SVE instructions --
6009 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6010 // in the SVE instruction set (though there are predicated ones).
6011 case AArch64::FADD_ZZZ_H:
6012 case AArch64::FADD_ZZZ_S:
6013 case AArch64::FADD_ZZZ_D:
6014 case AArch64::FMUL_ZZZ_H:
6015 case AArch64::FMUL_ZZZ_S:
6016 case AArch64::FMUL_ZZZ_D:
6017 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6018 (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6019 Inst.getFlag(MachineInstr::MIFlag::FmNsz));
6020
6021 // == Integer types ==
6022 // -- Base instructions --
6023 // Opcodes MULWrr and MULXrr don't exist because
6024 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6025 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6026 // The machine-combiner does not support three-source-operand machine
6027 // instructions, so we cannot reassociate MULs.
6028 case AArch64::ADDWrr:
6029 case AArch64::ADDXrr:
6030 case AArch64::ANDWrr:
6031 case AArch64::ANDXrr:
6032 case AArch64::ORRWrr:
6033 case AArch64::ORRXrr:
6034 case AArch64::EORWrr:
6035 case AArch64::EORXrr:
6036 case AArch64::EONWrr:
6037 case AArch64::EONXrr:
6038 // -- Advanced SIMD instructions --
6039 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6040 // in the Advanced SIMD instruction set.
6041 case AArch64::ADDv8i8:
6042 case AArch64::ADDv16i8:
6043 case AArch64::ADDv4i16:
6044 case AArch64::ADDv8i16:
6045 case AArch64::ADDv2i32:
6046 case AArch64::ADDv4i32:
6047 case AArch64::ADDv1i64:
6048 case AArch64::ADDv2i64:
6049 case AArch64::MULv8i8:
6050 case AArch64::MULv16i8:
6051 case AArch64::MULv4i16:
6052 case AArch64::MULv8i16:
6053 case AArch64::MULv2i32:
6054 case AArch64::MULv4i32:
6055 case AArch64::ANDv8i8:
6056 case AArch64::ANDv16i8:
6057 case AArch64::ORRv8i8:
6058 case AArch64::ORRv16i8:
6059 case AArch64::EORv8i8:
6060 case AArch64::EORv16i8:
6061 // -- SVE instructions --
6062 case AArch64::ADD_ZZZ_B:
6063 case AArch64::ADD_ZZZ_H:
6064 case AArch64::ADD_ZZZ_S:
6065 case AArch64::ADD_ZZZ_D:
6066 case AArch64::MUL_ZZZ_B:
6067 case AArch64::MUL_ZZZ_H:
6068 case AArch64::MUL_ZZZ_S:
6069 case AArch64::MUL_ZZZ_D:
6070 case AArch64::AND_ZZZ:
6071 case AArch64::ORR_ZZZ:
6072 case AArch64::EOR_ZZZ:
6073 return true;
6074
6075 default:
6076 return false;
6077 }
6078}
6079
6080/// Find instructions that can be turned into madd.
6081 static bool getMaddPatterns(MachineInstr &Root,
6082 SmallVectorImpl<unsigned> &Patterns) {
6083 unsigned Opc = Root.getOpcode();
6084 MachineBasicBlock &MBB = *Root.getParent();
6085 bool Found = false;
6086
6087 if (!isCombineInstrCandidate(Opc))
6088 return false;
6089 if (isCombineInstrSettingFlag(Opc)) {
6090 int Cmp_NZCV =
6091 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6092 // When NZCV is live bail out.
6093 if (Cmp_NZCV == -1)
6094 return false;
6095 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6096 // When opcode can't change bail out.
6097 // CHECKME: do we miss any cases for opcode conversion?
6098 if (NewOpc == Opc)
6099 return false;
6100 Opc = NewOpc;
6101 }
6102
6103 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6104 unsigned Pattern) {
6105 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6106 Patterns.push_back(Pattern);
6107 Found = true;
6108 }
6109 };
6110
6111 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6112 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6113 Patterns.push_back(Pattern);
6114 Found = true;
6115 }
6116 };
6117
6118 typedef AArch64MachineCombinerPattern MCP;
6119
6120 switch (Opc) {
6121 default:
6122 break;
6123 case AArch64::ADDWrr:
6124 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6125 "ADDWrr does not have register operands");
6126 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6127 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6128 break;
6129 case AArch64::ADDXrr:
6130 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6131 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6132 break;
6133 case AArch64::SUBWrr:
6134 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6135 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6136 break;
6137 case AArch64::SUBXrr:
6138 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6139 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6140 break;
6141 case AArch64::ADDWri:
6142 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6143 break;
6144 case AArch64::ADDXri:
6145 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6146 break;
6147 case AArch64::SUBWri:
6148 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6149 break;
6150 case AArch64::SUBXri:
6151 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6152 break;
6153 case AArch64::ADDv8i8:
6154 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6155 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6156 break;
6157 case AArch64::ADDv16i8:
6158 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6159 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6160 break;
6161 case AArch64::ADDv4i16:
6162 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6163 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6164 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6165 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6166 break;
6167 case AArch64::ADDv8i16:
6168 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6169 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6170 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6171 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6172 break;
6173 case AArch64::ADDv2i32:
6174 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6175 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6176 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6177 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6178 break;
6179 case AArch64::ADDv4i32:
6180 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6181 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6182 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6183 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6184 break;
6185 case AArch64::SUBv8i8:
6186 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6187 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6188 break;
6189 case AArch64::SUBv16i8:
6190 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6191 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6192 break;
6193 case AArch64::SUBv4i16:
6194 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6195 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6196 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6197 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6198 break;
6199 case AArch64::SUBv8i16:
6200 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6201 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6202 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6203 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6204 break;
6205 case AArch64::SUBv2i32:
6206 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6207 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6208 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6209 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6210 break;
6211 case AArch64::SUBv4i32:
6212 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6213 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6214 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6215 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6216 break;
6217 }
6218 return Found;
6219}
6220/// Floating-Point Support
6221
6222/// Find instructions that can be turned into madd.
6223 static bool getFMAPatterns(MachineInstr &Root,
6224 SmallVectorImpl<unsigned> &Patterns) {
6225
6226 if (!isCombineInstrCandidateFP(Root))
6227 return false;
6228
6229 MachineBasicBlock &MBB = *Root.getParent();
6230 bool Found = false;
6231
6232 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6233 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6234 Patterns.push_back(Pattern);
6235 return true;
6236 }
6237 return false;
6238 };
6239
6240 typedef AArch64MachineCombinerPattern MCP;
6241
6242 switch (Root.getOpcode()) {
6243 default:
6244 assert(false && "Unsupported FP instruction in combiner\n");
6245 break;
6246 case AArch64::FADDHrr:
6247 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6248 "FADDHrr does not have register operands");
6249
6250 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6251 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6252 break;
6253 case AArch64::FADDSrr:
6254 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6255 "FADDSrr does not have register operands");
6256
6257 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6258 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6259
6260 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6261 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6262 break;
6263 case AArch64::FADDDrr:
6264 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6265 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6266
6267 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6268 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6269 break;
6270 case AArch64::FADDv4f16:
6271 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6272 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6273
6274 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6275 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6276 break;
6277 case AArch64::FADDv8f16:
6278 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6279 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6280
6281 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6282 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6283 break;
6284 case AArch64::FADDv2f32:
6285 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6286 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6287
6288 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6289 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6290 break;
6291 case AArch64::FADDv2f64:
6292 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6293 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6294
6295 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6296 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6297 break;
6298 case AArch64::FADDv4f32:
6299 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6300 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6301
6302 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6303 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6304 break;
6305 case AArch64::FSUBHrr:
6306 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6307 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6308 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6309 break;
6310 case AArch64::FSUBSrr:
6311 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6312
6313 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6314 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6315
6316 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6317 break;
6318 case AArch64::FSUBDrr:
6319 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6320
6321 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6322 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6323
6324 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6325 break;
6326 case AArch64::FSUBv4f16:
6327 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6328 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6329
6330 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6331 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6332 break;
6333 case AArch64::FSUBv8f16:
6334 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6335 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6336
6337 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6338 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6339 break;
6340 case AArch64::FSUBv2f32:
6341 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6342 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6343
6344 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6345 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6346 break;
6347 case AArch64::FSUBv2f64:
6348 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6349 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6350
6351 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6352 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6353 break;
6354 case AArch64::FSUBv4f32:
6355 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6356 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6357
6358 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6359 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6360 break;
6361 }
6362 return Found;
6363}
6364
6365 static bool getFMULPatterns(MachineInstr &Root,
6366 SmallVectorImpl<unsigned> &Patterns) {
6367 MachineBasicBlock &MBB = *Root.getParent();
6368 bool Found = false;
6369
6370 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6371 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6372 MachineOperand &MO = Root.getOperand(Operand);
6373 MachineInstr *MI = nullptr;
6374 if (MO.isReg() && MO.getReg().isVirtual())
6375 MI = MRI.getUniqueVRegDef(MO.getReg());
6376 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6377 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6378 MI->getOperand(1).getReg().isVirtual())
6379 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6380 if (MI && MI->getOpcode() == Opcode) {
6381 Patterns.push_back(Pattern);
6382 return true;
6383 }
6384 return false;
6385 };
6386
6387 typedef AArch64MachineCombinerPattern MCP;
6388
6389 switch (Root.getOpcode()) {
6390 default:
6391 return false;
6392 case AArch64::FMULv2f32:
6393 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6394 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6395 break;
6396 case AArch64::FMULv2f64:
6397 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6398 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6399 break;
6400 case AArch64::FMULv4f16:
6401 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6402 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6403 break;
6404 case AArch64::FMULv4f32:
6405 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6406 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6407 break;
6408 case AArch64::FMULv8f16:
6409 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6410 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6411 break;
6412 }
6413
6414 return Found;
6415}
6416
6417 static bool getFNEGPatterns(MachineInstr &Root,
6418 SmallVectorImpl<unsigned> &Patterns) {
6419 unsigned Opc = Root.getOpcode();
6420 MachineBasicBlock &MBB = *Root.getParent();
6421 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6422
6423 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6424 MachineOperand &MO = Root.getOperand(1);
6425 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6426 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6427 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6428 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
6429 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
6430 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
6431 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6432 Patterns.push_back(Pattern);
6433 return true;
6434 }
6435 return false;
6436 };
6437
6438 switch (Opc) {
6439 default:
6440 break;
6441 case AArch64::FNEGDr:
6442 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6443 case AArch64::FNEGSr:
6444 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6445 }
6446
6447 return false;
6448}
6449
6450/// Return true when a code sequence can improve throughput. It
6451/// should be called only for instructions in loops.
6452/// \param Pattern - combiner pattern
6453 bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
6454 switch (Pattern) {
6455 default:
6456 break;
6562 return true;
6563 } // end switch (Pattern)
6564 return false;
6565}
6566
6567/// Find other MI combine patterns.
6568 static bool getMiscPatterns(MachineInstr &Root,
6569 SmallVectorImpl<unsigned> &Patterns) {
6570 // A - (B + C) ==> (A - B) - C or (A - C) - B
6571 unsigned Opc = Root.getOpcode();
6572 MachineBasicBlock &MBB = *Root.getParent();
6573
6574 switch (Opc) {
6575 case AArch64::SUBWrr:
6576 case AArch64::SUBSWrr:
6577 case AArch64::SUBXrr:
6578 case AArch64::SUBSXrr:
6579 // Found candidate root.
6580 break;
6581 default:
6582 return false;
6583 }
6584
6585 if (isCombineInstrSettingFlag(Opc) &&
6586 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6587 -1)
6588 return false;
6589
6590 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6591 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6592 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6593 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6594 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
6595 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
6596 return true;
6597 }
6598
6599 return false;
6600}
6601
6602 CombinerObjective
6603 AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
6604 switch (Pattern) {
6605 case AArch64MachineCombinerPattern::SUBADD_OP1:
6606 case AArch64MachineCombinerPattern::SUBADD_OP2:
6607 return CombinerObjective::MustReduceDepth;
6608 default:
6609 return TargetInstrInfo::getCombinerObjective(Pattern);
6610 }
6611}
6612
6613/// Return true when there is potentially a faster code sequence for an
6614/// instruction chain ending in \p Root. All potential patterns are listed in
6615/// the \p Pattern vector. Pattern should be sorted in priority order since the
6616/// pattern evaluator stops checking as soon as it finds a faster sequence.
6617
6618 bool AArch64InstrInfo::getMachineCombinerPatterns(
6619 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6620 bool DoRegPressureReduce) const {
6621 // Integer patterns
6622 if (getMaddPatterns(Root, Patterns))
6623 return true;
6624 // Floating point patterns
6625 if (getFMULPatterns(Root, Patterns))
6626 return true;
6627 if (getFMAPatterns(Root, Patterns))
6628 return true;
6629 if (getFNEGPatterns(Root, Patterns))
6630 return true;
6631
6632 // Other patterns
6633 if (getMiscPatterns(Root, Patterns))
6634 return true;
6635
6636 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6637 DoRegPressureReduce);
6638}
6639
6640 enum class FMAInstKind { Default, Indexed, Accumulator };
6641/// genFusedMultiply - Generate fused multiply instructions.
6642/// This function supports both integer and floating point instructions.
6643/// A typical example:
6644/// F|MUL I=A,B,0
6645/// F|ADD R,I,C
6646/// ==> F|MADD R,A,B,C
6647/// \param MF Containing MachineFunction
6648/// \param MRI Register information
6649/// \param TII Target information
6650/// \param Root is the F|ADD instruction
6651/// \param [out] InsInstrs is a vector of machine instructions and will
6652/// contain the generated madd instruction
6653/// \param IdxMulOpd is index of operand in Root that is the result of
6654/// the F|MUL. In the example above IdxMulOpd is 1.
6655/// \param MaddOpc the opcode of the f|madd instruction
6656/// \param RC Register class of operands
6657/// \param kind of fma instruction (addressing mode) to be generated
6658/// \param ReplacedAddend is the result register from the instruction
6659/// replacing the non-combined operand, if any.
6660static MachineInstr *
6662 const TargetInstrInfo *TII, MachineInstr &Root,
6663 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6664 unsigned MaddOpc, const TargetRegisterClass *RC,
6665 FMAInstKind kind = FMAInstKind::Default,
6666 const Register *ReplacedAddend = nullptr) {
6667 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6668
6669 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6670 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6671 Register ResultReg = Root.getOperand(0).getReg();
6672 Register SrcReg0 = MUL->getOperand(1).getReg();
6673 bool Src0IsKill = MUL->getOperand(1).isKill();
6674 Register SrcReg1 = MUL->getOperand(2).getReg();
6675 bool Src1IsKill = MUL->getOperand(2).isKill();
6676
6677 Register SrcReg2;
6678 bool Src2IsKill;
6679 if (ReplacedAddend) {
6680 // If we just generated a new addend, we must be its only use.
6681 SrcReg2 = *ReplacedAddend;
6682 Src2IsKill = true;
6683 } else {
6684 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6685 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6686 }
6687
6688 if (ResultReg.isVirtual())
6689 MRI.constrainRegClass(ResultReg, RC);
6690 if (SrcReg0.isVirtual())
6691 MRI.constrainRegClass(SrcReg0, RC);
6692 if (SrcReg1.isVirtual())
6693 MRI.constrainRegClass(SrcReg1, RC);
6694 if (SrcReg2.isVirtual())
6695 MRI.constrainRegClass(SrcReg2, RC);
6696
6697 MachineInstrBuilder MIB;
6698 if (kind == FMAInstKind::Default)
6699 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6700 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6701 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6702 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6703 else if (kind == FMAInstKind::Indexed)
6704 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6705 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6706 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6707 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6708 .addImm(MUL->getOperand(3).getImm());
6709 else if (kind == FMAInstKind::Accumulator)
6710 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6711 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6712 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6713 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6714 else
6715 assert(false && "Invalid FMA instruction kind \n");
6716 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
6717 InsInstrs.push_back(MIB);
6718 return MUL;
6719}
6720
6721static MachineInstr *
6722 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
6723 const TargetInstrInfo *TII, MachineInstr &Root,
6724 SmallVectorImpl<MachineInstr *> &InsInstrs) {
6725 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6726
6727 unsigned Opc = 0;
6728 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6729 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6730 Opc = AArch64::FNMADDSrrr;
6731 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6732 Opc = AArch64::FNMADDDrrr;
6733 else
6734 return nullptr;
6735
6736 Register ResultReg = Root.getOperand(0).getReg();
6737 Register SrcReg0 = MAD->getOperand(1).getReg();
6738 Register SrcReg1 = MAD->getOperand(2).getReg();
6739 Register SrcReg2 = MAD->getOperand(3).getReg();
6740 bool Src0IsKill = MAD->getOperand(1).isKill();
6741 bool Src1IsKill = MAD->getOperand(2).isKill();
6742 bool Src2IsKill = MAD->getOperand(3).isKill();
6743 if (ResultReg.isVirtual())
6744 MRI.constrainRegClass(ResultReg, RC);
6745 if (SrcReg0.isVirtual())
6746 MRI.constrainRegClass(SrcReg0, RC);
6747 if (SrcReg1.isVirtual())
6748 MRI.constrainRegClass(SrcReg1, RC);
6749 if (SrcReg2.isVirtual())
6750 MRI.constrainRegClass(SrcReg2, RC);
6751
6752 MachineInstrBuilder MIB =
6753 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6754 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6755 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6756 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6757 InsInstrs.push_back(MIB);
6758
6759 return MAD;
6760}
6761
6762/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
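/// A rough MIR sketch (not from the original source; register numbers are
/// hypothetical), for the case IdxDupOp == 2:
///   %3:fpr128 = DUPv4i32lane %2:fpr128, 1
///   %4:fpr128 = FMULv4f32 %1:fpr128, %3:fpr128
/// ==>
///   %4:fpr128 = FMULv4i32_indexed %1:fpr128, %2:fpr128, 1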
6763static MachineInstr *
6764 genIndexedMultiply(MachineInstr &Root,
6765 SmallVectorImpl<MachineInstr *> &InsInstrs,
6766 unsigned IdxDupOp, unsigned MulOpc,
6767 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6768 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6769 "Invalid index of FMUL operand");
6770
6771 MachineFunction &MF = *Root.getMF();
6772 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6773
6774 MachineInstr *Dup =
6775 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6776
6777 if (Dup->getOpcode() == TargetOpcode::COPY)
6778 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6779
6780 Register DupSrcReg = Dup->getOperand(1).getReg();
6781 MRI.clearKillFlags(DupSrcReg);
6782 MRI.constrainRegClass(DupSrcReg, RC);
6783
6784 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6785
6786 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6787 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6788
6789 Register ResultReg = Root.getOperand(0).getReg();
6790
6791 MachineInstrBuilder MIB;
6792 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6793 .add(MulOp)
6794 .addReg(DupSrcReg)
6795 .addImm(DupSrcLane);
6796
6797 InsInstrs.push_back(MIB);
6798 return &Root;
6799}
6800
6801/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6802/// instructions.
6803///
6804/// \see genFusedMultiply
6805 static MachineInstr *genFusedMultiplyAcc(
6806 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6807 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6808 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6809 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6810 FMAInstKind::Accumulator);
6811}
6812
6813/// genNeg - Helper to generate an intermediate negation of the second operand
6814/// of Root
6815 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6816 const TargetInstrInfo *TII, MachineInstr &Root,
6817 SmallVectorImpl<MachineInstr *> &InsInstrs,
6818 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6819 unsigned MnegOpc, const TargetRegisterClass *RC) {
6820 Register NewVR = MRI.createVirtualRegister(RC);
6821 MachineInstrBuilder MIB =
6822 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6823 .add(Root.getOperand(2));
6824 InsInstrs.push_back(MIB);
6825
6826 assert(InstrIdxForVirtReg.empty());
6827 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6828
6829 return NewVR;
6830}
6831
6832/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6833/// instructions with an additional negation of the accumulator
6834 static MachineInstr *genFusedMultiplyAccNeg(
6835 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6836 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6837 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6838 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6839 assert(IdxMulOpd == 1);
6840
6841 Register NewVR =
6842 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6843 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6844 FMAInstKind::Accumulator, &NewVR);
6845}
6846
6847/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
6848/// instructions.
6849///
6850/// \see genFusedMultiply
6851 static MachineInstr *genFusedMultiplyIdx(
6852 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6853 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6854 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6855 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6856 FMAInstKind::Indexed);
6857}
6858
6859/// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
6860/// instructions with an additional negation of the accumulator
6861 static MachineInstr *genFusedMultiplyIdxNeg(
6862 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6863 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6864 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6865 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6866 assert(IdxMulOpd == 1);
6867
6868 Register NewVR =
6869 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6870
6871 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6872 FMAInstKind::Indexed, &NewVR);
6873}
6874
6875/// genMaddR - Generate madd instruction and combine mul and add using
6876/// an extra virtual register
6877/// Example - an ADD intermediate needs to be stored in a register:
6878/// MUL I=A,B,0
6879/// ADD R,I,Imm
6880/// ==> ORR V, ZR, Imm
6881/// ==> MADD R,A,B,V
6882/// \param MF Containing MachineFunction
6883/// \param MRI Register information
6884/// \param TII Target information
6885/// \param Root is the ADD instruction
6886/// \param [out] InsInstrs is a vector of machine instructions and will
6887/// contain the generated madd instruction
6888/// \param IdxMulOpd is index of operand in Root that is the result of
6889/// the MUL. In the example above IdxMulOpd is 1.
6890/// \param MaddOpc the opcode of the madd instruction
6891/// \param VR is a virtual register that holds the value of an ADD operand
6892/// (V in the example above).
6893/// \param RC Register class of operands
6894 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6895 const TargetInstrInfo *TII, MachineInstr &Root,
6896 SmallVectorImpl<MachineInstr *> &InsInstrs,
6897 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6898 const TargetRegisterClass *RC) {
6899 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6900
6901 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6902 Register ResultReg = Root.getOperand(0).getReg();
6903 Register SrcReg0 = MUL->getOperand(1).getReg();
6904 bool Src0IsKill = MUL->getOperand(1).isKill();
6905 Register SrcReg1 = MUL->getOperand(2).getReg();
6906 bool Src1IsKill = MUL->getOperand(2).isKill();
6907
6908 if (ResultReg.isVirtual())
6909 MRI.constrainRegClass(ResultReg, RC);
6910 if (SrcReg0.isVirtual())
6911 MRI.constrainRegClass(SrcReg0, RC);
6912 if (SrcReg1.isVirtual())
6913 MRI.constrainRegClass(SrcReg1, RC);
6914 if (Register(VR).isVirtual())
6915 MRI.constrainRegClass(VR, RC);
6916
6917 MachineInstrBuilder MIB =
6918 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6919 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6920 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6921 .addReg(VR);
6922 // Insert the MADD
6923 InsInstrs.push_back(MIB);
6924 return MUL;
6925}
6926
6927/// Do the following transformation
6928/// A - (B + C) ==> (A - B) - C
6929/// A - (B + C) ==> (A - C) - B
6930static void
6931 genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6932 const TargetInstrInfo *TII, MachineInstr &Root,
6933 SmallVectorImpl<MachineInstr *> &InsInstrs,
6934 SmallVectorImpl<MachineInstr *> &DelInstrs,
6935 unsigned IdxOpd1,
6936 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6937 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6938 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6939 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6940
6941 Register ResultReg = Root.getOperand(0).getReg();
6942 Register RegA = Root.getOperand(1).getReg();
6943 bool RegAIsKill = Root.getOperand(1).isKill();
6944 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6945 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6946 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6947 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6948 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6949
6950 unsigned Opcode = Root.getOpcode();
6951 if (Opcode == AArch64::SUBSWrr)
6952 Opcode = AArch64::SUBWrr;
6953 else if (Opcode == AArch64::SUBSXrr)
6954 Opcode = AArch64::SUBXrr;
6955 else
6956 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6957 "Unexpected instruction opcode.");
6958
6959 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
6960 Flags &= ~MachineInstr::NoSWrap;
6961 Flags &= ~MachineInstr::NoUWrap;
6962
6963 MachineInstrBuilder MIB1 =
6964 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6965 .addReg(RegA, getKillRegState(RegAIsKill))
6966 .addReg(RegB, getKillRegState(RegBIsKill))
6967 .setMIFlags(Flags);
6968 MachineInstrBuilder MIB2 =
6969 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6970 .addReg(NewVR, getKillRegState(true))
6971 .addReg(RegC, getKillRegState(RegCIsKill))
6972 .setMIFlags(Flags);
6973
6974 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6975 InsInstrs.push_back(MIB1);
6976 InsInstrs.push_back(MIB2);
6977 DelInstrs.push_back(AddMI);
6978 DelInstrs.push_back(&Root);
6979}
6980
6981/// When getMachineCombinerPatterns() finds potential patterns,
6982/// this function generates the instructions that could replace the
6983/// original code sequence
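/// A rough MIR sketch (not from the original source; virtual register numbers
/// are hypothetical), for the MULADDW_OP1 pattern:
///   %3:gpr32 = MADDWrrr %1, %2, $wzr    ; the MUL (MUL is MADD with WZR)
///   %4:gpr32 = ADDWrr killed %3, %0
/// ==>
///   %4:gpr32 = MADDWrrr %1, %2, %0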
6984 void AArch64InstrInfo::genAlternativeCodeSequence(
6985 MachineInstr &Root, unsigned Pattern,
6986 SmallVectorImpl<MachineInstr *> &InsInstrs,
6987 SmallVectorImpl<MachineInstr *> &DelInstrs,
6988 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6989 MachineBasicBlock &MBB = *Root.getParent();
6990 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6991 MachineFunction &MF = *MBB.getParent();
6992 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6993
6994 MachineInstr *MUL = nullptr;
6995 const TargetRegisterClass *RC;
6996 unsigned Opc;
6997 switch (Pattern) {
6998 default:
6999 // Reassociate instructions.
7000 TargetInstrInfo::reassociateOps(Root, Pattern, InsInstrs,
7001 DelInstrs, InstrIdxForVirtReg);
7002 return;
7003 case AArch64MachineCombinerPattern::SUBADD_OP1:
7004 // A - (B + C)
7005 // ==> (A - B) - C
7006 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
7007 InstrIdxForVirtReg);
7008 return;
7009 case AArch64MachineCombinerPattern::SUBADD_OP2:
7010 // A - (B + C)
7011 // ==> (A - C) - B
7012 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
7013 InstrIdxForVirtReg);
7014 return;
7015 case AArch64MachineCombinerPattern::MULADDW_OP1:
7016 case AArch64MachineCombinerPattern::MULADDX_OP1:
7017 // MUL I=A,B,0
7018 // ADD R,I,C
7019 // ==> MADD R,A,B,C
7020 // --- Create(MADD);
7021 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
7022 Opc = AArch64::MADDWrrr;
7023 RC = &AArch64::GPR32RegClass;
7024 } else {
7025 Opc = AArch64::MADDXrrr;
7026 RC = &AArch64::GPR64RegClass;
7027 }
7028 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7029 break;
7030 case AArch64MachineCombinerPattern::MULADDW_OP2:
7031 case AArch64MachineCombinerPattern::MULADDX_OP2:
7032 // MUL I=A,B,0
7033 // ADD R,C,I
7034 // ==> MADD R,A,B,C
7035 // --- Create(MADD);
7036 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
7037 Opc = AArch64::MADDWrrr;
7038 RC = &AArch64::GPR32RegClass;
7039 } else {
7040 Opc = AArch64::MADDXrrr;
7041 RC = &AArch64::GPR64RegClass;
7042 }
7043 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7044 break;
7045 case AArch64MachineCombinerPattern::MULADDWI_OP1:
7046 case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7047 // MUL I=A,B,0
7048 // ADD R,I,Imm
7049 // ==> MOV V, Imm
7050 // ==> MADD R,A,B,V
7051 // --- Create(MADD);
7052 const TargetRegisterClass *OrrRC;
7053 unsigned BitSize, OrrOpc, ZeroReg;
7054 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7055 OrrOpc = AArch64::ORRWri;
7056 OrrRC = &AArch64::GPR32spRegClass;
7057 BitSize = 32;
7058 ZeroReg = AArch64::WZR;
7059 Opc = AArch64::MADDWrrr;
7060 RC = &AArch64::GPR32RegClass;
7061 } else {
7062 OrrOpc = AArch64::ORRXri;
7063 OrrRC = &AArch64::GPR64spRegClass;
7064 BitSize = 64;
7065 ZeroReg = AArch64::XZR;
7066 Opc = AArch64::MADDXrrr;
7067 RC = &AArch64::GPR64RegClass;
7068 }
7069 Register NewVR = MRI.createVirtualRegister(OrrRC);
7070 uint64_t Imm = Root.getOperand(2).getImm();
7071
7072 if (Root.getOperand(3).isImm()) {
7073 unsigned Val = Root.getOperand(3).getImm();
7074 Imm = Imm << Val;
7075 }
7076 uint64_t UImm = SignExtend64(Imm, BitSize);
7077 // The immediate can be composed via a single instruction.
7078 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7079 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7080 if (Insn.size() != 1)
7081 return;
7082 auto MovI = Insn.begin();
7083 MachineInstrBuilder MIB1;
7084 // MOV is an alias for one of three instructions: movz, movn, and orr.
7085 if (MovI->Opcode == OrrOpc)
7086 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7087 .addReg(ZeroReg)
7088 .addImm(MovI->Op2);
7089 else {
7090 if (BitSize == 32)
7091 assert((MovI->Opcode == AArch64::MOVNWi ||
7092 MovI->Opcode == AArch64::MOVZWi) &&
7093 "Expected opcode");
7094 else
7095 assert((MovI->Opcode == AArch64::MOVNXi ||
7096 MovI->Opcode == AArch64::MOVZXi) &&
7097 "Expected opcode");
7098 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7099 .addImm(MovI->Op1)
7100 .addImm(MovI->Op2);
7101 }
7102 InsInstrs.push_back(MIB1);
7103 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7104 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7105 break;
7106 }
7107 case AArch64MachineCombinerPattern::MULSUBW_OP1:
7108 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
7109 // MUL I=A,B,0
7110 // SUB R,I, C
7111 // ==> SUB V, 0, C
7112 // ==> MADD R,A,B,V // = -C + A*B
7113 // --- Create(MADD);
7114 const TargetRegisterClass *SubRC;
7115 unsigned SubOpc, ZeroReg;
7116 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
7117 SubOpc = AArch64::SUBWrr;
7118 SubRC = &AArch64::GPR32spRegClass;
7119 ZeroReg = AArch64::WZR;
7120 Opc = AArch64::MADDWrrr;
7121 RC = &AArch64::GPR32RegClass;
7122 } else {
7123 SubOpc = AArch64::SUBXrr;
7124 SubRC = &AArch64::GPR64spRegClass;
7125 ZeroReg = AArch64::XZR;
7126 Opc = AArch64::MADDXrrr;
7127 RC = &AArch64::GPR64RegClass;
7128 }
7129 Register NewVR = MRI.createVirtualRegister(SubRC);
7130 // SUB NewVR, 0, C
7131 MachineInstrBuilder MIB1 =
7132 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7133 .addReg(ZeroReg)
7134 .add(Root.getOperand(2));
7135 InsInstrs.push_back(MIB1);
7136 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7137 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7138 break;
7139 }
7140 case AArch64MachineCombinerPattern::MULSUBW_OP2:
7141 case AArch64MachineCombinerPattern::MULSUBX_OP2:
7142 // MUL I=A,B,0
7143 // SUB R,C,I
7144 // ==> MSUB R,A,B,C (computes C - A*B)
7145 // --- Create(MSUB);
7146 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
7147 Opc = AArch64::MSUBWrrr;
7148 RC = &AArch64::GPR32RegClass;
7149 } else {
7150 Opc = AArch64::MSUBXrrr;
7151 RC = &AArch64::GPR64RegClass;
7152 }
7153 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7154 break;
7155 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7156 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7157 // MUL I=A,B,0
7158 // SUB R,I, Imm
7159 // ==> MOV V, -Imm
7160 // ==> MADD R,A,B,V // = -Imm + A*B
7161 // --- Create(MADD);
7162 const TargetRegisterClass *OrrRC;
7163 unsigned BitSize, OrrOpc, ZeroReg;
7164 if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7165 OrrOpc = AArch64::ORRWri;
7166 OrrRC = &AArch64::GPR32spRegClass;
7167 BitSize = 32;
7168 ZeroReg = AArch64::WZR;
7169 Opc = AArch64::MADDWrrr;
7170 RC = &AArch64::GPR32RegClass;
7171 } else {
7172 OrrOpc = AArch64::ORRXri;
7173 OrrRC = &AArch64::GPR64spRegClass;
7174 BitSize = 64;
7175 ZeroReg = AArch64::XZR;
7176 Opc = AArch64::MADDXrrr;
7177 RC = &AArch64::GPR64RegClass;
7178 }
7179 Register NewVR = MRI.createVirtualRegister(OrrRC);
7180 uint64_t Imm = Root.getOperand(2).getImm();
7181 if (Root.getOperand(3).isImm()) {
7182 unsigned Val = Root.getOperand(3).getImm();
7183 Imm = Imm << Val;
7184 }
7185 uint64_t UImm = SignExtend64(-Imm, BitSize);
7186 // The immediate can be composed via a single instruction.
7187 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7188 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7189 if (Insn.size() != 1)
7190 return;
7191 auto MovI = Insn.begin();
7192 MachineInstrBuilder MIB1;
7193 // MOV is an alias for one of three instructions: movz, movn, and orr.
7194 if (MovI->Opcode == OrrOpc)
7195 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7196 .addReg(ZeroReg)
7197 .addImm(MovI->Op2);
7198 else {
7199 if (BitSize == 32)
7200 assert((MovI->Opcode == AArch64::MOVNWi ||
7201 MovI->Opcode == AArch64::MOVZWi) &&
7202 "Expected opcode");
7203 else
7204 assert((MovI->Opcode == AArch64::MOVNXi ||
7205 MovI->Opcode == AArch64::MOVZXi) &&
7206 "Expected opcode");
7207 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7208 .addImm(MovI->Op1)
7209 .addImm(MovI->Op2);
7210 }
7211 InsInstrs.push_back(MIB1);
7212 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7213 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7214 break;
7215 }
7216
7217 case AArch64MachineCombinerPattern::MULADDv8i8_OP1:
7218 Opc = AArch64::MLAv8i8;
7219 RC = &AArch64::FPR64RegClass;
7220 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7221 break;
7222 case AArch64MachineCombinerPattern::MULADDv8i8_OP2:
7223 Opc = AArch64::MLAv8i8;
7224 RC = &AArch64::FPR64RegClass;
7225 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7226 break;
7227 case AArch64MachineCombinerPattern::MULADDv16i8_OP1:
7228 Opc = AArch64::MLAv16i8;
7229 RC = &AArch64::FPR128RegClass;
7230 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7231 break;
7232 case AArch64MachineCombinerPattern::MULADDv16i8_OP2:
7233 Opc = AArch64::MLAv16i8;
7234 RC = &AArch64::FPR128RegClass;
7235 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7236 break;
7237 case AArch64MachineCombinerPattern::MULADDv4i16_OP1:
7238 Opc = AArch64::MLAv4i16;
7239 RC = &AArch64::FPR64RegClass;
7240 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7241 break;
7242 case AArch64MachineCombinerPattern::MULADDv4i16_OP2:
7243 Opc = AArch64::MLAv4i16;
7244 RC = &AArch64::FPR64RegClass;
7245 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7246 break;
7247 case AArch64MachineCombinerPattern::MULADDv8i16_OP1:
7248 Opc = AArch64::MLAv8i16;
7249 RC = &AArch64::FPR128RegClass;
7250 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7251 break;
7252 case AArch64MachineCombinerPattern::MULADDv8i16_OP2:
7253 Opc = AArch64::MLAv8i16;
7254 RC = &AArch64::FPR128RegClass;
7255 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7256 break;
7257 case AArch64MachineCombinerPattern::MULADDv2i32_OP1:
7258 Opc = AArch64::MLAv2i32;
7259 RC = &AArch64::FPR64RegClass;
7260 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7261 break;
7262 case AArch64MachineCombinerPattern::MULADDv2i32_OP2:
7263 Opc = AArch64::MLAv2i32;
7264 RC = &AArch64::FPR64RegClass;
7265 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7266 break;
7267 case AArch64MachineCombinerPattern::MULADDv4i32_OP1:
7268 Opc = AArch64::MLAv4i32;
7269 RC = &AArch64::FPR128RegClass;
7270 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7271 break;
7272 case AArch64MachineCombinerPattern::MULADDv4i32_OP2:
7273 Opc = AArch64::MLAv4i32;
7274 RC = &AArch64::FPR128RegClass;
7275 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7276 break;
7277
7278 case AArch64MachineCombinerPattern::MULSUBv8i8_OP1:
7279 Opc = AArch64::MLAv8i8;
7280 RC = &AArch64::FPR64RegClass;
7281 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7282 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7283 RC);
7284 break;
7285 case AArch64MachineCombinerPattern::MULSUBv8i8_OP2:
7286 Opc = AArch64::MLSv8i8;
7287 RC = &AArch64::FPR64RegClass;
7288 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7289 break;
7290 case AArch64MachineCombinerPattern::MULSUBv16i8_OP1:
7291 Opc = AArch64::MLAv16i8;
7292 RC = &AArch64::FPR128RegClass;
7293 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7294 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7295 RC);
7296 break;
7298 Opc = AArch64::MLSv16i8;
7299 RC = &AArch64::FPR128RegClass;
7300 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7301 break;
7303 Opc = AArch64::MLAv4i16;
7304 RC = &AArch64::FPR64RegClass;
7305 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7306 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7307 RC);
7308 break;
7310 Opc = AArch64::MLSv4i16;
7311 RC = &AArch64::FPR64RegClass;
7312 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7313 break;
7315 Opc = AArch64::MLAv8i16;
7316 RC = &AArch64::FPR128RegClass;
7317 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7318 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7319 RC);
7320 break;
7322 Opc = AArch64::MLSv8i16;
7323 RC = &AArch64::FPR128RegClass;
7324 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7325 break;
7327 Opc = AArch64::MLAv2i32;
7328 RC = &AArch64::FPR64RegClass;
7329 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7330 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7331 RC);
7332 break;
7334 Opc = AArch64::MLSv2i32;
7335 RC = &AArch64::FPR64RegClass;
7336 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7337 break;
7339 Opc = AArch64::MLAv4i32;
7340 RC = &AArch64::FPR128RegClass;
7341 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7342 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7343 RC);
7344 break;
7346 Opc = AArch64::MLSv4i32;
7347 RC = &AArch64::FPR128RegClass;
7348 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7349 break;
7350
7352 Opc = AArch64::MLAv4i16_indexed;
7353 RC = &AArch64::FPR64RegClass;
7354 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7355 break;
7357 Opc = AArch64::MLAv4i16_indexed;
7358 RC = &AArch64::FPR64RegClass;
7359 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7360 break;
7362 Opc = AArch64::MLAv8i16_indexed;
7363 RC = &AArch64::FPR128RegClass;
7364 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7365 break;
7367 Opc = AArch64::MLAv8i16_indexed;
7368 RC = &AArch64::FPR128RegClass;
7369 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7370 break;
7372 Opc = AArch64::MLAv2i32_indexed;
7373 RC = &AArch64::FPR64RegClass;
7374 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7375 break;
7377 Opc = AArch64::MLAv2i32_indexed;
7378 RC = &AArch64::FPR64RegClass;
7379 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7380 break;
7382 Opc = AArch64::MLAv4i32_indexed;
7383 RC = &AArch64::FPR128RegClass;
7384 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7385 break;
7387 Opc = AArch64::MLAv4i32_indexed;
7388 RC = &AArch64::FPR128RegClass;
7389 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7390 break;
7391
7393 Opc = AArch64::MLAv4i16_indexed;
7394 RC = &AArch64::FPR64RegClass;
7395 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7396 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7397 RC);
7398 break;
7400 Opc = AArch64::MLSv4i16_indexed;
7401 RC = &AArch64::FPR64RegClass;
7402 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7403 break;
7405 Opc = AArch64::MLAv8i16_indexed;
7406 RC = &AArch64::FPR128RegClass;
7407 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7408 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7409 RC);
7410 break;
7412 Opc = AArch64::MLSv8i16_indexed;
7413 RC = &AArch64::FPR128RegClass;
7414 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7415 break;
7417 Opc = AArch64::MLAv2i32_indexed;
7418 RC = &AArch64::FPR64RegClass;
7419 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7420 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7421 RC);
7422 break;
7424 Opc = AArch64::MLSv2i32_indexed;
7425 RC = &AArch64::FPR64RegClass;
7426 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7427 break;
7429 Opc = AArch64::MLAv4i32_indexed;
7430 RC = &AArch64::FPR128RegClass;
7431 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7432 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7433 RC);
7434 break;
7436 Opc = AArch64::MLSv4i32_indexed;
7437 RC = &AArch64::FPR128RegClass;
7438 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7439 break;
7440
7441 // Floating Point Support
7443 Opc = AArch64::FMADDHrrr;
7444 RC = &AArch64::FPR16RegClass;
7445 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7446 break;
7448 Opc = AArch64::FMADDSrrr;
7449 RC = &AArch64::FPR32RegClass;
7450 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7451 break;
7453 Opc = AArch64::FMADDDrrr;
7454 RC = &AArch64::FPR64RegClass;
7455 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7456 break;
7457
7459 Opc = AArch64::FMADDHrrr;
7460 RC = &AArch64::FPR16RegClass;
7461 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7462 break;
7464 Opc = AArch64::FMADDSrrr;
7465 RC = &AArch64::FPR32RegClass;
7466 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7467 break;
7469 Opc = AArch64::FMADDDrrr;
7470 RC = &AArch64::FPR64RegClass;
7471 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7472 break;
7473
7475 Opc = AArch64::FMLAv1i32_indexed;
7476 RC = &AArch64::FPR32RegClass;
7477 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7478 FMAInstKind::Indexed);
7479 break;
7481 Opc = AArch64::FMLAv1i32_indexed;
7482 RC = &AArch64::FPR32RegClass;
7483 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7484 FMAInstKind::Indexed);
7485 break;
7486
7488 Opc = AArch64::FMLAv1i64_indexed;
7489 RC = &AArch64::FPR64RegClass;
7490 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7491 FMAInstKind::Indexed);
7492 break;
7494 Opc = AArch64::FMLAv1i64_indexed;
7495 RC = &AArch64::FPR64RegClass;
7496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7497 FMAInstKind::Indexed);
7498 break;
7499
7501 RC = &AArch64::FPR64RegClass;
7502 Opc = AArch64::FMLAv4i16_indexed;
7503 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7504 FMAInstKind::Indexed);
7505 break;
7507 RC = &AArch64::FPR64RegClass;
7508 Opc = AArch64::FMLAv4f16;
7509 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7510 FMAInstKind::Accumulator);
7511 break;
7513 RC = &AArch64::FPR64RegClass;
7514 Opc = AArch64::FMLAv4i16_indexed;
7515 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7516 FMAInstKind::Indexed);
7517 break;
7519 RC = &AArch64::FPR64RegClass;
7520 Opc = AArch64::FMLAv4f16;
7521 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7522 FMAInstKind::Accumulator);
7523 break;
7524
7527 RC = &AArch64::FPR64RegClass;
7529 Opc = AArch64::FMLAv2i32_indexed;
7530 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7531 FMAInstKind::Indexed);
7532 } else {
7533 Opc = AArch64::FMLAv2f32;
7534 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7535 FMAInstKind::Accumulator);
7536 }
7537 break;
7540 RC = &AArch64::FPR64RegClass;
7542 Opc = AArch64::FMLAv2i32_indexed;
7543 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7544 FMAInstKind::Indexed);
7545 } else {
7546 Opc = AArch64::FMLAv2f32;
7547 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7548 FMAInstKind::Accumulator);
7549 }
7550 break;
7551
7553 RC = &AArch64::FPR128RegClass;
7554 Opc = AArch64::FMLAv8i16_indexed;
7555 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7556 FMAInstKind::Indexed);
7557 break;
7559 RC = &AArch64::FPR128RegClass;
7560 Opc = AArch64::FMLAv8f16;
7561 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7562 FMAInstKind::Accumulator);
7563 break;
7565 RC = &AArch64::FPR128RegClass;
7566 Opc = AArch64::FMLAv8i16_indexed;
7567 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7568 FMAInstKind::Indexed);
7569 break;
7571 RC = &AArch64::FPR128RegClass;
7572 Opc = AArch64::FMLAv8f16;
7573 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7574 FMAInstKind::Accumulator);
7575 break;
7576
7579 RC = &AArch64::FPR128RegClass;
7581 Opc = AArch64::FMLAv2i64_indexed;
7582 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7583 FMAInstKind::Indexed);
7584 } else {
7585 Opc = AArch64::FMLAv2f64;
7586 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7587 FMAInstKind::Accumulator);
7588 }
7589 break;
7592 RC = &AArch64::FPR128RegClass;
7594 Opc = AArch64::FMLAv2i64_indexed;
7595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7596 FMAInstKind::Indexed);
7597 } else {
7598 Opc = AArch64::FMLAv2f64;
7599 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7600 FMAInstKind::Accumulator);
7601 }
7602 break;
7603
7606 RC = &AArch64::FPR128RegClass;
7608 Opc = AArch64::FMLAv4i32_indexed;
7609 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7610 FMAInstKind::Indexed);
7611 } else {
7612 Opc = AArch64::FMLAv4f32;
7613 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7614 FMAInstKind::Accumulator);
7615 }
7616 break;
7617
7620 RC = &AArch64::FPR128RegClass;
7622 Opc = AArch64::FMLAv4i32_indexed;
7623 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7624 FMAInstKind::Indexed);
7625 } else {
7626 Opc = AArch64::FMLAv4f32;
7627 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7628 FMAInstKind::Accumulator);
7629 }
7630 break;
7631
7633 Opc = AArch64::FNMSUBHrrr;
7634 RC = &AArch64::FPR16RegClass;
7635 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7636 break;
7638 Opc = AArch64::FNMSUBSrrr;
7639 RC = &AArch64::FPR32RegClass;
7640 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7641 break;
7643 Opc = AArch64::FNMSUBDrrr;
7644 RC = &AArch64::FPR64RegClass;
7645 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7646 break;
7647
7649 Opc = AArch64::FNMADDHrrr;
7650 RC = &AArch64::FPR16RegClass;
7651 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7652 break;
7654 Opc = AArch64::FNMADDSrrr;
7655 RC = &AArch64::FPR32RegClass;
7656 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7657 break;
7659 Opc = AArch64::FNMADDDrrr;
7660 RC = &AArch64::FPR64RegClass;
7661 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7662 break;
7663
7665 Opc = AArch64::FMSUBHrrr;
7666 RC = &AArch64::FPR16RegClass;
7667 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7668 break;
7670 Opc = AArch64::FMSUBSrrr;
7671 RC = &AArch64::FPR32RegClass;
7672 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7673 break;
7675 Opc = AArch64::FMSUBDrrr;
7676 RC = &AArch64::FPR64RegClass;
7677 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7678 break;
7679
7681 Opc = AArch64::FMLSv1i32_indexed;
7682 RC = &AArch64::FPR32RegClass;
7683 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7684 FMAInstKind::Indexed);
7685 break;
7686
7688 Opc = AArch64::FMLSv1i64_indexed;
7689 RC = &AArch64::FPR64RegClass;
7690 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7691 FMAInstKind::Indexed);
7692 break;
7693
7696 RC = &AArch64::FPR64RegClass;
7697 Register NewVR = MRI.createVirtualRegister(RC);
7698 MachineInstrBuilder MIB1 =
7699 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7700 .add(Root.getOperand(2));
7701 InsInstrs.push_back(MIB1);
7702 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7704 Opc = AArch64::FMLAv4f16;
7705 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7706 FMAInstKind::Accumulator, &NewVR);
7707 } else {
7708 Opc = AArch64::FMLAv4i16_indexed;
7709 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7710 FMAInstKind::Indexed, &NewVR);
7711 }
7712 break;
7713 }
7715 RC = &AArch64::FPR64RegClass;
7716 Opc = AArch64::FMLSv4f16;
7717 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7718 FMAInstKind::Accumulator);
7719 break;
7721 RC = &AArch64::FPR64RegClass;
7722 Opc = AArch64::FMLSv4i16_indexed;
7723 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7724 FMAInstKind::Indexed);
7725 break;
7726
7729 RC = &AArch64::FPR64RegClass;
7731 Opc = AArch64::FMLSv2i32_indexed;
7732 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7733 FMAInstKind::Indexed);
7734 } else {
7735 Opc = AArch64::FMLSv2f32;
7736 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7737 FMAInstKind::Accumulator);
7738 }
7739 break;
7740
7743 RC = &AArch64::FPR128RegClass;
7744 Register NewVR = MRI.createVirtualRegister(RC);
7745 MachineInstrBuilder MIB1 =
7746 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7747 .add(Root.getOperand(2));
7748 InsInstrs.push_back(MIB1);
7749 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7751 Opc = AArch64::FMLAv8f16;
7752 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7753 FMAInstKind::Accumulator, &NewVR);
7754 } else {
7755 Opc = AArch64::FMLAv8i16_indexed;
7756 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7757 FMAInstKind::Indexed, &NewVR);
7758 }
7759 break;
7760 }
7762 RC = &AArch64::FPR128RegClass;
7763 Opc = AArch64::FMLSv8f16;
7764 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7765 FMAInstKind::Accumulator);
7766 break;
7768 RC = &AArch64::FPR128RegClass;
7769 Opc = AArch64::FMLSv8i16_indexed;
7770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7771 FMAInstKind::Indexed);
7772 break;
7773
7776 RC = &AArch64::FPR128RegClass;
7778 Opc = AArch64::FMLSv2i64_indexed;
7779 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7780 FMAInstKind::Indexed);
7781 } else {
7782 Opc = AArch64::FMLSv2f64;
7783 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7784 FMAInstKind::Accumulator);
7785 }
7786 break;
7787
7790 RC = &AArch64::FPR128RegClass;
7792 Opc = AArch64::FMLSv4i32_indexed;
7793 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7794 FMAInstKind::Indexed);
7795 } else {
7796 Opc = AArch64::FMLSv4f32;
7797 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7798 FMAInstKind::Accumulator);
7799 }
7800 break;
7803 RC = &AArch64::FPR64RegClass;
7804 Register NewVR = MRI.createVirtualRegister(RC);
7805 MachineInstrBuilder MIB1 =
7806 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7807 .add(Root.getOperand(2));
7808 InsInstrs.push_back(MIB1);
7809 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7811 Opc = AArch64::FMLAv2i32_indexed;
7812 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7813 FMAInstKind::Indexed, &NewVR);
7814 } else {
7815 Opc = AArch64::FMLAv2f32;
7816 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7817 FMAInstKind::Accumulator, &NewVR);
7818 }
7819 break;
7820 }
7823 RC = &AArch64::FPR128RegClass;
7824 Register NewVR = MRI.createVirtualRegister(RC);
7825 MachineInstrBuilder MIB1 =
7826 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7827 .add(Root.getOperand(2));
7828 InsInstrs.push_back(MIB1);
7829 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7831 Opc = AArch64::FMLAv4i32_indexed;
7832 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7833 FMAInstKind::Indexed, &NewVR);
7834 } else {
7835 Opc = AArch64::FMLAv4f32;
7836 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7837 FMAInstKind::Accumulator, &NewVR);
7838 }
7839 break;
7840 }
7843 RC = &AArch64::FPR128RegClass;
7844 Register NewVR = MRI.createVirtualRegister(RC);
7845 MachineInstrBuilder MIB1 =
7846 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7847 .add(Root.getOperand(2));
7848 InsInstrs.push_back(MIB1);
7849 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7851 Opc = AArch64::FMLAv2i64_indexed;
7852 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7853 FMAInstKind::Indexed, &NewVR);
7854 } else {
7855 Opc = AArch64::FMLAv2f64;
7856 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7857 FMAInstKind::Accumulator, &NewVR);
7858 }
7859 break;
7860 }
7863 unsigned IdxDupOp =
7865 : 2;
7866 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7867 &AArch64::FPR128RegClass, MRI);
7868 break;
7869 }
7872 unsigned IdxDupOp =
7874 : 2;
7875 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7876 &AArch64::FPR128RegClass, MRI);
7877 break;
7878 }
7881 unsigned IdxDupOp =
7883 : 2;
7884 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7885 &AArch64::FPR128_loRegClass, MRI);
7886 break;
7887 }
7890 unsigned IdxDupOp =
7892 : 2;
7893 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7894 &AArch64::FPR128RegClass, MRI);
7895 break;
7896 }
7899 unsigned IdxDupOp =
7901 : 2;
7902 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7903 &AArch64::FPR128_loRegClass, MRI);
7904 break;
7905 }
7907 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7908 break;
7909 }
7910
7911 } // end switch (Pattern)
7912 // Record MUL and ADD/SUB for deletion
7913 if (MUL)
7914 DelInstrs.push_back(MUL);
7915 DelInstrs.push_back(&Root);
7916
7917 // Set the flags on the inserted instructions to be the merged flags of the
7918 // instructions that we have combined.
7919 uint32_t Flags = Root.getFlags();
7920 if (MUL)
7921 Flags = Root.mergeFlagsWith(*MUL);
7922 for (auto *MI : InsInstrs)
7923 MI->setFlags(Flags);
7924}
7925
7926/// Replace csincr-branch sequence by simple conditional branch
7927///
7928/// Examples:
7929/// 1. \code
7930/// csinc w9, wzr, wzr, <condition code>
7931/// tbnz w9, #0, 0x44
7932/// \endcode
7933/// to
7934/// \code
7935/// b.<inverted condition code>
7936/// \endcode
7937///
7938/// 2. \code
7939/// csinc w9, wzr, wzr, <condition code>
7940/// tbz w9, #0, 0x44
7941/// \endcode
7942/// to
7943/// \code
7944/// b.<condition code>
7945/// \endcode
7946///
7947/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7948/// compare's constant operand is power of 2.
7949///
7950/// Examples:
7951/// \code
7952/// and w8, w8, #0x400
7953/// cbnz w8, L1
7954/// \endcode
7955/// to
7956/// \code
7957/// tbnz w8, #10, L1
7958/// \endcode
7959///
7960/// \param MI Conditional Branch
7961/// \return True when the simple conditional branch is generated
7962///
7964 bool IsNegativeBranch = false;
7965 bool IsTestAndBranch = false;
7966 unsigned TargetBBInMI = 0;
7967 switch (MI.getOpcode()) {
7968 default:
7969 llvm_unreachable("Unknown branch instruction?");
7970 case AArch64::Bcc:
7971 return false;
7972 case AArch64::CBZW:
7973 case AArch64::CBZX:
7974 TargetBBInMI = 1;
7975 break;
7976 case AArch64::CBNZW:
7977 case AArch64::CBNZX:
7978 TargetBBInMI = 1;
7979 IsNegativeBranch = true;
7980 break;
7981 case AArch64::TBZW:
7982 case AArch64::TBZX:
7983 TargetBBInMI = 2;
7984 IsTestAndBranch = true;
7985 break;
7986 case AArch64::TBNZW:
7987 case AArch64::TBNZX:
7988 TargetBBInMI = 2;
7989 IsNegativeBranch = true;
7990 IsTestAndBranch = true;
7991 break;
7992 }
7993 // So we increment a zero register and test for bits other
7994 // than bit 0? Conservatively bail out in case the verifier
7995 // missed this case.
7996 if (IsTestAndBranch && MI.getOperand(1).getImm())
7997 return false;
7998
7999 // Find Definition.
8000 assert(MI.getParent() && "Incomplete machine instruction\n");
8001 MachineBasicBlock *MBB = MI.getParent();
8002 MachineFunction *MF = MBB->getParent();
8004 Register VReg = MI.getOperand(0).getReg();
8005 if (!VReg.isVirtual())
8006 return false;
8007
8008 MachineInstr *DefMI = MRI->getVRegDef(VReg);
8009
8010 // Look through COPY instructions to find definition.
8011 while (DefMI->isCopy()) {
8012 Register CopyVReg = DefMI->getOperand(1).getReg();
8013 if (!MRI->hasOneNonDBGUse(CopyVReg))
8014 return false;
8015 if (!MRI->hasOneDef(CopyVReg))
8016 return false;
8017 DefMI = MRI->getVRegDef(CopyVReg);
8018 }
8019
8020 switch (DefMI->getOpcode()) {
8021 default:
8022 return false;
8023 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8024 case AArch64::ANDWri:
8025 case AArch64::ANDXri: {
8026 if (IsTestAndBranch)
8027 return false;
8028 if (DefMI->getParent() != MBB)
8029 return false;
8030 if (!MRI->hasOneNonDBGUse(VReg))
8031 return false;
8032
8033 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8035 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8036 if (!isPowerOf2_64(Mask))
8037 return false;
8038
8040 Register NewReg = MO.getReg();
8041 if (!NewReg.isVirtual())
8042 return false;
8043
8044 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8045
8046 MachineBasicBlock &RefToMBB = *MBB;
8047 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8048 DebugLoc DL = MI.getDebugLoc();
8049 unsigned Imm = Log2_64(Mask);
8050 unsigned Opc = (Imm < 32)
8051 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8052 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8053 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8054 .addReg(NewReg)
8055 .addImm(Imm)
8056 .addMBB(TBB);
8057 // Register lives on to the TBZ/TBNZ now.
8058 MO.setIsKill(false);
8059
8060 // For immediates smaller than 32, we need to use the 32-bit
8061 // variant (W) in all cases, because the 64-bit variant cannot
8062 // encode them.
8063 // Therefore, if the input register is 64-bit, we need to take its
8064 // 32-bit sub-register.
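 // (Illustrative: a test of bit 10 in a 64-bit register becomes a TBZW/TBNZW
 // on the register's 32-bit sub_32 sub-register.)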
8065 if (!Is32Bit && Imm < 32)
8066 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8067 MI.eraseFromParent();
8068 return true;
8069 }
8070 // Look for CSINC
8071 case AArch64::CSINCWr:
8072 case AArch64::CSINCXr: {
8073 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8074 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8075 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8076 DefMI->getOperand(2).getReg() == AArch64::XZR))
8077 return false;
8078
8079 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8080 true) != -1)
8081 return false;
8082
8084 // Convert only when the condition code is not modified between
8085 // the CSINC and the branch. The CC may be used by other
8086 // instructions in between.
8088 return false;
8089 MachineBasicBlock &RefToMBB = *MBB;
8090 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8091 DebugLoc DL = MI.getDebugLoc();
8092 if (IsNegativeBranch)
8094 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8095 MI.eraseFromParent();
8096 return true;
8097 }
8098 }
8099}
8100
8101std::pair<unsigned, unsigned>
8103 const unsigned Mask = AArch64II::MO_FRAGMENT;
8104 return std::make_pair(TF & Mask, TF & ~Mask);
8105}
8106
8109 using namespace AArch64II;
8110
8111 static const std::pair<unsigned, const char *> TargetFlags[] = {
8112 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8113 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8114 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8115 {MO_HI12, "aarch64-hi12"}};
8116 return ArrayRef(TargetFlags);
8117}
8118
8121 using namespace AArch64II;
8122
8123 static const std::pair<unsigned, const char *> TargetFlags[] = {
8124 {MO_COFFSTUB, "aarch64-coffstub"},
8125 {MO_GOT, "aarch64-got"},
8126 {MO_NC, "aarch64-nc"},
8127 {MO_S, "aarch64-s"},
8128 {MO_TLS, "aarch64-tls"},
8129 {MO_DLLIMPORT, "aarch64-dllimport"},
8130 {MO_PREL, "aarch64-prel"},
8131 {MO_TAGGED, "aarch64-tagged"},
8132 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8133 };
8134 return ArrayRef(TargetFlags);
8135}
8136
8139 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8140 {{MOSuppressPair, "aarch64-suppress-pair"},
8141 {MOStridedAccess, "aarch64-strided-access"}};
8142 return ArrayRef(TargetFlags);
8143}
8144
8145/// Constants defining how certain sequences should be outlined.
8146/// This encompasses how an outlined function should be called, and what kind of
8147/// frame should be emitted for that outlined function.
8148///
8149/// \p MachineOutlinerDefault implies that the function should be called with
8150/// a save and restore of LR to the stack.
8151///
8152/// That is,
8153///
8154/// I1 Save LR OUTLINED_FUNCTION:
8155/// I2 --> BL OUTLINED_FUNCTION I1
8156/// I3 Restore LR I2
8157/// I3
8158/// RET
8159///
8160/// * Call construction overhead: 3 (save + BL + restore)
8161/// * Frame construction overhead: 1 (ret)
8162/// * Requires stack fixups? Yes
8163///
8164/// \p MachineOutlinerTailCall implies that the function is being created from
8165/// a sequence of instructions ending in a return.
8166///
8167/// That is,
8168///
8169/// I1 OUTLINED_FUNCTION:
8170/// I2 --> B OUTLINED_FUNCTION I1
8171/// RET I2
8172/// RET
8173///
8174/// * Call construction overhead: 1 (B)
8175/// * Frame construction overhead: 0 (Return included in sequence)
8176/// * Requires stack fixups? No
8177///
8178/// \p MachineOutlinerNoLRSave implies that the function should be called using
8179/// a BL instruction, but doesn't require LR to be saved and restored. This
8180/// happens when LR is known to be dead.
8181///
8182/// That is,
8183///
8184/// I1 OUTLINED_FUNCTION:
8185/// I2 --> BL OUTLINED_FUNCTION I1
8186/// I3 I2
8187/// I3
8188/// RET
8189///
8190/// * Call construction overhead: 1 (BL)
8191/// * Frame construction overhead: 1 (RET)
8192/// * Requires stack fixups? No
8193///
8194/// \p MachineOutlinerThunk implies that the function is being created from
8195/// a sequence of instructions ending in a call. The outlined function is
8196/// called with a BL instruction, and the outlined function tail-calls the
8197/// original call destination.
8198///
8199/// That is,
8200///
8201/// I1 OUTLINED_FUNCTION:
8202/// I2 --> BL OUTLINED_FUNCTION I1
8203/// BL f I2
8204/// B f
8205/// * Call construction overhead: 1 (BL)
8206/// * Frame construction overhead: 0
8207/// * Requires stack fixups? No
8208///
8209/// \p MachineOutlinerRegSave implies that the function should be called with a
8210/// save and restore of LR to an available register. This allows us to avoid
8211/// stack fixups. Note that this outlining variant is compatible with the
8212/// NoLRSave case.
8213///
8214/// That is,
8215///
8216/// I1 Save LR OUTLINED_FUNCTION:
8217/// I2 --> BL OUTLINED_FUNCTION I1
8218/// I3 Restore LR I2
8219/// I3
8220/// RET
8221///
8222/// * Call construction overhead: 3 (save + BL + restore)
8223/// * Frame construction overhead: 1 (ret)
8224/// * Requires stack fixups? No
8226 MachineOutlinerDefault, ///< Emit a save, restore, call, and return.
8227 MachineOutlinerTailCall, ///< Only emit a branch.
8228 MachineOutlinerNoLRSave, ///< Emit a call and return.
8229 MachineOutlinerThunk, ///< Emit a call and tail-call.
8230 MachineOutlinerRegSave ///< Same as default, but save to a register.
8232
8236 UnsafeRegsDead = 0x8
8238
8240AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8241 MachineFunction *MF = C.getMF();
8243 const AArch64RegisterInfo *ARI =
8244 static_cast<const AArch64RegisterInfo *>(&TRI);
8245 // Check if there is an available register across the sequence that we can
8246 // use.
8247 for (unsigned Reg : AArch64::GPR64RegClass) {
8248 if (!ARI->isReservedReg(*MF, Reg) &&
8249 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8250 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8251 Reg != AArch64::X17 && // Ditto for X17.
8252 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8253 C.isAvailableInsideSeq(Reg, TRI))
8254 return Reg;
8255 }
8256 return Register();
8257}
8258
8259static bool
8261 const outliner::Candidate &b) {
8262 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8263 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8264
8265 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8266 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8267}
8268
8269static bool
8271 const outliner::Candidate &b) {
8272 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8273 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8274
8275 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8276}
8277
8279 const outliner::Candidate &b) {
8280 const AArch64Subtarget &SubtargetA =
8282 const AArch64Subtarget &SubtargetB =
8283 b.getMF()->getSubtarget<AArch64Subtarget>();
8284 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8285}
8286
8287std::optional<outliner::OutlinedFunction>
8289 const MachineModuleInfo &MMI,
8290 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8291 unsigned SequenceSize = 0;
8292 for (auto &MI : RepeatedSequenceLocs[0])
8293 SequenceSize += getInstSizeInBytes(MI);
8294
8295 unsigned NumBytesToCreateFrame = 0;
8296
8297 // We only allow outlining for functions having exactly matching return
8298 // address signing attributes, i.e., all share the same value for the
8299 // attribute "sign-return-address" and all share the same type of key they
8300 // are signed with.
8301 // Additionally, we require that either all functions support v8.3a
8302 // features or none do. Otherwise an outlined function could get signed
8303 // using dedicated v8.3 instructions and a call from a function that doesn't
8304 // support v8.3 instructions would therefore be invalid.
8305 if (std::adjacent_find(
8306 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8307 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8308 // Return true if a and b are non-equal w.r.t. return address
8309 // signing or support of v8.3a features
8310 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8311 outliningCandidatesSigningKeyConsensus(a, b) &&
8312 outliningCandidatesV8_3OpsConsensus(a, b)) {
8313 return false;
8314 }
8315 return true;
8316 }) != RepeatedSequenceLocs.end()) {
8317 return std::nullopt;
8318 }
8319
8320 // Since at this point all candidates agree on their return address signing
8321 // picking just one is fine. If the candidate functions potentially sign their
8322 // return addresses, the outlined function should do the same. Note that in
8323 // the case of "sign-return-address"="non-leaf" this is an assumption: it is
8324 // not certain that the outlined function will have to sign its return
8325 // address but this decision is made later, when the decision to outline
8326 // has already been made.
8327 // The same holds for the number of additional instructions we need: On
8328 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8329 // necessary. However, at this point we don't know if the outlined function
8330 // will have a RET instruction so we assume the worst.
8331 const TargetRegisterInfo &TRI = getRegisterInfo();
8332 // Performing a tail call may require extra checks when PAuth is enabled.
8333 // If PAuth is disabled, set it to zero for uniformity.
8334 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8335 if (RepeatedSequenceLocs[0]
8336 .getMF()
8337 ->getInfo<AArch64FunctionInfo>()
8338 ->shouldSignReturnAddress(true)) {
8339 // One PAC and one AUT instructions
8340 NumBytesToCreateFrame += 8;
8341
8342 // PAuth is enabled - set extra tail call cost, if any.
8343 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8344 NumBytesToCheckLRInTCEpilogue =
8346 // Checking the authenticated LR value may significantly impact
8347 // SequenceSize, so account for it for more precise results.
8348 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8349 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8350
8351 // We have to check if sp-modifying instructions would get outlined.
8352 // If so, we only allow outlining if sp is unchanged overall; matching
8353 // sub and add instructions are okay to outline, all other sp
8354 // modifications are not.
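 // (Illustrative: a matched "sub sp, sp, #16 ... add sp, sp, #16" pair nets
 // to zero and is acceptable; an unmatched adjustment causes the candidate
 // to be dropped.)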
8355 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8356 int SPValue = 0;
8357 for (auto &MI : C) {
8358 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8359 switch (MI.getOpcode()) {
8360 case AArch64::ADDXri:
8361 case AArch64::ADDWri:
8362 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8363 assert(MI.getOperand(2).isImm() &&
8364 "Expected operand to be immediate");
8365 assert(MI.getOperand(1).isReg() &&
8366 "Expected operand to be a register");
8367 // Check if the add just increments sp. If so, we search for
8368 // matching sub instructions that decrement sp. If not, the
8369 // modification is illegal
8370 if (MI.getOperand(1).getReg() == AArch64::SP)
8371 SPValue += MI.getOperand(2).getImm();
8372 else
8373 return true;
8374 break;
8375 case AArch64::SUBXri:
8376 case AArch64::SUBWri:
8377 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8378 assert(MI.getOperand(2).isImm() &&
8379 "Expected operand to be immediate");
8380 assert(MI.getOperand(1).isReg() &&
8381 "Expected operand to be a register");
8382 // Check if the sub just decrements sp. If so, we search for
8383 // matching add instructions that increment sp. If not, the
8384 // modification is illegal
8385 if (MI.getOperand(1).getReg() == AArch64::SP)
8386 SPValue -= MI.getOperand(2).getImm();
8387 else
8388 return true;
8389 break;
8390 default:
8391 return true;
8392 }
8393 }
8394 }
8395 if (SPValue)
8396 return true;
8397 return false;
8398 };
8399 // Remove candidates with illegal stack modifying instructions
8400 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8401
8402 // If the sequence doesn't have enough candidates left, then we're done.
8403 if (RepeatedSequenceLocs.size() < 2)
8404 return std::nullopt;
8405 }
8406
8407 // Properties about candidate MBBs that hold for all of them.
8408 unsigned FlagsSetInAll = 0xF;
8409
8410 // Compute liveness information for each candidate, and set FlagsSetInAll.
8411 for (outliner::Candidate &C : RepeatedSequenceLocs)
8412 FlagsSetInAll &= C.Flags;
8413
8414 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8415
8416 // Helper lambda which sets call information for every candidate.
8417 auto SetCandidateCallInfo =
8418 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8419 for (outliner::Candidate &C : RepeatedSequenceLocs)
8420 C.setCallInfo(CallID, NumBytesForCall);
8421 };
8422
8423 unsigned FrameID = MachineOutlinerDefault;
8424 NumBytesToCreateFrame += 4;
8425
8426 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8427 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8428 });
8429
8430 // We check to see if CFI Instructions are present, and if they are
8431 // we find the number of CFI Instructions in the candidates.
8432 unsigned CFICount = 0;
8433 for (auto &I : RepeatedSequenceLocs[0]) {
8434 if (I.isCFIInstruction())
8435 CFICount++;
8436 }
8437
8438 // We compare the number of found CFI Instructions to the number of CFI
8439 // instructions in the parent function for each candidate. We must check this
8440 // since if we outline one of the CFI instructions in a function, we have to
8441 // outline them all for correctness. If we do not, the address offsets will be
8442 // incorrect between the two sections of the program.
8443 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8444 std::vector<MCCFIInstruction> CFIInstructions =
8445 C.getMF()->getFrameInstructions();
8446
8447 if (CFICount > 0 && CFICount != CFIInstructions.size())
8448 return std::nullopt;
8449 }
8450
8451 // Returns true if an instruction is safe to fix up, false otherwise.
8452 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8453 if (MI.isCall())
8454 return true;
8455
8456 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8457 !MI.readsRegister(AArch64::SP, &TRI))
8458 return true;
8459
8460 // Any modification of SP will break our code to save/restore LR.
8461 // FIXME: We could handle some instructions which add a constant
8462 // offset to SP, with a bit more work.
8463 if (MI.modifiesRegister(AArch64::SP, &TRI))
8464 return false;
8465
8466 // At this point, we have a stack instruction that we might need to
8467 // fix up. We'll handle it if it's a load or store.
8468 if (MI.mayLoadOrStore()) {
8469 const MachineOperand *Base; // Filled with the base operand of MI.
8470 int64_t Offset; // Filled with the offset of MI.
8471 bool OffsetIsScalable;
8472
8473 // Does it allow us to offset the base operand and is the base the
8474 // register SP?
8475 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8476 !Base->isReg() || Base->getReg() != AArch64::SP)
8477 return false;
8478
8479 // Fix-up code below assumes byte offsets.
8480 if (OffsetIsScalable)
8481 return false;
8482
8483 // Find the minimum/maximum offset for this instruction and check
8484 // if fixing it up would be in range.
8485 int64_t MinOffset,
8486 MaxOffset; // Unscaled offsets for the instruction.
8487 // The scale to multiply the offsets by.
8488 TypeSize Scale(0U, false), DummyWidth(0U, false);
8489 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8490
8491 Offset += 16; // Update the offset to what it would be if we outlined.
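 // (The default outlined frame spills LR with a 16-byte SP adjustment, which
 // is why 16 is added before the range check below.)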
8492 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8493 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8494 return false;
8495
8496 // It's in range, so we can outline it.
8497 return true;
8498 }
8499
8500 // FIXME: Add handling for instructions like "add x0, sp, #8".
8501
8502 // We can't fix it up, so don't outline it.
8503 return false;
8504 };
8505
8506 // True if it's possible to fix up each stack instruction in this sequence.
8507 // Important for frames/call variants that modify the stack.
8508 bool AllStackInstrsSafe =
8509 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
8510
8511 // If the last instruction in any candidate is a terminator, then we should
8512 // tail call all of the candidates.
8513 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8514 FrameID = MachineOutlinerTailCall;
8515 NumBytesToCreateFrame = 0;
8516 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8517 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8518 }
8519
8520 else if (LastInstrOpcode == AArch64::BL ||
8521 ((LastInstrOpcode == AArch64::BLR ||
8522 LastInstrOpcode == AArch64::BLRNoIP) &&
8523 !HasBTI)) {
8524 // FIXME: Do we need to check if the code after this uses the value of LR?
8525 FrameID = MachineOutlinerThunk;
8526 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8527 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8528 }
8529
8530 else {
8531 // We need to decide how to emit calls + frames. We can always emit the same
8532 // frame if we don't need to save to the stack. If we have to save to the
8533 // stack, then we need a different frame.
8534 unsigned NumBytesNoStackCalls = 0;
8535 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8536
8537 // Check if we have to save LR.
8538 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8539 bool LRAvailable =
8540 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8541 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8542 : true;
8543 // If we have a noreturn caller, then we're going to be conservative and
8544 // say that we have to save LR. If we don't have a ret at the end of the
8545 // block, then we can't reason about liveness accurately.
8546 //
8547 // FIXME: We can probably do better than always disabling this in
8548 // noreturn functions by fixing up the liveness info.
8549 bool IsNoReturn =
8550 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8551
8552 // Is LR available? If so, we don't need a save.
8553 if (LRAvailable && !IsNoReturn) {
8554 NumBytesNoStackCalls += 4;
8555 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8556 CandidatesWithoutStackFixups.push_back(C);
8557 }
8558
8559 // Is an unused register available? If so, we won't modify the stack, so
8560 // we can outline with the same frame type as those that don't save LR.
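 // (The 12-byte call cost below is one register move to save LR, the BL
 // itself, and one move to restore LR, 4 bytes each.)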
8561 else if (findRegisterToSaveLRTo(C)) {
8562 NumBytesNoStackCalls += 12;
8563 C.setCallInfo(MachineOutlinerRegSave, 12);
8564 CandidatesWithoutStackFixups.push_back(C);
8565 }
8566
8567 // Is SP used in the sequence at all? If not, we don't have to modify
8568 // the stack, so we are guaranteed to get the same frame.
8569 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8570 NumBytesNoStackCalls += 12;
8571 C.setCallInfo(MachineOutlinerDefault, 12);
8572 CandidatesWithoutStackFixups.push_back(C);
8573 }
8574
8575 // If we outline this, we need to modify the stack. Pretend we don't
8576 // outline this by saving all of its bytes.
8577 else {
8578 NumBytesNoStackCalls += SequenceSize;
8579 }
8580 }
8581
8582 // If there are no places where we have to save LR, then note that we
8583 // don't have to update the stack. Otherwise, give every candidate the
8584 // default call type, as long as it's safe to do so.
8585 if (!AllStackInstrsSafe ||
8586 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8587 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8588 FrameID = MachineOutlinerNoLRSave;
8589 if (RepeatedSequenceLocs.size() < 2)
8590 return std::nullopt;
8591 } else {
8592 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8593
8594 // Bugzilla ID: 46767
8595 // TODO: Check if fixing up the stack more than once is safe so we can
8596 // outline these.
8597 //
8598 // An outline resulting in a caller that requires stack fixups at the
8599 // callsite to a callee that also requires stack fixups can happen when
8600 // there are no available registers at the candidate callsite for a
8601 // candidate that itself also has calls.
8602 //
8603 // In other words, if function_containing_sequence in the following pseudo
8604 // assembly requires that we save LR at the point of the call, but there
8605 // are no available registers, then we save using SP and, as a
8606 // result, the SP offsets require stack fixups by multiples of 16.
8607 //
8608 // function_containing_sequence:
8609 // ...
8610 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8611 // call OUTLINED_FUNCTION_N
8612 // restore LR from SP
8613 // ...
8614 //
8615 // OUTLINED_FUNCTION_N:
8616 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8617 // ...
8618 // bl foo
8619 // restore LR from SP
8620 // ret
8621 //
8622 // Because the code to handle more than one stack fixup does not
8623 // currently have the proper checks for legality, these cases will assert
8624 // in the AArch64 MachineOutliner. This is because the code to do this
8625 // needs more hardening, testing, better checks that generated code is
8626 // legal, etc and because it is only verified to handle a single pass of
8627 // stack fixup.
8628 //
8629 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8630 // these cases until they are known to be handled. Bugzilla 46767 is
8631 // referenced in comments at the assert site.
8632 //
8633 // To avoid asserting (or generating non-legal code on noassert builds)
8634 // we remove all candidates which would need more than one stack fixup by
8635 // pruning the cases where the candidate has calls while also having no
8636 // available LR and having no available general purpose registers to copy
8637 // LR to (ie one extra stack save/restore).
8638 //
8639 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8640 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8641 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8642 return (llvm::any_of(C, IsCall)) &&
8643 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8644 !findRegisterToSaveLRTo(C));
8645 });
8646 }
8647 }
8648
8649 // If we dropped all of the candidates, bail out here.
8650 if (RepeatedSequenceLocs.size() < 2) {
8651 RepeatedSequenceLocs.clear();
8652 return std::nullopt;
8653 }
8654 }
8655
8656 // Does every candidate's MBB contain a call? If so, then we might have a call
8657 // in the range.
8658 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8659 // Check if the range contains a call. These require a save + restore of the
8660 // link register.
8661 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8662 bool ModStackToSaveLR = false;
8663 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8664 [](const MachineInstr &MI) { return MI.isCall(); }))
8665 ModStackToSaveLR = true;
8666
8667 // Handle the last instruction separately. If this is a tail call, then the
8668 // last instruction is a call. We don't want to save + restore in this case.
8669 // However, it could be possible that the last instruction is a call without
8670 // it being valid to tail call this sequence. We should consider this as
8671 // well.
8672 else if (FrameID != MachineOutlinerThunk &&
8673 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8674 ModStackToSaveLR = true;
8675
8676 if (ModStackToSaveLR) {
8677 // We can't fix up the stack. Bail out.
8678 if (!AllStackInstrsSafe) {
8679 RepeatedSequenceLocs.clear();
8680 return std::nullopt;
8681 }
8682
8683 // Save + restore LR.
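 // (One store and one load of LR, 4 bytes each, hence the extra 8 bytes.)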
8684 NumBytesToCreateFrame += 8;
8685 }
8686 }
8687
8688 // If we have CFI instructions, we can only outline if the outlined section
8689 // can be a tail call
8690 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8691 return std::nullopt;
8692
8693 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8694 NumBytesToCreateFrame, FrameID);
8695}
8696
8698 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8699 // If a bunch of candidates reach this point, they must agree on their return
8700 // address signing. It is therefore enough to just consider the signing
8701 // behaviour of one of them.
8702 const auto &CFn = Candidates.front().getMF()->getFunction();
8703
8704 // Since all candidates belong to the same module, just copy the
8705 // function-level attributes of an arbitrary function.
8706 if (CFn.hasFnAttribute("sign-return-address"))
8707 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8708 if (CFn.hasFnAttribute("sign-return-address-key"))
8709 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8710
8711 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8712}
8713
8715 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8716 const Function &F = MF.getFunction();
8717
8718 // Can F be deduplicated by the linker? If it can, don't outline from it.
8719 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8720 return false;
8721
8722 // Don't outline from functions with section markings; the program could
8723 // expect that all the code is in the named section.
8724 // FIXME: Allow outlining from multiple functions with the same section
8725 // marking.
8726 if (F.hasSection())
8727 return false;
8728
8729 // Outlining from functions with redzones is unsafe since the outliner may
8730 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8731 // outline from it.
8733 if (!AFI || AFI->hasRedZone().value_or(true))
8734 return false;
8735
8736 // FIXME: Determine whether it is safe to outline from functions which contain
8737 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
8738 // outlined together and ensure it is safe to outline with async unwind info,
8739 // required for saving & restoring VG around calls.
8740 if (AFI->hasStreamingModeChanges())
8741 return false;
8742
8743 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8745 return false;
8746
8747 // It's safe to outline from MF.
8748 return true;
8749}
8750
8753 unsigned &Flags) const {
8755 "Must track liveness!");
8757 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8758 Ranges;
8759 // According to the AArch64 Procedure Call Standard, the following are
8760 // undefined on entry/exit from a function call:
8761 //
8762 // * Registers x16, x17, (and thus w16, w17)
8763 // * Condition codes (and thus the NZCV register)
8764 //
8765 // If any of these registers are used inside or live across an outlined
8766 // function, then they may be modified later, either by the compiler or
8767 // some other tool (like the linker).
8768 //
8769 // To avoid outlining in these situations, partition each block into ranges
8770 // where these registers are dead. We will only outline from those ranges.
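 // (For example, a linker-inserted range-extension veneer between the call
 // site and the outlined function is allowed to clobber x16/x17.)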
8772 auto AreAllUnsafeRegsDead = [&LRU]() {
8773 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8774 LRU.available(AArch64::NZCV);
8775 };
8776
8777 // We need to know if LR is live across an outlining boundary later on in
8778 // order to decide how we'll create the outlined call, frame, etc.
8779 //
8780 // It's pretty expensive to check this for *every candidate* within a block.
8781 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8782 // to compute liveness from the end of the block for O(n) candidates within
8783 // the block.
8784 //
8785 // So, to improve the average case, let's keep track of liveness from the end
8786 // of the block to the beginning of *every outlinable range*. If we know that
8787 // LR is available in every range we could outline from, then we know that
8788 // we don't need to check liveness for any candidate within that range.
8789 bool LRAvailableEverywhere = true;
8790 // Compute liveness bottom-up.
8791 LRU.addLiveOuts(MBB);
8792 // Update flags that require info about the entire MBB.
8793 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8794 if (MI.isCall() && !MI.isTerminator())
8795 Flags |= MachineOutlinerMBBFlags::HasCalls;
8796 };
8797 // Range: [RangeBegin, RangeEnd)
8798 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8799 unsigned RangeLen;
8800 auto CreateNewRangeStartingAt =
8801 [&RangeBegin, &RangeEnd,
8802 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8803 RangeBegin = NewBegin;
8804 RangeEnd = std::next(RangeBegin);
8805 RangeLen = 0;
8806 };
8807 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8808 // At least one unsafe register is not dead. We do not want to outline at
8809 // this point. If it is long enough to outline from, save the range
8810 // [RangeBegin, RangeEnd).
8811 if (RangeLen > 1)
8812 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8813 };
8814 // Find the first point where all unsafe registers are dead.
8815 // FIND: <safe instr> <-- end of first potential range
8816 // SKIP: <unsafe def>
8817 // SKIP: ... everything between ...
8818 // SKIP: <unsafe use>
8819 auto FirstPossibleEndPt = MBB.instr_rbegin();
8820 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8821 LRU.stepBackward(*FirstPossibleEndPt);
8822 // Update flags that impact how we outline across the entire block,
8823 // regardless of safety.
8824 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8825 if (AreAllUnsafeRegsDead())
8826 break;
8827 }
8828 // If we exhausted the entire block, we have no safe ranges to outline.
8829 if (FirstPossibleEndPt == MBB.instr_rend())
8830 return Ranges;
8831 // Current range.
8832 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8833 // FirstPossibleEndPt points to the first place (searching backwards from
8834 // the end) where all unsafe registers are dead (if there is any such
8835 // point). Begin partitioning the MBB into ranges.
8836 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8837 LRU.stepBackward(MI);
8838 UpdateWholeMBBFlags(MI);
8839 if (!AreAllUnsafeRegsDead()) {
8840 SaveRangeIfNonEmpty();
8841 CreateNewRangeStartingAt(MI.getIterator());
8842 continue;
8843 }
8844 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8845 RangeBegin = MI.getIterator();
8846 ++RangeLen;
8847 }
8848 // The above loop misses the last (or only) range. If we are still safe, then
8849 // let's save the range.
8850 if (AreAllUnsafeRegsDead())
8851 SaveRangeIfNonEmpty();
8852 if (Ranges.empty())
8853 return Ranges;
8854 // We found the ranges bottom-up, but the mapping expects them top-down.
8855 // Reverse the order.
8856 std::reverse(Ranges.begin(), Ranges.end());
8857 // If there is at least one outlinable range where LR is unavailable
8858 // somewhere, remember that.
8859 if (!LRAvailableEverywhere)
8860 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8861 return Ranges;
8862}
8863
8867 unsigned Flags) const {
8868 MachineInstr &MI = *MIT;
8869 MachineBasicBlock *MBB = MI.getParent();
8870 MachineFunction *MF = MBB->getParent();
8872
8873 // Don't outline anything used for return address signing. The outlined
8874 // function will get signed later if needed
8875 switch (MI.getOpcode()) {
8876 case AArch64::PACM:
8877 case AArch64::PACIASP:
8878 case AArch64::PACIBSP:
8879 case AArch64::PACIASPPC:
8880 case AArch64::PACIBSPPC:
8881 case AArch64::AUTIASP:
8882 case AArch64::AUTIBSP:
8883 case AArch64::AUTIASPPCi:
8884 case AArch64::AUTIASPPCr:
8885 case AArch64::AUTIBSPPCi:
8886 case AArch64::AUTIBSPPCr:
8887 case AArch64::RETAA:
8888 case AArch64::RETAB:
8889 case AArch64::RETAASPPCi:
8890 case AArch64::RETAASPPCr:
8891 case AArch64::RETABSPPCi:
8892 case AArch64::RETABSPPCr:
8893 case AArch64::EMITBKEY:
8894 case AArch64::PAUTH_PROLOGUE:
8895 case AArch64::PAUTH_EPILOGUE:
8897 }
8898
8899 // Don't outline LOHs.
8900 if (FuncInfo->getLOHRelated().count(&MI))
8902
8903 // We can only outline these if we will tail call the outlined function, or
8904 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8905 // in a tail call.
8906 //
8907 // FIXME: If the proper fixups for the offset are implemented, this should be
8908 // possible.
8909 if (MI.isCFIInstruction())
8911
8912 // Is this a terminator for a basic block?
8913 if (MI.isTerminator())
8914 // TargetInstrInfo::getOutliningType has already filtered out anything
8915 // that would break this, so we can allow it here.
8917
8918 // Make sure none of the operands are un-outlinable.
8919 for (const MachineOperand &MOP : MI.operands()) {
8920 // A check preventing CFI indices was here before, but only CFI
8921 // instructions should have those.
8922 assert(!MOP.isCFIIndex());
8923
8924 // If it uses LR or W30 explicitly, then don't touch it.
8925 if (MOP.isReg() && !MOP.isImplicit() &&
8926 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8928 }
8929
8930 // Special cases for instructions that can always be outlined, but will fail
8931 // the later tests. E.g., ADRPs, which are PC-relative and may write LR, can
8932 // always be outlined because they don't require a *specific* value to be in LR.
8933 if (MI.getOpcode() == AArch64::ADRP)
8935
8936 // If MI is a call we might be able to outline it. We don't want to outline
8937 // any calls that rely on the position of items on the stack. When we outline
8938 // something containing a call, we have to emit a save and restore of LR in
8939 // the outlined function. Currently, this always happens by saving LR to the
8940 // stack. Thus, if we outline, say, half the parameters for a function call
8941 // plus the call, then we'll break the callee's expectations for the layout
8942 // of the stack.
8943 //
8944 // FIXME: Allow calls to functions which construct a stack frame, as long
8945 // as they don't access arguments on the stack.
8946 // FIXME: Figure out some way to analyze functions defined in other modules.
8947 // We should be able to compute the memory usage based on the IR calling
8948 // convention, even if we can't see the definition.
8949 if (MI.isCall()) {
8950 // Get the function associated with the call. Look at each operand and find
8951 // the one that represents the callee and get its name.
8952 const Function *Callee = nullptr;
8953 for (const MachineOperand &MOP : MI.operands()) {
8954 if (MOP.isGlobal()) {
8955 Callee = dyn_cast<Function>(MOP.getGlobal());
8956 break;
8957 }
8958 }
8959
8960 // Never outline calls to mcount. There isn't any rule that would require
8961 // this, but the Linux kernel's "ftrace" feature depends on it.
8962 if (Callee && Callee->getName() == "\01_mcount")
8964
8965 // If we don't know anything about the callee, assume it depends on the
8966 // stack layout of the caller. In that case, it's only legal to outline
8967 // as a tail-call. Explicitly list the call instructions we know about so we
8968 // don't get unexpected results with call pseudo-instructions.
8969 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8970 if (MI.getOpcode() == AArch64::BLR ||
8971 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8972 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8973
8974 if (!Callee)
8975 return UnknownCallOutlineType;
8976
8977 // We have a function we have information about. Check if it's something
8978 // we can safely outline.
8979 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
8980
8981 // We don't know what's going on with the callee at all. Don't touch it.
8982 if (!CalleeMF)
8983 return UnknownCallOutlineType;
8984
8985 // Check if we know anything about the callee saves on the function. If we
8986 // don't, then don't touch it, since that implies that we haven't
8987 // computed anything about its stack frame yet.
8988 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8989 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8990 MFI.getNumObjects() > 0)
8991 return UnknownCallOutlineType;
8992
8993 // At this point, we can say that CalleeMF ought to not pass anything on the
8994 // stack. Therefore, we can outline it.
8995 return outliner::InstrType::Legal;
8996 }
8997
8998 // Don't touch the link register or W30.
8999 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
9000 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
9001 return outliner::InstrType::Illegal;
9002
9003 // Don't outline BTI instructions, because that will prevent the outlining
9004 // site from being indirectly callable.
9005 if (hasBTISemantics(MI))
9006 return outliner::InstrType::Illegal;
9007
9008 return outliner::InstrType::Legal;
9009}
9010
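// After outlining saves LR on the stack, SP-relative loads and stores copied
// into the outlined body sit 16 bytes away from their original addresses;
// this walks the block and rescales their immediate offsets to compensate.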
9011void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
9012 for (MachineInstr &MI : MBB) {
9013 const MachineOperand *Base;
9014 TypeSize Width(0, false);
9015 int64_t Offset;
9016 bool OffsetIsScalable;
9017
9018 // Is this a load or store with an immediate offset with SP as the base?
9019 if (!MI.mayLoadOrStore() ||
9020 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
9021 &RI) ||
9022 (Base->isReg() && Base->getReg() != AArch64::SP))
9023 continue;
9024
9025 // It is, so we have to fix it up.
9026 TypeSize Scale(0U, false);
9027 int64_t Dummy1, Dummy2;
9028
9029 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
9030 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9031 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
9032 assert(Scale != 0 && "Unexpected opcode!");
9033 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9034
9035 // We've pushed the return address to the stack, so add 16 to the offset.
9036 // This is safe, since we already checked if it would overflow when we
9037 // checked if this instruction was legal to outline.
9038 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9039 StackOffsetOperand.setImm(NewImm);
9040 }
9041}
9042
9043static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9044 const AArch64InstrInfo *TII,
9045 bool ShouldSignReturnAddr) {
9046 if (!ShouldSignReturnAddr)
9047 return;
9048
9049 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9050 .setMIFlag(MachineInstr::FrameSetup);
9051 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
9052 TII->get(AArch64::PAUTH_EPILOGUE))
9053 .setMIFlag(MachineInstr::FrameDestroy);
9054}
9055
9056void AArch64InstrInfo::buildOutlinedFrame(
9057 MachineBasicBlock &MBB, MachineFunction &MF,
9058 const outliner::OutlinedFunction &OF) const {
9059
9060 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9061
9062 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9063 FI->setOutliningStyle("Tail Call");
9064 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9065 // For thunk outlining, rewrite the last instruction from a call to a
9066 // tail-call.
9067 MachineInstr *Call = &*--MBB.instr_end();
9068 unsigned TailOpcode;
9069 if (Call->getOpcode() == AArch64::BL) {
9070 TailOpcode = AArch64::TCRETURNdi;
9071 } else {
9072 assert(Call->getOpcode() == AArch64::BLR ||
9073 Call->getOpcode() == AArch64::BLRNoIP);
9074 TailOpcode = AArch64::TCRETURNriALL;
9075 }
9076 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9077 .add(Call->getOperand(0))
9078 .addImm(0);
9079 MBB.insert(MBB.end(), TC);
9080 Call->eraseFromParent();
9081
9082 FI->setOutliningStyle("Thunk");
9083 }
9084
9085 bool IsLeafFunction = true;
9086
9087 // Is there a call in the outlined range?
9088 auto IsNonTailCall = [](const MachineInstr &MI) {
9089 return MI.isCall() && !MI.isReturn();
9090 };
9091
9092 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9093 // Fix up the instructions in the range, since we're going to modify the
9094 // stack.
9095
9096 // Bugzilla ID: 46767
9097 // TODO: Check if fixing up twice is safe so we can outline these.
9098 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9099 "Can only fix up stack references once");
9100 fixupPostOutline(MBB);
9101
9102 IsLeafFunction = false;
9103
9104 // LR has to be a live in so that we can save it.
9105 if (!MBB.isLiveIn(AArch64::LR))
9106 MBB.addLiveIn(AArch64::LR);
9107
9108 MachineBasicBlock::iterator It = MBB.begin();
9109 MachineBasicBlock::iterator Et = MBB.end();
9110
9111 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9112 OF.FrameConstructionID == MachineOutlinerThunk)
9113 Et = std::prev(MBB.end());
9114
9115 // Insert a save before the outlined region
9116 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9117 .addReg(AArch64::SP, RegState::Define)
9118 .addReg(AArch64::LR)
9119 .addReg(AArch64::SP)
9120 .addImm(-16);
9121 It = MBB.insert(It, STRXpre);
9122
9123 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9124 const TargetSubtargetInfo &STI = MF.getSubtarget();
9125 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9126 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9127
9128 // Add a CFI saying the stack was moved 16 B down.
9129 int64_t StackPosEntry =
9130 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9131 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9132 .addCFIIndex(StackPosEntry)
9133 .setMIFlags(MachineInstr::FrameSetup);
9134
9135 // Add a CFI saying that the LR that we want to find is now 16 B higher
9136 // than before.
9137 int64_t LRPosEntry = MF.addFrameInst(
9138 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9139 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9140 .addCFIIndex(LRPosEntry)
9141 .setMIFlags(MachineInstr::FrameSetup);
9142 }
9143
9144 // Insert a restore before the terminator for the function.
9145 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9146 .addReg(AArch64::SP, RegState::Define)
9147 .addReg(AArch64::LR, RegState::Define)
9148 .addReg(AArch64::SP)
9149 .addImm(16);
9150 Et = MBB.insert(Et, LDRXpost);
9151 }
9152
9153 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9154
9155 // If this is a tail call outlined function, then there's already a return.
9158 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9159 return;
9160 }
9161
9162 // It's not a tail call, so we have to insert the return ourselves.
9163
9164 // LR has to be a live in so that we can return to it.
9165 if (!MBB.isLiveIn(AArch64::LR))
9166 MBB.addLiveIn(AArch64::LR);
9167
9168 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9169 .addReg(AArch64::LR);
9170 MBB.insert(MBB.end(), ret);
9171
9172 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9173
9174 FI->setOutliningStyle("Function");
9175
9176 // Did we have to modify the stack by saving the link register?
9177 if (OF.FrameConstructionID != MachineOutlinerDefault)
9178 return;
9179
9180 // We modified the stack.
9181 // Walk over the basic block and fix up all the stack accesses.
9182 fixupPostOutline(MBB);
9183}
9184
9185MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9186 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9187 MachineFunction &MF, outliner::Candidate &C) const {
9188
9189 // Are we tail calling?
9190 if (C.CallConstructionID == MachineOutlinerTailCall) {
9191 // If yes, then we can just branch to the label.
9192 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9193 .addGlobalAddress(M.getNamedValue(MF.getName()))
9194 .addImm(0));
9195 return It;
9196 }
9197
9198 // Are we saving the link register?
9199 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9200 C.CallConstructionID == MachineOutlinerThunk) {
9201 // No, so just insert the call.
9202 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9203 .addGlobalAddress(M.getNamedValue(MF.getName())));
9204 return It;
9205 }
9206
9207 // We want to return the spot where we inserted the call.
9208 MachineBasicBlock::iterator CallPt;
9209
9210 // Instructions for saving and restoring LR around the call instruction we're
9211 // going to insert.
9212 MachineInstr *Save;
9213 MachineInstr *Restore;
9214 // Can we save to a register?
9215 if (C.CallConstructionID == MachineOutlinerRegSave) {
9216 // FIXME: This logic should be sunk into a target-specific interface so that
9217 // we don't have to recompute the register.
9218 Register Reg = findRegisterToSaveLRTo(C);
9219 assert(Reg && "No callee-saved register available?");
9220
9221 // LR has to be a live in so that we can save it.
9222 if (!MBB.isLiveIn(AArch64::LR))
9223 MBB.addLiveIn(AArch64::LR);
9224
9225 // Save and restore LR from Reg.
9226 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9227 .addReg(AArch64::XZR)
9228 .addReg(AArch64::LR)
9229 .addImm(0);
9230 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9231 .addReg(AArch64::XZR)
9232 .addReg(Reg)
9233 .addImm(0);
9234 } else {
9235 // We have the default case. Save and restore from SP.
9236 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9237 .addReg(AArch64::SP, RegState::Define)
9238 .addReg(AArch64::LR)
9239 .addReg(AArch64::SP)
9240 .addImm(-16);
9241 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9242 .addReg(AArch64::SP, RegState::Define)
9243 .addReg(AArch64::LR, RegState::Define)
9244 .addReg(AArch64::SP)
9245 .addImm(16);
9246 }
9247
9248 It = MBB.insert(It, Save);
9249 It++;
9250
9251 // Insert the call.
9252 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9253 .addGlobalAddress(M.getNamedValue(MF.getName())));
9254 CallPt = It;
9255 It++;
9256
9257 It = MBB.insert(It, Restore);
9258 return CallPt;
9259}
9260
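// By default, only outline from functions that are optimized for minimum size.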
9261bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
9262 MachineFunction &MF) const {
9263 return MF.getFunction().hasMinSize();
9264}
9265
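// Zero out Reg: MOVZ #0 for general-purpose registers, an SVE DUP #0 for
// vector registers when SVE is available, otherwise a NEON MOVI #0.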
9266void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9267 MachineBasicBlock::iterator Iter,
9268 DebugLoc &DL,
9269 bool AllowSideEffects) const {
9270 const MachineFunction &MF = *MBB.getParent();
9271 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9272 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9273
9274 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9275 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9276 } else if (STI.hasSVE()) {
9277 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9278 .addImm(0)
9279 .addImm(0);
9280 } else {
9281 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9282 .addImm(0);
9283 }
9284}
9285
9286std::optional<DestSourcePair>
9287AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9288
9289 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
9290 // and a zero shift immediate are used as aliases for the mov instruction.
9291 if (MI.getOpcode() == AArch64::ORRWrs &&
9292 MI.getOperand(1).getReg() == AArch64::WZR &&
9293 MI.getOperand(3).getImm() == 0x0 &&
9294 // Check that the w->w move is not a zero-extending w->x mov.
9295 (!MI.getOperand(0).getReg().isVirtual() ||
9296 MI.getOperand(0).getSubReg() == 0) &&
9297 (!MI.getOperand(0).getReg().isPhysical() ||
9298 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9299 AArch64::X0,
9300 /*TRI=*/nullptr) == -1))
9301 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9302
9303 if (MI.getOpcode() == AArch64::ORRXrs &&
9304 MI.getOperand(1).getReg() == AArch64::XZR &&
9305 MI.getOperand(3).getImm() == 0x0)
9306 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9307
9308 return std::nullopt;
9309}
9310
9311std::optional<DestSourcePair>
9312AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9313 if (MI.getOpcode() == AArch64::ORRWrs &&
9314 MI.getOperand(1).getReg() == AArch64::WZR &&
9315 MI.getOperand(3).getImm() == 0x0)
9316 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9317 return std::nullopt;
9318}
9319
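// If MI is an ADD/SUB (flag-setting or plain) of an immediate that defines
// Reg, return the source register together with the signed byte offset applied.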
9320std::optional<RegImmPair>
9321AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9322 int Sign = 1;
9323 int64_t Offset = 0;
9324
9325 // TODO: Handle cases where Reg is a super- or sub-register of the
9326 // destination register.
9327 const MachineOperand &Op0 = MI.getOperand(0);
9328 if (!Op0.isReg() || Reg != Op0.getReg())
9329 return std::nullopt;
9330
9331 switch (MI.getOpcode()) {
9332 default:
9333 return std::nullopt;
9334 case AArch64::SUBWri:
9335 case AArch64::SUBXri:
9336 case AArch64::SUBSWri:
9337 case AArch64::SUBSXri:
9338 Sign *= -1;
9339 [[fallthrough]];
9340 case AArch64::ADDSWri:
9341 case AArch64::ADDSXri:
9342 case AArch64::ADDWri:
9343 case AArch64::ADDXri: {
9344 // TODO: Third operand can be global address (usually some string).
9345 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9346 !MI.getOperand(2).isImm())
9347 return std::nullopt;
9348 int Shift = MI.getOperand(3).getImm();
9349 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9350 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9351 }
9352 }
9353 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9354}
9355
9356/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9357/// the destination register then, if possible, describe the value in terms of
9358/// the source register.
9359static std::optional<ParamLoadedValue>
9360describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
9361 const TargetInstrInfo *TII,
9362 const TargetRegisterInfo *TRI) {
9363 auto DestSrc = TII->isCopyLikeInstr(MI);
9364 if (!DestSrc)
9365 return std::nullopt;
9366
9367 Register DestReg = DestSrc->Destination->getReg();
9368 Register SrcReg = DestSrc->Source->getReg();
9369
9370 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9371
9372 // If the described register is the destination, just return the source.
9373 if (DestReg == DescribedReg)
9374 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9375
9376 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9377 if (MI.getOpcode() == AArch64::ORRWrs &&
9378 TRI->isSuperRegister(DestReg, DescribedReg))
9379 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9380
9381 // We may need to describe the lower part of an ORRXrs move.
9382 if (MI.getOpcode() == AArch64::ORRXrs &&
9383 TRI->isSubRegister(DestReg, DescribedReg)) {
9384 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9385 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9386 }
9387
9388 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9389 "Unhandled ORR[XW]rs copy case");
9390
9391 return std::nullopt;
9392}
9393
9394bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9395 // Functions cannot be split to different sections on AArch64 if they have
9396 // a red zone. This is because relaxing a cross-section branch may require
9397 // incrementing the stack pointer to spill a register, which would overwrite
9398 // the red zone.
9399 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9400 return false;
9401
9402 return TargetInstrInfo::isFunctionSafeToSplit(MF);
9403}
9404
9405bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9406 const MachineBasicBlock &MBB) const {
9407 // Asm Goto blocks can contain conditional branches to goto labels, which can
9408 // get moved out of range of the branch instruction.
9409 auto isAsmGoto = [](const MachineInstr &MI) {
9410 return MI.getOpcode() == AArch64::INLINEASM_BR;
9411 };
9412 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9413 return false;
9414
9415 // Because jump tables are label-relative instead of table-relative, they all
9416 // must be in the same section or relocation fixup handling will fail.
9417
9418 // Check if MBB is a jump table target
9419 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
9420 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9421 return llvm::is_contained(JTE.MBBs, &MBB);
9422 };
9423 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9424 return false;
9425
9426 // Check if MBB contains a jump table lookup
9427 for (const MachineInstr &MI : MBB) {
9428 switch (MI.getOpcode()) {
9429 case TargetOpcode::G_BRJT:
9430 case AArch64::JumpTableDest32:
9431 case AArch64::JumpTableDest16:
9432 case AArch64::JumpTableDest8:
9433 return false;
9434 default:
9435 continue;
9436 }
9437 }
9438
9439 // MBB isn't a special case, so it's safe to be split to the cold section.
9440 return true;
9441}
9442
9443std::optional<ParamLoadedValue>
9444AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
9445 Register Reg) const {
9446 const MachineFunction *MF = MI.getMF();
9447 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9448 switch (MI.getOpcode()) {
9449 case AArch64::MOVZWi:
9450 case AArch64::MOVZXi: {
9451 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9452 // 64-bit parameters, so we need to consider super-registers.
9453 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9454 return std::nullopt;
9455
9456 if (!MI.getOperand(1).isImm())
9457 return std::nullopt;
9458 int64_t Immediate = MI.getOperand(1).getImm();
9459 int Shift = MI.getOperand(2).getImm();
9460 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9461 nullptr);
9462 }
9463 case AArch64::ORRWrs:
9464 case AArch64::ORRXrs:
9465 return describeORRLoadedValue(MI, Reg, this, TRI);
9466 }
9467
9468 return TargetInstrInfo::describeLoadedValue(MI, Reg);
9469}
9470
9471bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9472 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9473 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9474 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9475 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9476
9477 // Anyexts are nops.
9478 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9479 return true;
9480
9481 Register DefReg = ExtMI.getOperand(0).getReg();
9482 if (!MRI.hasOneNonDBGUse(DefReg))
9483 return false;
9484
9485 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9486 // addressing mode.
9487 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9488 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9489}
9490
9491uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9492 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9493}
9494
9495bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9496 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9497}
9498
9499bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9500 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9501}
9502
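// Permit larger tail duplication at aggressive optimization levels (six
// instructions) than at lower ones (two).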
9503unsigned int
9504AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9505 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9506}
9507
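// Accept reg+imm forms (a signed 9-bit unscaled offset, or an unsigned 12-bit
// offset that is a multiple of the access size) and reg+reg forms whose scale
// is either 1 or the access size; a nonzero offset combined with a scaled
// index is rejected.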
9508bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9509 unsigned Scale) const {
9510 if (Offset && Scale)
9511 return false;
9512
9513 // Check Reg + Imm
9514 if (!Scale) {
9515 // 9-bit signed offset
9516 if (isInt<9>(Offset))
9517 return true;
9518
9519 // 12-bit unsigned offset
9520 unsigned Shift = Log2_64(NumBytes);
9521 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9522 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9523 (Offset >> Shift) << Shift == Offset)
9524 return true;
9525 return false;
9526 }
9527
9528 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9529 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9530}
9531
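// Pick the opcode for an indirect call: the BLRNoIP pseudo when SLS-BLR
// hardening is enabled, the plain BLR otherwise.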
9532unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9533 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9534 return AArch64::BLRNoIP;
9535 else
9536 return AArch64::BLR;
9537}
9538
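// Lower a probed stack allocation: loop subtracting ProbeSize from SP and
// storing XZR to the new top of stack until SP reaches TargetReg, then set
// SP = TargetReg and issue a final load to probe the last page.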
9539MachineBasicBlock::iterator
9540AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9541 Register TargetReg, bool FrameSetup) const {
9542 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9543
9544 MachineBasicBlock &MBB = *MBBI->getParent();
9545 MachineFunction &MF = *MBB.getParent();
9546 const AArch64InstrInfo *TII =
9547 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9548 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9549 DebugLoc DL = MBB.findDebugLoc(MBBI);
9550
9551 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9552 MachineBasicBlock *LoopTestMBB =
9553 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9554 MF.insert(MBBInsertPoint, LoopTestMBB);
9555 MachineBasicBlock *LoopBodyMBB =
9556 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9557 MF.insert(MBBInsertPoint, LoopBodyMBB);
9558 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9559 MF.insert(MBBInsertPoint, ExitMBB);
9560 MachineInstr::MIFlag Flags =
9561 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9562
9563 // LoopTest:
9564 // SUB SP, SP, #ProbeSize
9565 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9566 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9567
9568 // CMP SP, TargetReg
9569 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9570 AArch64::XZR)
9571 .addReg(AArch64::SP)
9572 .addReg(TargetReg)
9573 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9574 .setMIFlags(Flags);
9575
9576 // B.<Cond> LoopExit
9577 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9578 .addImm(AArch64CC::LE)
9579 .addMBB(ExitMBB)
9580 .setMIFlags(Flags);
9581
9582 // STR XZR, [SP]
9583 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9584 .addReg(AArch64::XZR)
9585 .addReg(AArch64::SP)
9586 .addImm(0)
9587 .setMIFlags(Flags);
9588
9589 // B loop
9590 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9591 .addMBB(LoopTestMBB)
9592 .setMIFlags(Flags);
9593
9594 // LoopExit:
9595 // MOV SP, TargetReg
9596 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9597 .addReg(TargetReg)
9598 .addImm(0)
9599 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9600 .setMIFlags(Flags);
9601
9602 // LDR XZR, [SP]
9603 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9604 .addReg(AArch64::XZR, RegState::Define)
9605 .addReg(AArch64::SP)
9606 .addImm(0)
9607 .setMIFlags(Flags);
9608
9609 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9610 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9611
9612 LoopTestMBB->addSuccessor(ExitMBB);
9613 LoopTestMBB->addSuccessor(LoopBodyMBB);
9614 LoopBodyMBB->addSuccessor(LoopTestMBB);
9615 MBB.addSuccessor(LoopTestMBB);
9616
9617 // Update liveins.
9618 if (MF.getRegInfo().reservedRegsFrozen())
9619 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
9620
9621 return ExitMBB->begin();
9622}
9623
9624namespace {
9625class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9626 MachineFunction *MF;
9627 const TargetInstrInfo *TII;
9628 const TargetRegisterInfo *TRI;
9629 MachineRegisterInfo &MRI;
9630
9631 /// The block of the loop
9632 MachineBasicBlock *LoopBB;
9633 /// The conditional branch of the loop
9634 MachineInstr *CondBranch;
9635 /// The compare instruction for loop control
9636 MachineInstr *Comp;
9637 /// The number of the operand of the loop counter value in Comp
9638 unsigned CompCounterOprNum;
9639 /// The instruction that updates the loop counter value
9640 MachineInstr *Update;
9641 /// The number of the operand of the loop counter value in Update
9642 unsigned UpdateCounterOprNum;
9643 /// The initial value of the loop counter
9644 Register Init;
9645 /// True iff Update is a predecessor of Comp
9646 bool IsUpdatePriorComp;
9647
9648 /// The normalized condition used by createTripCountGreaterCondition()
9649 SmallVector<MachineOperand, 4> Cond;
9650
9651public:
9652 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
9653 MachineInstr *Comp, unsigned CompCounterOprNum,
9654 MachineInstr *Update, unsigned UpdateCounterOprNum,
9655 Register Init, bool IsUpdatePriorComp,
9656 const SmallVectorImpl<MachineOperand> &Cond)
9657 : MF(Comp->getParent()->getParent()),
9658 TII(MF->getSubtarget().getInstrInfo()),
9659 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
9660 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
9661 CompCounterOprNum(CompCounterOprNum), Update(Update),
9662 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
9663 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
9664
9665 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9666 // Make the instructions for loop control be placed in stage 0.
9667 // The predecessors of Comp are considered by the caller.
9668 return MI == Comp;
9669 }
9670
9671 std::optional<bool> createTripCountGreaterCondition(
9672 int TC, MachineBasicBlock &MBB,
9673 SmallVectorImpl<MachineOperand> &CondParam) override {
9674 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9675 // Cond is normalized for such use.
9676 // The predecessors of the branch are assumed to have already been inserted.
9677 CondParam = Cond;
9678 return {};
9679 }
9680
9681 void createRemainingIterationsGreaterCondition(
9682 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
9683 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
9684
9685 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9686
9687 void adjustTripCount(int TripCountAdjust) override {}
9688
9689 void disposed() override {}
9690 bool isMVEExpanderSupported() override { return true; }
9691};
9692} // namespace
9693
9694/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
9695/// is replaced by ReplaceReg. The output register is newly created.
9696/// The other operands are unchanged from MI.
9697static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
9698 Register ReplaceReg, MachineBasicBlock &MBB,
9699 MachineBasicBlock::iterator InsertTo) {
9700 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
9701 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
9702 const TargetRegisterInfo *TRI =
9703 MBB.getParent()->getSubtarget().getRegisterInfo();
9704 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
9705 Register Result = 0;
9706 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
9707 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
9708 Result = MRI.createVirtualRegister(
9709 MRI.getRegClass(NewMI->getOperand(0).getReg()));
9710 NewMI->getOperand(I).setReg(Result);
9711 } else if (I == ReplaceOprNum) {
9712 MRI.constrainRegClass(
9713 ReplaceReg,
9714 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
9715 NewMI->getOperand(I).setReg(ReplaceReg);
9716 }
9717 }
9718 MBB.insert(InsertTo, NewMI);
9719 return Result;
9720}
9721
9722void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
9723 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
9724 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
9725 // Create and accumulate conditions for next TC iterations.
9726 // Example:
9727 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
9728 // # iteration of the kernel
9729 //
9730 // # insert the following instructions
9731 // cond = CSINCXr 0, 0, C, implicit $nzcv
9732 // counter = ADDXri counter, 1 # clone from this->Update
9733 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
9734 // cond = CSINCXr cond, cond, C, implicit $nzcv
9735 // ... (repeat TC times)
9736 // SUBSXri cond, 0, implicit-def $nzcv
9737
9738 assert(CondBranch->getOpcode() == AArch64::Bcc);
9739 // CondCode to exit the loop
9740 AArch64CC::CondCode CC =
9741 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
9742 if (CondBranch->getOperand(1).getMBB() == LoopBB)
9743 CC = AArch64CC::getInvertedCondCode(CC);
9744
9745 // Accumulate conditions to exit the loop
9746 Register AccCond = AArch64::XZR;
9747
9748 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
9749 auto AccumulateCond = [&](Register CurCond,
9750 AArch64CC::CondCode CC) {
9751 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
9752 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
9753 .addReg(NewCond, RegState::Define)
9754 .addReg(CurCond)
9755 .addReg(CurCond)
9756 .addImm(AArch64CC::getInvertedCondCode(CC));
9757 return NewCond;
9758 };
9759
9760 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
9761 // Update and Comp for I==0 already exist in MBB
9762 // (MBB is an unrolled kernel)
9763 Register Counter;
9764 for (int I = 0; I <= TC; ++I) {
9765 Register NextCounter;
9766 if (I != 0)
9767 NextCounter =
9768 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
9769
9770 AccCond = AccumulateCond(AccCond, CC);
9771
9772 if (I != TC) {
9773 if (I == 0) {
9774 if (Update != Comp && IsUpdatePriorComp) {
9775 Counter =
9776 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
9777 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
9778 MBB.end());
9779 } else {
9780 // can use already calculated value
9781 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
9782 }
9783 } else if (Update != Comp) {
9784 NextCounter =
9785 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9786 }
9787 }
9788 Counter = NextCounter;
9789 }
9790 } else {
9791 Register Counter;
9792 if (LastStage0Insts.empty()) {
9793 // use initial counter value (testing if the trip count is sufficient to
9794 // be executed by pipelined code)
9795 Counter = Init;
9796 if (IsUpdatePriorComp)
9797 Counter =
9798 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9799 } else {
9800 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
9801 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
9802 }
9803
9804 for (int I = 0; I <= TC; ++I) {
9805 Register NextCounter;
9806 NextCounter =
9807 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
9808 AccCond = AccumulateCond(AccCond, CC);
9809 if (I != TC && Update != Comp)
9810 NextCounter =
9811 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9812 Counter = NextCounter;
9813 }
9814 }
9815
9816 // If AccCond == 0, the remainder is greater than TC.
9817 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
9818 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
9819 .addReg(AccCond)
9820 .addImm(0)
9821 .addImm(0);
9822 Cond.clear();
9824}
9825
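/// Set RegMBB to the PHI operand incoming from MBB and RegOther to the operand
/// incoming from the other predecessor.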
9826static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
9827 Register &RegMBB, Register &RegOther) {
9828 assert(Phi.getNumOperands() == 5);
9829 if (Phi.getOperand(2).getMBB() == MBB) {
9830 RegMBB = Phi.getOperand(1).getReg();
9831 RegOther = Phi.getOperand(3).getReg();
9832 } else {
9833 assert(Phi.getOperand(4).getMBB() == MBB);
9834 RegMBB = Phi.getOperand(3).getReg();
9835 RegOther = Phi.getOperand(1).getReg();
9836 }
9837}
9838
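/// Return true if Reg is a virtual register defined outside of BB.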
9839static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
9840 if (!Reg.isVirtual())
9841 return false;
9842 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
9843 return MRI.getVRegDef(Reg)->getParent() != BB;
9844}
9845
9846/// If Reg is an induction variable, return true and set some parameters
9847static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
9848 MachineInstr *&UpdateInst,
9849 unsigned &UpdateCounterOprNum, Register &InitReg,
9850 bool &IsUpdatePriorComp) {
9851 // Example:
9852 //
9853 // Preheader:
9854 // InitReg = ...
9855 // LoopBB:
9856 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
9857 // Reg = COPY Reg0 ; COPY is ignored.
9858 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
9859 // ; Reg is the value calculated in the previous
9860 // ; iteration, so IsUpdatePriorComp == false.
9861
9862 if (LoopBB->pred_size() != 2)
9863 return false;
9864 if (!Reg.isVirtual())
9865 return false;
9866 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9867 UpdateInst = nullptr;
9868 UpdateCounterOprNum = 0;
9869 InitReg = 0;
9870 IsUpdatePriorComp = true;
9871 Register CurReg = Reg;
9872 while (true) {
9873 MachineInstr *Def = MRI.getVRegDef(CurReg);
9874 if (Def->getParent() != LoopBB)
9875 return false;
9876 if (Def->isCopy()) {
9877 // Ignore copy instructions unless they contain subregisters
9878 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
9879 return false;
9880 CurReg = Def->getOperand(1).getReg();
9881 } else if (Def->isPHI()) {
9882 if (InitReg != 0)
9883 return false;
9884 if (!UpdateInst)
9885 IsUpdatePriorComp = false;
9886 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
9887 } else {
9888 if (UpdateInst)
9889 return false;
9890 switch (Def->getOpcode()) {
9891 case AArch64::ADDSXri:
9892 case AArch64::ADDSWri:
9893 case AArch64::SUBSXri:
9894 case AArch64::SUBSWri:
9895 case AArch64::ADDXri:
9896 case AArch64::ADDWri:
9897 case AArch64::SUBXri:
9898 case AArch64::SUBWri:
9899 UpdateInst = Def;
9900 UpdateCounterOprNum = 1;
9901 break;
9902 case AArch64::ADDSXrr:
9903 case AArch64::ADDSWrr:
9904 case AArch64::SUBSXrr:
9905 case AArch64::SUBSWrr:
9906 case AArch64::ADDXrr:
9907 case AArch64::ADDWrr:
9908 case AArch64::SUBXrr:
9909 case AArch64::SUBWrr:
9910 UpdateInst = Def;
9911 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
9912 UpdateCounterOprNum = 1;
9913 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
9914 UpdateCounterOprNum = 2;
9915 else
9916 return false;
9917 break;
9918 default:
9919 return false;
9920 }
9921 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
9922 }
9923
9924 if (!CurReg.isVirtual())
9925 return false;
9926 if (Reg == CurReg)
9927 break;
9928 }
9929
9930 if (!UpdateInst)
9931 return false;
9932
9933 return true;
9934}
9935
9936std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9938 // Accept loops that meet the following conditions
9939 // * The conditional branch is BCC
9940 // * The compare instruction is ADDS/SUBS/WHILEXX
9941 // * One operand of the compare is an induction variable and the other is a
9942 // loop invariant value
9943 // * The induction variable is incremented/decremented by a single instruction
9944 // * Does not contain CALL or instructions which have unmodeled side effects
9945
9946 for (MachineInstr &MI : *LoopBB)
9947 if (MI.isCall() || MI.hasUnmodeledSideEffects())
9948 // This instruction may use NZCV, which interferes with the instruction to
9949 // be inserted for loop control.
9950 return nullptr;
9951
9952 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9953 SmallVector<MachineOperand, 4> Cond;
9954 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9955 return nullptr;
9956
9957 // Infinite loops are not supported
9958 if (TBB == LoopBB && FBB == LoopBB)
9959 return nullptr;
9960
9961 // Must be conditional branch
9962 if (TBB != LoopBB && FBB == nullptr)
9963 return nullptr;
9964
9965 assert((TBB == LoopBB || FBB == LoopBB) &&
9966 "The Loop must be a single-basic-block loop");
9967
9968 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9969 const TargetRegisterInfo &TRI = getRegisterInfo();
9970
9971 if (CondBranch->getOpcode() != AArch64::Bcc)
9972 return nullptr;
9973
9974 // Normalization for createTripCountGreaterCondition()
9975 if (TBB == LoopBB)
9976 reverseBranchCondition(Cond);
9977
9978 MachineInstr *Comp = nullptr;
9979 unsigned CompCounterOprNum = 0;
9980 for (MachineInstr &MI : reverse(*LoopBB)) {
9981 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9982 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
9983 // operands is a loop invariant value
9984
9985 switch (MI.getOpcode()) {
9986 case AArch64::SUBSXri:
9987 case AArch64::SUBSWri:
9988 case AArch64::ADDSXri:
9989 case AArch64::ADDSWri:
9990 Comp = &MI;
9991 CompCounterOprNum = 1;
9992 break;
9993 case AArch64::ADDSWrr:
9994 case AArch64::ADDSXrr:
9995 case AArch64::SUBSWrr:
9996 case AArch64::SUBSXrr:
9997 Comp = &MI;
9998 break;
9999 default:
10000 if (isWhileOpcode(MI.getOpcode())) {
10001 Comp = &MI;
10002 break;
10003 }
10004 return nullptr;
10005 }
10006
10007 if (CompCounterOprNum == 0) {
10008 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
10009 CompCounterOprNum = 2;
10010 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
10011 CompCounterOprNum = 1;
10012 else
10013 return nullptr;
10014 }
10015 break;
10016 }
10017 }
10018 if (!Comp)
10019 return nullptr;
10020
10021 MachineInstr *Update = nullptr;
10022 Register Init;
10023 bool IsUpdatePriorComp;
10024 unsigned UpdateCounterOprNum;
10025 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
10026 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
10027 return nullptr;
10028
10029 return std::make_unique<AArch64PipelinerLoopInfo>(
10030 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
10031 Init, IsUpdatePriorComp, Cond);
10032}
10033
10034#define GET_INSTRINFO_HELPERS
10035#define GET_INSTRMAP_INFO
10036#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruciton used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
outliner::InstrType getOutliningTypeImpl(const MachineModuleInfo &MMI, MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(const MachineModuleInfo &MMI, std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that sets the zero bit in the flags register.
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2, and the value it compares against in CmpValue.
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace a CSINC-branch sequence with a simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
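Illustrative values (assumed for the example, not taken from this file): the scale is the access size in bytes for the given opcode.

int ScaleX = AArch64InstrInfo::getMemScale(AArch64::LDRXui); // 8: 64-bit GPR load
int ScaleW = AArch64InstrInfo::getMemScale(AArch64::STRWui); // 4: 32-bit GPR store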
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if its results had been fed to a PTEST instruction along with the same general predicate.
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be referenced for the current subtarget.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
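A hedged sketch of the usual backward-scan idiom (identifiers TRI and MBB are assumed to be in scope):

LiveRegUnits Units(TRI);                 // TRI: const TargetRegisterInfo &
Units.addLiveOuts(MBB);                  // seed with the block's live-outs
for (const MachineInstr &MI : llvm::reverse(MBB))
  Units.stepBackward(MI);                // walk back to the start of the block
bool X16IsFree = Units.available(AArch64::X16); // liveness at block entry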
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the target.
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:793
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:558
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:600
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:573
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:664
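A hedged sketch of how such CFI instructions are typically attached to machine code (MF, MBB, MBBI, DL and TII assumed in scope):

unsigned CFIIndex =
    MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, /*Offset=*/16));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    .addCFIIndex(CFIIndex)
    .setMIFlags(MachineInstr::FrameSetup);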
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc objects that represent all of the machine registers that the target has.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks which refer to FromMBB to refer to this block.
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this block exits by falling through, control will transfer to the specified MBB.
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same as getFirstTerminator(), but ignores bundles and returns an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has no parent, prev, or next.
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:782
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isDead, isUndef).
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved registers stays constant.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register, or null if none is found.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
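Illustration (not from this file): an adjustment of 16 fixed bytes plus 2 x VG scalable bytes.

StackOffset Off = StackOffset::get(16, 2);
int64_t FixedPart    = Off.getFixed();    // 16
int64_t ScalablePart = Off.getScalable(); // 2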
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could replace the original code sequence.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command line.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:345
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:348
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
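Illustration (not from this file): fixed versus vscale-scaled sizes.

TypeSize FixedW    = TypeSize::getFixed(8);     // 8 bytes
TypeSize ScalableW = TypeSize::getScalable(16); // 16 x vscale bytes
bool IsScalable    = ScalableW.isScalable();       // true
uint64_t MinBytes  = ScalableW.getKnownMinValue(); // 16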
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) into the integer value it represents with regSize bits.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
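A hedged round-trip example using a value that is a valid AArch64 logical immediate (a repeating 0x00FF bit pattern):

uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00FF00FF00FF00FFULL, 64);
uint64_t Val = AArch64_AM::decodeLogicalImmediate(Enc, 64); // 0x00FF00FF00FF00FF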
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
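Illustration (not from this file): pack and unpack an LSL #12 shifter operand.

unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
AArch64_AM::ShiftExtendType Kind = AArch64_AM::getShiftType(ShiftImm);  // LSL
unsigned Amount = AArch64_AM::getShiftValue(ShiftImm);                  // 12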
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
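A hedged sketch (the AArch64_IMM namespace and the ImmInsnModel record come from AArch64ExpandImm.h, not from this file):

SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x0000FEDCBA980000ULL, 64, Insn);
for (const auto &I : Insn) {
  (void)I; // each entry names one MOVZ/MOVN/MOVK/ORR-style step and its operands
}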
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
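A hedged sketch: emit "add x0, x1, #16" before MBBI (MBB, MBBI, DL and TII assumed in scope):

BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X0)
    .addReg(AArch64::X1)
    .addImm(16)  // 12-bit unsigned immediate
    .addImm(0);  // shift amount (LSL #0)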
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
Definition: LogicalResult.h:67
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:296
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:346
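Illustration (not from this file): strength-reducing a multiply by a power of two.

uint64_t Imm = 64;
if (isPowerOf2_64(Imm)) {
  unsigned ShiftAmt = Log2_64(Imm); // 6; a multiply by Imm can become LSL #6
  (void)ShiftAmt;
}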
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
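A hedged sketch: subtract 32 fixed bytes plus 2 x VG scalable bytes from SP during frame setup (MBB, MBBI, DL and TII assumed in scope):

emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::get(-32, -2), TII, MachineInstr::FrameSetup);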
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:260
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition: STLExtras.h:2051
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
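Illustration (not from this file): both LEB128 encoders write to any raw_ostream, e.g. an in-memory buffer.

SmallString<16> Buf;
raw_svector_ostream OS(Buf);
encodeULEB128(300, OS); // bytes 0xAC 0x02
encodeSLEB128(-2, OS);  // byte  0x7E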
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:573
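Illustration (not from this file): interpreting a 12-bit field as a signed value, using the runtime two-argument overload.

int64_t A = SignExtend64(0xFFFull, 12); // -1
int64_t B = SignExtend64(0x7FFull, 12); // 2047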
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers NZCV.
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.