AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
40#include "llvm/IR/DebugLoc.h"
41#include "llvm/IR/GlobalValue.h"
42#include "llvm/MC/MCAsmInfo.h"
43#include "llvm/MC/MCInst.h"
45#include "llvm/MC/MCInstrDesc.h"
50#include "llvm/Support/LEB128.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57#include <utility>
58
59using namespace llvm;
60
61#define GET_INSTRINFO_CTOR_DTOR
62#include "AArch64GenInstrInfo.inc"
63
65 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
66 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
67
69 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
70 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
71
72static cl::opt<unsigned>
73 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
74 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
75
76static cl::opt<unsigned>
77 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
78 cl::desc("Restrict range of B instructions (DEBUG)"));
79
80AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
81 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
82 AArch64::CATCHRET),
83 RI(STI.getTargetTriple()), Subtarget(STI) {}
84
85/// GetInstSize - Return the number of bytes of code the specified
86/// instruction may be. This returns the maximum number of bytes.
87unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
88 const MachineBasicBlock &MBB = *MI.getParent();
89 const MachineFunction *MF = MBB.getParent();
90 const Function &F = MF->getFunction();
91 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
92
93 {
94 auto Op = MI.getOpcode();
95 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
96 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
97 }
98
99 // Meta-instructions emit no code.
100 if (MI.isMetaInstruction())
101 return 0;
102
103 // FIXME: We currently only handle pseudoinstructions that don't get expanded
104 // before the assembly printer.
105 unsigned NumBytes = 0;
106 const MCInstrDesc &Desc = MI.getDesc();
107
108 // The instruction size should preferably be set in
109 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (the default case).
110 // The specific cases below handle instructions of variable size.
111 switch (Desc.getOpcode()) {
112 default:
113 if (Desc.getSize())
114 return Desc.getSize();
115
116 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
117 // with fixed constant size but not specified in .td file) is a normal
118 // 4-byte insn.
119 NumBytes = 4;
120 break;
121 case TargetOpcode::STACKMAP:
122 // The upper bound for a stackmap intrinsic is the full length of its shadow
123 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
124 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
125 break;
126 case TargetOpcode::PATCHPOINT:
127 // The size of the patchpoint intrinsic is the number of bytes requested
128 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
129 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
130 break;
131 case TargetOpcode::STATEPOINT:
132 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
133 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
134 // No patch bytes means a normal call inst is emitted
135 if (NumBytes == 0)
136 NumBytes = 4;
137 break;
138 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
139 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
140 // instructions are expanded to the specified number of NOPs. Otherwise,
141 // they are expanded to 36-byte XRay sleds.
142 NumBytes =
143 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
144 break;
145 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
146 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
147 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
148 NumBytes = 36;
149 break;
150 case TargetOpcode::PATCHABLE_EVENT_CALL:
151 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
152 NumBytes = 24;
153 break;
154
155 case AArch64::SPACE:
156 NumBytes = MI.getOperand(1).getImm();
157 break;
158 case TargetOpcode::BUNDLE:
159 NumBytes = getInstBundleLength(MI);
160 break;
161 }
162
163 return NumBytes;
164}
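// For example (illustrative only): a STACKMAP requesting a 16-byte shadow
// reports a size of 16 here, while an ordinary fixed-width instruction such
// as ADDXri reports the default 4 bytes from its MCInstrDesc.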
165
166unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
167 unsigned Size = 0;
168 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
169 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
170 while (++I != E && I->isInsideBundle()) {
171 assert(!I->isBundle() && "No nested bundle!");
172 Size += getInstSizeInBytes(*I);
173 }
174 return Size;
175}
176
177static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
178 SmallVectorImpl<MachineOperand> &Cond) {
179 // Block ends with fall-through condbranch.
180 switch (LastInst->getOpcode()) {
181 default:
182 llvm_unreachable("Unknown branch instruction?");
183 case AArch64::Bcc:
184 Target = LastInst->getOperand(1).getMBB();
185 Cond.push_back(LastInst->getOperand(0));
186 break;
187 case AArch64::CBZW:
188 case AArch64::CBZX:
189 case AArch64::CBNZW:
190 case AArch64::CBNZX:
191 Target = LastInst->getOperand(1).getMBB();
192 Cond.push_back(MachineOperand::CreateImm(-1));
193 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
194 Cond.push_back(LastInst->getOperand(0));
195 break;
196 case AArch64::TBZW:
197 case AArch64::TBZX:
198 case AArch64::TBNZW:
199 case AArch64::TBNZX:
200 Target = LastInst->getOperand(2).getMBB();
201 Cond.push_back(MachineOperand::CreateImm(-1));
202 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
203 Cond.push_back(LastInst->getOperand(0));
204 Cond.push_back(LastInst->getOperand(1));
205 }
206}
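// Illustrative examples of the Cond encoding built above (these particular
// operands are made up, not taken from a test):
//   b.eq <bb>         -> Cond = { <cc: EQ> }
//   cbnz w0, <bb>     -> Cond = { -1, CBNZW, %w0 }
//   tbz  x1, #3, <bb> -> Cond = { -1, TBZX, %x1, 3 }
// The rest of this file (reverseBranchCondition, instantiateCondBranch,
// insertSelect) consumes this encoding.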
207
208static unsigned getBranchDisplacementBits(unsigned Opc) {
209 switch (Opc) {
210 default:
211 llvm_unreachable("unexpected opcode!");
212 case AArch64::B:
213 return BDisplacementBits;
214 case AArch64::TBNZW:
215 case AArch64::TBZW:
216 case AArch64::TBNZX:
217 case AArch64::TBZX:
218 return TBZDisplacementBits;
219 case AArch64::CBNZW:
220 case AArch64::CBZW:
221 case AArch64::CBNZX:
222 case AArch64::CBZX:
223 return CBZDisplacementBits;
224 case AArch64::Bcc:
225 return BCCDisplacementBits;
226 }
227}
228
229bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
230 int64_t BrOffset) const {
231 unsigned Bits = getBranchDisplacementBits(BranchOp);
232 assert(Bits >= 3 && "max branch displacement must be enough to jump "
233 "over conditional branch expansion");
234 return isIntN(Bits, BrOffset / 4);
235}
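// A worked example, assuming the default option values above: TB[N]Z has a
// 14-bit scaled displacement, so isBranchOffsetInRange accepts byte offsets
// in [-2^13 * 4, (2^13 - 1) * 4], i.e. roughly +/- 32 KiB.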
236
237MachineBasicBlock *
238AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
239 switch (MI.getOpcode()) {
240 default:
241 llvm_unreachable("unexpected opcode!");
242 case AArch64::B:
243 return MI.getOperand(0).getMBB();
244 case AArch64::TBZW:
245 case AArch64::TBNZW:
246 case AArch64::TBZX:
247 case AArch64::TBNZX:
248 return MI.getOperand(2).getMBB();
249 case AArch64::CBZW:
250 case AArch64::CBNZW:
251 case AArch64::CBZX:
252 case AArch64::CBNZX:
253 case AArch64::Bcc:
254 return MI.getOperand(1).getMBB();
255 }
256}
257
258void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
259 MachineBasicBlock &NewDestBB,
260 MachineBasicBlock &RestoreBB,
261 const DebugLoc &DL,
262 int64_t BrOffset,
263 RegScavenger *RS) const {
264 assert(RS && "RegScavenger required for long branching");
265 assert(MBB.empty() &&
266 "new block should be inserted for expanding unconditional branch");
267 assert(MBB.pred_size() == 1);
268 assert(RestoreBB.empty() &&
269 "restore block should be inserted for restoring clobbered registers");
270
271 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
272 // Offsets outside of the signed 33-bit range are not supported for ADRP +
273 // ADD.
274 if (!isInt<33>(BrOffset))
276 "Branch offsets outside of the signed 33-bit range not supported");
277
278 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
279 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
281 .addReg(Reg)
282 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
283 .addImm(0);
284 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
285 };
286
287 RS->enterBasicBlockEnd(MBB);
288 // If X16 is unused, we can rely on the linker to insert a range extension
289 // thunk if NewDestBB is out of range of a single B instruction.
290 constexpr Register Reg = AArch64::X16;
291 if (!RS->isRegUsed(Reg)) {
292 insertUnconditionalBranch(MBB, &NewDestBB, DL);
293 RS->setRegUsed(Reg);
294 return;
295 }
296
297 // If there's a free register and it's worth inflating the code size,
298 // manually insert the indirect branch.
299 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
300 if (Scavenged != AArch64::NoRegister &&
301 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
302 buildIndirectBranch(Scavenged, NewDestBB);
303 RS->setRegUsed(Scavenged);
304 return;
305 }
306
307 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
308 // with red zones.
309 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
310 if (!AFI || AFI->hasRedZone().value_or(true))
311 report_fatal_error(
312 "Unable to insert indirect branch inside function that has red zone");
313
314 // Otherwise, spill X16 and defer range extension to the linker.
315 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
316 .addReg(AArch64::SP, RegState::Define)
317 .addReg(Reg)
318 .addReg(AArch64::SP)
319 .addImm(-16);
320
321 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
322
323 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
324 .addReg(AArch64::SP, RegState::Define)
325 .addReg(Reg, RegState::Define)
326 .addReg(AArch64::SP)
327 .addImm(16);
328}
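// Schematically, the X16 spill path above produces something like the
// following (illustrative only):
//   str x16, [sp, #-16]!   ; in MBB, free X16 for a linker range-extension thunk
//   b   RestoreBB          ; may be turned into a thunk through X16 by the linker
//   ...
// RestoreBB:
//   ldr x16, [sp], #16     ; reload X16 on the far side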
329
330// Branch analysis.
333 MachineBasicBlock *&FBB,
335 bool AllowModify) const {
336 // If the block has no terminators, it just falls into the block after it.
337 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
338 if (I == MBB.end())
339 return false;
340
341 // Skip over SpeculationBarrierEndBB terminators
342 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
343 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
344 --I;
345 }
346
347 if (!isUnpredicatedTerminator(*I))
348 return false;
349
350 // Get the last instruction in the block.
351 MachineInstr *LastInst = &*I;
352
353 // If there is only one terminator instruction, process it.
354 unsigned LastOpc = LastInst->getOpcode();
355 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
356 if (isUncondBranchOpcode(LastOpc)) {
357 TBB = LastInst->getOperand(0).getMBB();
358 return false;
359 }
360 if (isCondBranchOpcode(LastOpc)) {
361 // Block ends with fall-through condbranch.
362 parseCondBranch(LastInst, TBB, Cond);
363 return false;
364 }
365 return true; // Can't handle indirect branch.
366 }
367
368 // Get the instruction before it if it is a terminator.
369 MachineInstr *SecondLastInst = &*I;
370 unsigned SecondLastOpc = SecondLastInst->getOpcode();
371
372 // If AllowModify is true and the block ends with two or more unconditional
373 // branches, delete all but the first unconditional branch.
374 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
375 while (isUncondBranchOpcode(SecondLastOpc)) {
376 LastInst->eraseFromParent();
377 LastInst = SecondLastInst;
378 LastOpc = LastInst->getOpcode();
379 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
380 // Return now; the only terminator is an unconditional branch.
381 TBB = LastInst->getOperand(0).getMBB();
382 return false;
383 }
384 SecondLastInst = &*I;
385 SecondLastOpc = SecondLastInst->getOpcode();
386 }
387 }
388
389 // If we're allowed to modify and the block ends in an unconditional branch
390 // which could simply fallthrough, remove the branch. (Note: This case only
391 // matters when we can't understand the whole sequence, otherwise it's also
392 // handled by BranchFolding.cpp.)
393 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
394 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
395 LastInst->eraseFromParent();
396 LastInst = SecondLastInst;
397 LastOpc = LastInst->getOpcode();
398 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
399 assert(!isUncondBranchOpcode(LastOpc) &&
400 "unreachable unconditional branches removed above");
401
402 if (isCondBranchOpcode(LastOpc)) {
403 // Block ends with fall-through condbranch.
404 parseCondBranch(LastInst, TBB, Cond);
405 return false;
406 }
407 return true; // Can't handle indirect branch.
408 }
409 SecondLastInst = &*I;
410 SecondLastOpc = SecondLastInst->getOpcode();
411 }
412
413 // If there are three terminators, we don't know what sort of block this is.
414 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
415 return true;
416
417 // If the block ends with a B and a Bcc, handle it.
418 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
419 parseCondBranch(SecondLastInst, TBB, Cond);
420 FBB = LastInst->getOperand(0).getMBB();
421 return false;
422 }
423
424 // If the block ends with two unconditional branches, handle it. The second
425 // one is not executed, so remove it.
426 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
427 TBB = SecondLastInst->getOperand(0).getMBB();
428 I = LastInst;
429 if (AllowModify)
430 I->eraseFromParent();
431 return false;
432 }
433
434 // ...likewise if it ends with an indirect branch followed by an unconditional
435 // branch.
436 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
437 I = LastInst;
438 if (AllowModify)
439 I->eraseFromParent();
440 return true;
441 }
442
443 // Otherwise, can't handle this.
444 return true;
445}
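// For illustration (block names are made up), a block ending in
//   b.lt %bb.2
//   b    %bb.3
// is reported as analyzable: TBB = %bb.2, FBB = %bb.3, Cond = { <cc: LT> }.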
446
447bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
448 MachineBranchPredicate &MBP,
449 bool AllowModify) const {
450 // For the moment, handle only a block which ends with a cb(n)zx followed by
451 // a fallthrough. Why this? Because it is a common form.
452 // TODO: Should we handle b.cc?
453
454 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
455 if (I == MBB.end())
456 return true;
457
458 // Skip over SpeculationBarrierEndBB terminators
459 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
460 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
461 --I;
462 }
463
464 if (!isUnpredicatedTerminator(*I))
465 return true;
466
467 // Get the last instruction in the block.
468 MachineInstr *LastInst = &*I;
469 unsigned LastOpc = LastInst->getOpcode();
470 if (!isCondBranchOpcode(LastOpc))
471 return true;
472
473 switch (LastOpc) {
474 default:
475 return true;
476 case AArch64::CBZW:
477 case AArch64::CBZX:
478 case AArch64::CBNZW:
479 case AArch64::CBNZX:
480 break;
481 };
482
483 MBP.TrueDest = LastInst->getOperand(1).getMBB();
484 assert(MBP.TrueDest && "expected!");
485 MBP.FalseDest = MBB.getNextNode();
486
487 MBP.ConditionDef = nullptr;
488 MBP.SingleUseCondition = false;
489
490 MBP.LHS = LastInst->getOperand(0);
491 MBP.RHS = MachineOperand::CreateImm(0);
492 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
493 : MachineBranchPredicate::PRED_EQ;
494 return false;
495}
496
497bool AArch64InstrInfo::reverseBranchCondition(
498 SmallVectorImpl<MachineOperand> &Cond) const {
499 if (Cond[0].getImm() != -1) {
500 // Regular Bcc
501 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
502 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
503 } else {
504 // Folded compare-and-branch
505 switch (Cond[1].getImm()) {
506 default:
507 llvm_unreachable("Unknown conditional branch!");
508 case AArch64::CBZW:
509 Cond[1].setImm(AArch64::CBNZW);
510 break;
511 case AArch64::CBNZW:
512 Cond[1].setImm(AArch64::CBZW);
513 break;
514 case AArch64::CBZX:
515 Cond[1].setImm(AArch64::CBNZX);
516 break;
517 case AArch64::CBNZX:
518 Cond[1].setImm(AArch64::CBZX);
519 break;
520 case AArch64::TBZW:
521 Cond[1].setImm(AArch64::TBNZW);
522 break;
523 case AArch64::TBNZW:
524 Cond[1].setImm(AArch64::TBZW);
525 break;
526 case AArch64::TBZX:
527 Cond[1].setImm(AArch64::TBNZX);
528 break;
529 case AArch64::TBNZX:
530 Cond[1].setImm(AArch64::TBZX);
531 break;
532 }
533 }
534
535 return false;
536}
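// For example, reversing Cond = { -1, CBZW, %w0 } yields
// Cond = { -1, CBNZW, %w0 }, and reversing a plain Bcc condition such as GE
// yields LT.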
537
538unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
539 int *BytesRemoved) const {
540 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
541 if (I == MBB.end())
542 return 0;
543
544 if (!isUncondBranchOpcode(I->getOpcode()) &&
545 !isCondBranchOpcode(I->getOpcode()))
546 return 0;
547
548 // Remove the branch.
549 I->eraseFromParent();
550
551 I = MBB.end();
552
553 if (I == MBB.begin()) {
554 if (BytesRemoved)
555 *BytesRemoved = 4;
556 return 1;
557 }
558 --I;
559 if (!isCondBranchOpcode(I->getOpcode())) {
560 if (BytesRemoved)
561 *BytesRemoved = 4;
562 return 1;
563 }
564
565 // Remove the branch.
566 I->eraseFromParent();
567 if (BytesRemoved)
568 *BytesRemoved = 8;
569
570 return 2;
571}
572
573void AArch64InstrInfo::instantiateCondBranch(
574 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
575 ArrayRef<MachineOperand> Cond) const {
576 if (Cond[0].getImm() != -1) {
577 // Regular Bcc
578 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
579 } else {
580 // Folded compare-and-branch
581 // Note that we use addOperand instead of addReg to keep the flags.
582 const MachineInstrBuilder MIB =
583 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
584 if (Cond.size() > 3)
585 MIB.addImm(Cond[3].getImm());
586 MIB.addMBB(TBB);
587 }
588}
589
590unsigned AArch64InstrInfo::insertBranch(
591 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
592 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
593 // Shouldn't be a fall through.
594 assert(TBB && "insertBranch must not be told to insert a fallthrough");
595
596 if (!FBB) {
597 if (Cond.empty()) // Unconditional branch?
598 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
599 else
600 instantiateCondBranch(MBB, DL, TBB, Cond);
601
602 if (BytesAdded)
603 *BytesAdded = 4;
604
605 return 1;
606 }
607
608 // Two-way conditional branch.
609 instantiateCondBranch(MBB, DL, TBB, Cond);
610 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
611
612 if (BytesAdded)
613 *BytesAdded = 8;
614
615 return 2;
616}
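// Usage sketch (illustrative): a two-way branch with Cond = { -1, CBZW, %w0 },
// a TBB and an FBB is materialized as
//   cbz w0, TBB
//   b   FBB
// and *BytesAdded is reported as 8.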
617
618// Find the original register that VReg is copied from.
619static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
620 while (Register::isVirtualRegister(VReg)) {
621 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
622 if (!DefMI->isFullCopy())
623 return VReg;
624 VReg = DefMI->getOperand(1).getReg();
625 }
626 return VReg;
627}
628
629// Determine if VReg is defined by an instruction that can be folded into a
630// csel instruction. If so, return the folded opcode, and the replacement
631// register.
632static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
633 unsigned *NewVReg = nullptr) {
634 VReg = removeCopies(MRI, VReg);
635 if (!Register::isVirtualRegister(VReg))
636 return 0;
637
638 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
639 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
640 unsigned Opc = 0;
641 unsigned SrcOpNum = 0;
642 switch (DefMI->getOpcode()) {
643 case AArch64::ADDSXri:
644 case AArch64::ADDSWri:
645 // if NZCV is used, do not fold.
646 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
647 true) == -1)
648 return 0;
649 // fall-through to ADDXri and ADDWri.
650 [[fallthrough]];
651 case AArch64::ADDXri:
652 case AArch64::ADDWri:
653 // add x, 1 -> csinc.
654 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
655 DefMI->getOperand(3).getImm() != 0)
656 return 0;
657 SrcOpNum = 1;
658 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
659 break;
660
661 case AArch64::ORNXrr:
662 case AArch64::ORNWrr: {
663 // not x -> csinv, represented as orn dst, xzr, src.
664 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
665 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
666 return 0;
667 SrcOpNum = 2;
668 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
669 break;
670 }
671
672 case AArch64::SUBSXrr:
673 case AArch64::SUBSWrr:
674 // if NZCV is used, do not fold.
675 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
676 true) == -1)
677 return 0;
678 // fall-through to SUBXrr and SUBWrr.
679 [[fallthrough]];
680 case AArch64::SUBXrr:
681 case AArch64::SUBWrr: {
682 // neg x -> csneg, represented as sub dst, xzr, src.
683 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
684 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
685 return 0;
686 SrcOpNum = 2;
687 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
688 break;
689 }
690 default:
691 return 0;
692 }
693 assert(Opc && SrcOpNum && "Missing parameters");
694
695 if (NewVReg)
696 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
697 return Opc;
698}
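// For example (illustrative vregs), given
//   %2:gpr32 = ADDWri %1, 1, 0
// canFoldIntoCSel(MRI, %2, &NewVReg) returns AArch64::CSINCWr and sets
// NewVReg = %1, because "add %2, %1, #1" can be folded into a csinc.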
699
700bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
701 ArrayRef<MachineOperand> Cond,
702 Register DstReg, Register TrueReg,
703 Register FalseReg, int &CondCycles,
704 int &TrueCycles,
705 int &FalseCycles) const {
706 // Check register classes.
707 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
708 const TargetRegisterClass *RC =
709 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
710 if (!RC)
711 return false;
712
713 // Also need to check the dest regclass, in case we're trying to optimize
714 // something like:
715 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
716 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
717 return false;
718
719 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
720 unsigned ExtraCondLat = Cond.size() != 1;
721
722 // GPRs are handled by csel.
723 // FIXME: Fold in x+1, -x, and ~x when applicable.
724 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
725 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
726 // Single-cycle csel, csinc, csinv, and csneg.
727 CondCycles = 1 + ExtraCondLat;
728 TrueCycles = FalseCycles = 1;
729 if (canFoldIntoCSel(MRI, TrueReg))
730 TrueCycles = 0;
731 else if (canFoldIntoCSel(MRI, FalseReg))
732 FalseCycles = 0;
733 return true;
734 }
735
736 // Scalar floating point is handled by fcsel.
737 // FIXME: Form fabs, fmin, and fmax when applicable.
738 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
739 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
740 CondCycles = 5 + ExtraCondLat;
741 TrueCycles = FalseCycles = 2;
742 return true;
743 }
744
745 // Can't do vectors.
746 return false;
747}
748
749void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
750 MachineBasicBlock::iterator I,
751 const DebugLoc &DL, Register DstReg,
752 ArrayRef<MachineOperand> Cond,
753 Register TrueReg, Register FalseReg) const {
754 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
755
756 // Parse the condition code, see parseCondBranch() above.
757 AArch64CC::CondCode CC;
758 switch (Cond.size()) {
759 default:
760 llvm_unreachable("Unknown condition opcode in Cond");
761 case 1: // b.cc
762 CC = AArch64CC::CondCode(Cond[0].getImm());
763 break;
764 case 3: { // cbz/cbnz
765 // We must insert a compare against 0.
766 bool Is64Bit;
767 switch (Cond[1].getImm()) {
768 default:
769 llvm_unreachable("Unknown branch opcode in Cond");
770 case AArch64::CBZW:
771 Is64Bit = false;
772 CC = AArch64CC::EQ;
773 break;
774 case AArch64::CBZX:
775 Is64Bit = true;
776 CC = AArch64CC::EQ;
777 break;
778 case AArch64::CBNZW:
779 Is64Bit = false;
780 CC = AArch64CC::NE;
781 break;
782 case AArch64::CBNZX:
783 Is64Bit = true;
784 CC = AArch64CC::NE;
785 break;
786 }
787 Register SrcReg = Cond[2].getReg();
788 if (Is64Bit) {
789 // cmp reg, #0 is actually subs xzr, reg, #0.
790 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
791 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
792 .addReg(SrcReg)
793 .addImm(0)
794 .addImm(0);
795 } else {
796 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
797 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
798 .addReg(SrcReg)
799 .addImm(0)
800 .addImm(0);
801 }
802 break;
803 }
804 case 4: { // tbz/tbnz
805 // We must insert a tst instruction.
806 switch (Cond[1].getImm()) {
807 default:
808 llvm_unreachable("Unknown branch opcode in Cond");
809 case AArch64::TBZW:
810 case AArch64::TBZX:
811 CC = AArch64CC::EQ;
812 break;
813 case AArch64::TBNZW:
814 case AArch64::TBNZX:
815 CC = AArch64CC::NE;
816 break;
817 }
818 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
819 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
820 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
821 .addReg(Cond[2].getReg())
822 .addImm(
823 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
824 else
825 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
826 .addReg(Cond[2].getReg())
827 .addImm(
828 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
829 break;
830 }
831 }
832
833 unsigned Opc = 0;
834 const TargetRegisterClass *RC = nullptr;
835 bool TryFold = false;
836 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
837 RC = &AArch64::GPR64RegClass;
838 Opc = AArch64::CSELXr;
839 TryFold = true;
840 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
841 RC = &AArch64::GPR32RegClass;
842 Opc = AArch64::CSELWr;
843 TryFold = true;
844 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
845 RC = &AArch64::FPR64RegClass;
846 Opc = AArch64::FCSELDrrr;
847 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
848 RC = &AArch64::FPR32RegClass;
849 Opc = AArch64::FCSELSrrr;
850 }
851 assert(RC && "Unsupported regclass");
852
853 // Try folding simple instructions into the csel.
854 if (TryFold) {
855 unsigned NewVReg = 0;
856 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
857 if (FoldedOpc) {
858 // The folded opcodes csinc, csinv and csneg apply the operation to
859 // FalseReg, so we need to invert the condition.
860 CC = AArch64CC::getInvertedCondCode(CC);
861 TrueReg = FalseReg;
862 } else
863 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
864
865 // Fold the operation. Leave any dead instructions for DCE to clean up.
866 if (FoldedOpc) {
867 FalseReg = NewVReg;
868 Opc = FoldedOpc;
869 // This extends the live range of NewVReg.
870 MRI.clearKillFlags(NewVReg);
871 }
872 }
873
874 // Pull all virtual registers into the appropriate class.
875 MRI.constrainRegClass(TrueReg, RC);
876 MRI.constrainRegClass(FalseReg, RC);
877
878 // Insert the csel.
879 BuildMI(MBB, I, DL, get(Opc), DstReg)
880 .addReg(TrueReg)
881 .addReg(FalseReg)
882 .addImm(CC);
883}
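// Usage sketch (illustrative): for Cond = { -1, CBZW, %w1 } and GPR32 inputs
// this emits, assuming no foldable definition of either input was found:
//   subs wzr, w1, #0        ; compare against zero
//   csel w0, w<true>, w<false>, eq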
884
885// Return true if Imm can be loaded into a register by a "cheap" sequence of
886// instructions. For now, "cheap" means at most two instructions.
887static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
888 if (BitSize == 32)
889 return true;
890
891 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
892 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
893 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
894 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
895
896 return Is.size() <= 2;
897}
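// For example (illustrative values): MOVi64imm #42 expands to a single MOVZ
// and is "cheap", whereas 0x123456789ABCDEF0 needs a MOVZ plus three MOVKs
// and is not.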
898
899// FIXME: this implementation should be micro-architecture dependent, so a
900// micro-architecture target hook should be introduced here in future.
901bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
902 if (Subtarget.hasExynosCheapAsMoveHandling()) {
903 if (isExynosCheapAsMove(MI))
904 return true;
905 return MI.isAsCheapAsAMove();
906 }
907
908 switch (MI.getOpcode()) {
909 default:
910 return MI.isAsCheapAsAMove();
911
912 case AArch64::ADDWrs:
913 case AArch64::ADDXrs:
914 case AArch64::SUBWrs:
915 case AArch64::SUBXrs:
916 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
917
918 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
919 // ORRXri, it is as cheap as MOV.
920 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
921 case AArch64::MOVi32imm:
922 return isCheapImmediate(MI, 32);
923 case AArch64::MOVi64imm:
924 return isCheapImmediate(MI, 64);
925 }
926}
927
928bool AArch64InstrInfo::isExynosCheapAsMove(const MachineInstr &MI) {
929 switch (MI.getOpcode()) {
930 default:
931 return false;
932
933 case AArch64::ADDWrs:
934 case AArch64::ADDXrs:
935 case AArch64::ADDSWrs:
936 case AArch64::ADDSXrs: {
937 unsigned Imm = MI.getOperand(3).getImm();
938 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
939 if (ShiftVal == 0)
940 return true;
941 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
942 }
943
944 case AArch64::ADDWrx:
945 case AArch64::ADDXrx:
946 case AArch64::ADDXrx64:
947 case AArch64::ADDSWrx:
948 case AArch64::ADDSXrx:
949 case AArch64::ADDSXrx64: {
950 unsigned Imm = MI.getOperand(3).getImm();
951 switch (AArch64_AM::getArithExtendType(Imm)) {
952 default:
953 return false;
954 case AArch64_AM::UXTB:
955 case AArch64_AM::UXTH:
956 case AArch64_AM::UXTW:
957 case AArch64_AM::UXTX:
958 return AArch64_AM::getArithShiftValue(Imm) <= 4;
959 }
960 }
961
962 case AArch64::SUBWrs:
963 case AArch64::SUBSWrs: {
964 unsigned Imm = MI.getOperand(3).getImm();
965 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
966 return ShiftVal == 0 ||
967 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
968 }
969
970 case AArch64::SUBXrs:
971 case AArch64::SUBSXrs: {
972 unsigned Imm = MI.getOperand(3).getImm();
973 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
974 return ShiftVal == 0 ||
975 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
976 }
977
978 case AArch64::SUBWrx:
979 case AArch64::SUBXrx:
980 case AArch64::SUBXrx64:
981 case AArch64::SUBSWrx:
982 case AArch64::SUBSXrx:
983 case AArch64::SUBSXrx64: {
984 unsigned Imm = MI.getOperand(3).getImm();
985 switch (AArch64_AM::getArithExtendType(Imm)) {
986 default:
987 return false;
988 case AArch64_AM::UXTB:
989 case AArch64_AM::UXTH:
990 case AArch64_AM::UXTW:
991 case AArch64_AM::UXTX:
992 return AArch64_AM::getArithShiftValue(Imm) == 0;
993 }
994 }
995
996 case AArch64::LDRBBroW:
997 case AArch64::LDRBBroX:
998 case AArch64::LDRBroW:
999 case AArch64::LDRBroX:
1000 case AArch64::LDRDroW:
1001 case AArch64::LDRDroX:
1002 case AArch64::LDRHHroW:
1003 case AArch64::LDRHHroX:
1004 case AArch64::LDRHroW:
1005 case AArch64::LDRHroX:
1006 case AArch64::LDRQroW:
1007 case AArch64::LDRQroX:
1008 case AArch64::LDRSBWroW:
1009 case AArch64::LDRSBWroX:
1010 case AArch64::LDRSBXroW:
1011 case AArch64::LDRSBXroX:
1012 case AArch64::LDRSHWroW:
1013 case AArch64::LDRSHWroX:
1014 case AArch64::LDRSHXroW:
1015 case AArch64::LDRSHXroX:
1016 case AArch64::LDRSWroW:
1017 case AArch64::LDRSWroX:
1018 case AArch64::LDRSroW:
1019 case AArch64::LDRSroX:
1020 case AArch64::LDRWroW:
1021 case AArch64::LDRWroX:
1022 case AArch64::LDRXroW:
1023 case AArch64::LDRXroX:
1024 case AArch64::PRFMroW:
1025 case AArch64::PRFMroX:
1026 case AArch64::STRBBroW:
1027 case AArch64::STRBBroX:
1028 case AArch64::STRBroW:
1029 case AArch64::STRBroX:
1030 case AArch64::STRDroW:
1031 case AArch64::STRDroX:
1032 case AArch64::STRHHroW:
1033 case AArch64::STRHHroX:
1034 case AArch64::STRHroW:
1035 case AArch64::STRHroX:
1036 case AArch64::STRQroW:
1037 case AArch64::STRQroX:
1038 case AArch64::STRSroW:
1039 case AArch64::STRSroX:
1040 case AArch64::STRWroW:
1041 case AArch64::STRWroX:
1042 case AArch64::STRXroW:
1043 case AArch64::STRXroX: {
1044 unsigned IsSigned = MI.getOperand(3).getImm();
1045 return !IsSigned;
1046 }
1047 }
1048}
1049
1050bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1051 unsigned Opc = MI.getOpcode();
1052 switch (Opc) {
1053 default:
1054 return false;
1055 case AArch64::SEH_StackAlloc:
1056 case AArch64::SEH_SaveFPLR:
1057 case AArch64::SEH_SaveFPLR_X:
1058 case AArch64::SEH_SaveReg:
1059 case AArch64::SEH_SaveReg_X:
1060 case AArch64::SEH_SaveRegP:
1061 case AArch64::SEH_SaveRegP_X:
1062 case AArch64::SEH_SaveFReg:
1063 case AArch64::SEH_SaveFReg_X:
1064 case AArch64::SEH_SaveFRegP:
1065 case AArch64::SEH_SaveFRegP_X:
1066 case AArch64::SEH_SetFP:
1067 case AArch64::SEH_AddFP:
1068 case AArch64::SEH_Nop:
1069 case AArch64::SEH_PrologEnd:
1070 case AArch64::SEH_EpilogStart:
1071 case AArch64::SEH_EpilogEnd:
1072 case AArch64::SEH_PACSignLR:
1073 case AArch64::SEH_SaveAnyRegQP:
1074 case AArch64::SEH_SaveAnyRegQPX:
1075 return true;
1076 }
1077}
1078
1080 Register &SrcReg, Register &DstReg,
1081 unsigned &SubIdx) const {
1082 switch (MI.getOpcode()) {
1083 default:
1084 return false;
1085 case AArch64::SBFMXri: // aka sxtw
1086 case AArch64::UBFMXri: // aka uxtw
1087 // Check for the 32 -> 64 bit extension case, these instructions can do
1088 // much more.
1089 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1090 return false;
1091 // This is a signed or unsigned 32 -> 64 bit extension.
1092 SrcReg = MI.getOperand(1).getReg();
1093 DstReg = MI.getOperand(0).getReg();
1094 SubIdx = AArch64::sub_32;
1095 return true;
1096 }
1097}
1098
1100 const MachineInstr &MIa, const MachineInstr &MIb) const {
1102 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1103 int64_t OffsetA = 0, OffsetB = 0;
1104 TypeSize WidthA(0, false), WidthB(0, false);
1105 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1106
1107 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1108 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1109
1110 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1111 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1112 return false;
1113
1114 // Retrieve the base, offset from the base and width. Width
1115 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1116 // the bases are identical, and the offset of a lower memory access +
1117 // the width doesn't overlap the offset of a higher memory access,
1118 // then the memory accesses are different.
1119 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1120 // are assumed to have the same scale (vscale).
1121 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1122 WidthA, TRI) &&
1123 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1124 WidthB, TRI)) {
1125 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1126 OffsetAIsScalable == OffsetBIsScalable) {
1127 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1128 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1129 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1130 if (LowWidth.isScalable() == OffsetAIsScalable &&
1131 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1132 return true;
1133 }
1134 }
1135 return false;
1136}
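// For example (illustrative): "ldr x0, [x1]" and "str x2, [x1, #8]" share the
// same base register, both have a fixed width of 8 bytes, and 0 + 8 <= 8, so
// they are reported as trivially disjoint. Accesses with different base
// registers are conservatively not.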
1137
1138bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1139 const MachineBasicBlock *MBB,
1140 const MachineFunction &MF) const {
1141 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1142 return true;
1143
1144 // Do not move an instruction that can be recognized as a branch target.
1145 if (hasBTISemantics(MI))
1146 return true;
1147
1148 switch (MI.getOpcode()) {
1149 case AArch64::HINT:
1150 // CSDB hints are scheduling barriers.
1151 if (MI.getOperand(0).getImm() == 0x14)
1152 return true;
1153 break;
1154 case AArch64::DSB:
1155 case AArch64::ISB:
1156 // DSB and ISB also are scheduling barriers.
1157 return true;
1158 case AArch64::MSRpstatesvcrImm1:
1159 // SMSTART and SMSTOP are also scheduling barriers.
1160 return true;
1161 default:;
1162 }
1163 if (isSEHInstruction(MI))
1164 return true;
1165 auto Next = std::next(MI.getIterator());
1166 return Next != MBB->end() && Next->isCFIInstruction();
1167}
1168
1169/// analyzeCompare - For a comparison instruction, return the source registers
1170/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1171/// Return true if the comparison instruction can be analyzed.
1173 Register &SrcReg2, int64_t &CmpMask,
1174 int64_t &CmpValue) const {
1175 // The first operand can be a frame index where we'd normally expect a
1176 // register.
1177 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1178 if (!MI.getOperand(1).isReg())
1179 return false;
1180
1181 switch (MI.getOpcode()) {
1182 default:
1183 break;
1184 case AArch64::PTEST_PP:
1185 case AArch64::PTEST_PP_ANY:
1186 SrcReg = MI.getOperand(0).getReg();
1187 SrcReg2 = MI.getOperand(1).getReg();
1188 // Not sure about the mask and value for now...
1189 CmpMask = ~0;
1190 CmpValue = 0;
1191 return true;
1192 case AArch64::SUBSWrr:
1193 case AArch64::SUBSWrs:
1194 case AArch64::SUBSWrx:
1195 case AArch64::SUBSXrr:
1196 case AArch64::SUBSXrs:
1197 case AArch64::SUBSXrx:
1198 case AArch64::ADDSWrr:
1199 case AArch64::ADDSWrs:
1200 case AArch64::ADDSWrx:
1201 case AArch64::ADDSXrr:
1202 case AArch64::ADDSXrs:
1203 case AArch64::ADDSXrx:
1204 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1205 SrcReg = MI.getOperand(1).getReg();
1206 SrcReg2 = MI.getOperand(2).getReg();
1207 CmpMask = ~0;
1208 CmpValue = 0;
1209 return true;
1210 case AArch64::SUBSWri:
1211 case AArch64::ADDSWri:
1212 case AArch64::SUBSXri:
1213 case AArch64::ADDSXri:
1214 SrcReg = MI.getOperand(1).getReg();
1215 SrcReg2 = 0;
1216 CmpMask = ~0;
1217 CmpValue = MI.getOperand(2).getImm();
1218 return true;
1219 case AArch64::ANDSWri:
1220 case AArch64::ANDSXri:
1221 // ANDS does not use the same encoding scheme as the other xxxS
1222 // instructions.
1223 SrcReg = MI.getOperand(1).getReg();
1224 SrcReg2 = 0;
1225 CmpMask = ~0;
1226 CmpValue = AArch64_AM::decodeLogicalImmediate(
1227 MI.getOperand(2).getImm(),
1228 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1229 return true;
1230 }
1231
1232 return false;
1233}
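// For example (illustrative vregs): for "%0:gpr32 = SUBSWri %1, 42, 0" this
// returns true with SrcReg = %1, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 42.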
1234
1235static bool UpdateOperandRegClass(MachineInstr &Instr) {
1236 MachineBasicBlock *MBB = Instr.getParent();
1237 assert(MBB && "Can't get MachineBasicBlock here");
1238 MachineFunction *MF = MBB->getParent();
1239 assert(MF && "Can't get MachineFunction here");
1240 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1241 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1242 MachineRegisterInfo *MRI = &MF->getRegInfo();
1243
1244 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1245 ++OpIdx) {
1246 MachineOperand &MO = Instr.getOperand(OpIdx);
1247 const TargetRegisterClass *OpRegCstraints =
1248 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1249
1250 // If there's no constraint, there's nothing to do.
1251 if (!OpRegCstraints)
1252 continue;
1253 // If the operand is a frame index, there's nothing to do here.
1254 // A frame index operand will resolve correctly during PEI.
1255 if (MO.isFI())
1256 continue;
1257
1258 assert(MO.isReg() &&
1259 "Operand has register constraints without being a register!");
1260
1261 Register Reg = MO.getReg();
1262 if (Reg.isPhysical()) {
1263 if (!OpRegCstraints->contains(Reg))
1264 return false;
1265 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1266 !MRI->constrainRegClass(Reg, OpRegCstraints))
1267 return false;
1268 }
1269
1270 return true;
1271}
1272
1273/// Return the opcode that does not set flags when possible - otherwise
1274/// return the original opcode. The caller is responsible to do the actual
1275/// substitution and legality checking.
1276unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1277 // Don't convert all compare instructions, because for some the zero register
1278 // encoding becomes the sp register.
1279 bool MIDefinesZeroReg = false;
1280 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1281 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1282 MIDefinesZeroReg = true;
1283
1284 switch (MI.getOpcode()) {
1285 default:
1286 return MI.getOpcode();
1287 case AArch64::ADDSWrr:
1288 return AArch64::ADDWrr;
1289 case AArch64::ADDSWri:
1290 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1291 case AArch64::ADDSWrs:
1292 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1293 case AArch64::ADDSWrx:
1294 return AArch64::ADDWrx;
1295 case AArch64::ADDSXrr:
1296 return AArch64::ADDXrr;
1297 case AArch64::ADDSXri:
1298 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1299 case AArch64::ADDSXrs:
1300 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1301 case AArch64::ADDSXrx:
1302 return AArch64::ADDXrx;
1303 case AArch64::SUBSWrr:
1304 return AArch64::SUBWrr;
1305 case AArch64::SUBSWri:
1306 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1307 case AArch64::SUBSWrs:
1308 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1309 case AArch64::SUBSWrx:
1310 return AArch64::SUBWrx;
1311 case AArch64::SUBSXrr:
1312 return AArch64::SUBXrr;
1313 case AArch64::SUBSXri:
1314 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1315 case AArch64::SUBSXrs:
1316 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1317 case AArch64::SUBSXrx:
1318 return AArch64::SUBXrx;
1319 }
1320}
1321
1322enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1323
1324/// True when condition flags are accessed (either by writing or reading)
1325/// on the instruction trace starting at From and ending at To.
1326///
1327/// Note: If From and To are from different blocks it's assumed CC are accessed
1328/// on the path.
1331 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1332 // Early exit if To is at the beginning of the BB.
1333 if (To == To->getParent()->begin())
1334 return true;
1335
1336 // Check whether the instructions are in the same basic block
1337 // If not, assume the condition flags might get modified somewhere.
1338 if (To->getParent() != From->getParent())
1339 return true;
1340
1341 // From must be above To.
1342 assert(std::any_of(
1343 ++To.getReverse(), To->getParent()->rend(),
1344 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1345
1346 // We iterate backward starting at \p To until we hit \p From.
1347 for (const MachineInstr &Instr :
1348 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1349 if (((AccessToCheck & AK_Write) &&
1350 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1351 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1352 return true;
1353 }
1354 return false;
1355}
1356
1357/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1358/// operation which could set the flags in an identical manner
1359bool AArch64InstrInfo::optimizePTestInstr(
1360 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1361 const MachineRegisterInfo *MRI) const {
1362 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1363 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1364 auto NewOp = Pred->getOpcode();
1365 bool OpChanged = false;
1366
1367 unsigned MaskOpcode = Mask->getOpcode();
1368 unsigned PredOpcode = Pred->getOpcode();
1369 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1370 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1371
1372 if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike) &&
1373 getElementSizeForOpcode(MaskOpcode) ==
1374 getElementSizeForOpcode(PredOpcode) &&
1375 Mask->getOperand(1).getImm() == 31) {
1376 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1377 // redundant since WHILE performs an implicit PTEST with an all active
1378 // mask. Must be an all active predicate of matching element size.
1379
1380 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1381 // PTEST_LIKE instruction uses the same all active mask and the element
1382 // size matches. If the PTEST has a condition of any then it is always
1383 // redundant.
1384 if (PredIsPTestLike) {
1385 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1386 if (Mask != PTestLikeMask && PTest->getOpcode() != AArch64::PTEST_PP_ANY)
1387 return false;
1388 }
1389
1390 // Fallthrough to simply remove the PTEST.
1391 } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike) &&
1392 PTest->getOpcode() == AArch64::PTEST_PP_ANY) {
1393 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1394 // instruction that sets the flags as PTEST would. This is only valid when
1395 // the condition is any.
1396
1397 // Fallthrough to simply remove the PTEST.
1398 } else if (PredIsPTestLike) {
1399 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1400 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1401 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1402 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1403 // performed by the compare could consider fewer lanes for these element
1404 // sizes.
1405 //
1406 // For example, consider
1407 //
1408 // ptrue p0.b ; P0=1111-1111-1111-1111
1409 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1410 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1411 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1412 // ; ^ last active
1413 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1414 // ; ^ last active
1415 //
1416 // where the compare generates a canonical all active 32-bit predicate
1417 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1418 // active flag, whereas the PTEST instruction with the same mask doesn't.
1419 // For PTEST_ANY this doesn't apply as the flags in this case would be
1420 // identical regardless of element size.
1421 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1422 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1423 if ((Mask != PTestLikeMask) ||
1424 (PredElementSize != AArch64::ElementSizeB &&
1425 PTest->getOpcode() != AArch64::PTEST_PP_ANY))
1426 return false;
1427
1428 // Fallthrough to simply remove the PTEST.
1429 } else {
1430 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1431 // opcode so the PTEST becomes redundant.
1432 switch (PredOpcode) {
1433 case AArch64::AND_PPzPP:
1434 case AArch64::BIC_PPzPP:
1435 case AArch64::EOR_PPzPP:
1436 case AArch64::NAND_PPzPP:
1437 case AArch64::NOR_PPzPP:
1438 case AArch64::ORN_PPzPP:
1439 case AArch64::ORR_PPzPP:
1440 case AArch64::BRKA_PPzP:
1441 case AArch64::BRKPA_PPzPP:
1442 case AArch64::BRKB_PPzP:
1443 case AArch64::BRKPB_PPzPP:
1444 case AArch64::RDFFR_PPz: {
1445 // Check to see if our mask is the same. If not the resulting flag bits
1446 // may be different and we can't remove the ptest.
1447 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1448 if (Mask != PredMask)
1449 return false;
1450 break;
1451 }
1452 case AArch64::BRKN_PPzP: {
1453 // BRKN uses an all active implicit mask to set flags unlike the other
1454 // flag-setting instructions.
1455 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1456 if ((MaskOpcode != AArch64::PTRUE_B) ||
1457 (Mask->getOperand(1).getImm() != 31))
1458 return false;
1459 break;
1460 }
1461 case AArch64::PTRUE_B:
1462 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1463 break;
1464 default:
1465 // Bail out if we don't recognize the input
1466 return false;
1467 }
1468
1469 NewOp = convertToFlagSettingOpc(PredOpcode);
1470 OpChanged = true;
1471 }
1472
1473 const TargetRegisterInfo *TRI = &getRegisterInfo();
1474
1475 // If another instruction between Pred and PTest accesses flags, don't remove
1476 // the ptest or update the earlier instruction to modify them.
1477 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1478 return false;
1479
1480 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1481 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1482 // operand to be replaced with an equivalent instruction that also sets the
1483 // flags.
1484 Pred->setDesc(get(NewOp));
1485 PTest->eraseFromParent();
1486 if (OpChanged) {
1487 bool succeeded = UpdateOperandRegClass(*Pred);
1488 (void)succeeded;
1489 assert(succeeded && "Operands have incompatible register classes!");
1490 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1491 }
1492
1493 // Ensure that the flags def is live.
1494 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1495 unsigned i = 0, e = Pred->getNumOperands();
1496 for (; i != e; ++i) {
1497 MachineOperand &MO = Pred->getOperand(i);
1498 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1499 MO.setIsDead(false);
1500 break;
1501 }
1502 }
1503 }
1504 return true;
1505}
1506
1507/// Try to optimize a compare instruction. A compare instruction is an
1508/// instruction which produces AArch64::NZCV. It is truly a compare
1509/// instruction
1510/// when there are no uses of its destination register.
1511///
1512/// The following steps are tried in order:
1513/// 1. Convert CmpInstr into an unconditional version.
1514/// 2. Remove CmpInstr if there is an earlier instruction producing the needed
1515/// condition code or an instruction which can be converted into such an
1516/// instruction.
1517/// Only comparison with zero is supported.
1519 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1520 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1521 assert(CmpInstr.getParent());
1522 assert(MRI);
1523
1524 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1525 int DeadNZCVIdx =
1526 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1527 if (DeadNZCVIdx != -1) {
1528 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1529 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1530 CmpInstr.eraseFromParent();
1531 return true;
1532 }
1533 unsigned Opc = CmpInstr.getOpcode();
1534 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1535 if (NewOpc == Opc)
1536 return false;
1537 const MCInstrDesc &MCID = get(NewOpc);
1538 CmpInstr.setDesc(MCID);
1539 CmpInstr.removeOperand(DeadNZCVIdx);
1540 bool succeeded = UpdateOperandRegClass(CmpInstr);
1541 (void)succeeded;
1542 assert(succeeded && "Some operands reg class are incompatible!");
1543 return true;
1544 }
1545
1546 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1547 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1548 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1549
1550 if (SrcReg2 != 0)
1551 return false;
1552
1553 // CmpInstr is a Compare instruction if destination register is not used.
1554 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1555 return false;
1556
1557 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1558 return true;
1559 return (CmpValue == 0 || CmpValue == 1) &&
1560 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1561}
1562
1563/// Get opcode of S version of Instr.
1564/// If Instr is S version its opcode is returned.
1565/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1566/// or we are not interested in it.
1567static unsigned sForm(MachineInstr &Instr) {
1568 switch (Instr.getOpcode()) {
1569 default:
1570 return AArch64::INSTRUCTION_LIST_END;
1571
1572 case AArch64::ADDSWrr:
1573 case AArch64::ADDSWri:
1574 case AArch64::ADDSXrr:
1575 case AArch64::ADDSXri:
1576 case AArch64::SUBSWrr:
1577 case AArch64::SUBSWri:
1578 case AArch64::SUBSXrr:
1579 case AArch64::SUBSXri:
1580 return Instr.getOpcode();
1581
1582 case AArch64::ADDWrr:
1583 return AArch64::ADDSWrr;
1584 case AArch64::ADDWri:
1585 return AArch64::ADDSWri;
1586 case AArch64::ADDXrr:
1587 return AArch64::ADDSXrr;
1588 case AArch64::ADDXri:
1589 return AArch64::ADDSXri;
1590 case AArch64::ADCWr:
1591 return AArch64::ADCSWr;
1592 case AArch64::ADCXr:
1593 return AArch64::ADCSXr;
1594 case AArch64::SUBWrr:
1595 return AArch64::SUBSWrr;
1596 case AArch64::SUBWri:
1597 return AArch64::SUBSWri;
1598 case AArch64::SUBXrr:
1599 return AArch64::SUBSXrr;
1600 case AArch64::SUBXri:
1601 return AArch64::SUBSXri;
1602 case AArch64::SBCWr:
1603 return AArch64::SBCSWr;
1604 case AArch64::SBCXr:
1605 return AArch64::SBCSXr;
1606 case AArch64::ANDWri:
1607 return AArch64::ANDSWri;
1608 case AArch64::ANDXri:
1609 return AArch64::ANDSXri;
1610 }
1611}
1612
1613/// Check if AArch64::NZCV should be alive in successors of MBB.
1614static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1615 for (auto *BB : MBB->successors())
1616 if (BB->isLiveIn(AArch64::NZCV))
1617 return true;
1618 return false;
1619}
1620
1621/// \returns The condition code operand index for \p Instr if it is a branch
1622/// or select and -1 otherwise.
1623static int
1624findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1625 switch (Instr.getOpcode()) {
1626 default:
1627 return -1;
1628
1629 case AArch64::Bcc: {
1630 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1631 assert(Idx >= 2);
1632 return Idx - 2;
1633 }
1634
1635 case AArch64::CSINVWr:
1636 case AArch64::CSINVXr:
1637 case AArch64::CSINCWr:
1638 case AArch64::CSINCXr:
1639 case AArch64::CSELWr:
1640 case AArch64::CSELXr:
1641 case AArch64::CSNEGWr:
1642 case AArch64::CSNEGXr:
1643 case AArch64::FCSELSrrr:
1644 case AArch64::FCSELDrrr: {
1645 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1646 assert(Idx >= 1);
1647 return Idx - 1;
1648 }
1649 }
1650}
1651
1652/// Find a condition code used by the instruction.
1653/// Returns AArch64CC::Invalid if either the instruction does not use condition
1654/// codes or we don't optimize CmpInstr in the presence of such instructions.
1655static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1656 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1657 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1658 Instr.getOperand(CCIdx).getImm())
1659 : AArch64CC::Invalid;
1660}
1661
1663static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1664 UsedNZCV UsedFlags;
1665 switch (CC) {
1666 default:
1667 break;
1668
1669 case AArch64CC::EQ: // Z set
1670 case AArch64CC::NE: // Z clear
1671 UsedFlags.Z = true;
1672 break;
1673
1674 case AArch64CC::HI: // Z clear and C set
1675 case AArch64CC::LS: // Z set or C clear
1676 UsedFlags.Z = true;
1677 [[fallthrough]];
1678 case AArch64CC::HS: // C set
1679 case AArch64CC::LO: // C clear
1680 UsedFlags.C = true;
1681 break;
1682
1683 case AArch64CC::MI: // N set
1684 case AArch64CC::PL: // N clear
1685 UsedFlags.N = true;
1686 break;
1687
1688 case AArch64CC::VS: // V set
1689 case AArch64CC::VC: // V clear
1690 UsedFlags.V = true;
1691 break;
1692
1693 case AArch64CC::GT: // Z clear, N and V the same
1694 case AArch64CC::LE: // Z set, N and V differ
1695 UsedFlags.Z = true;
1696 [[fallthrough]];
1697 case AArch64CC::GE: // N and V the same
1698 case AArch64CC::LT: // N and V differ
1699 UsedFlags.N = true;
1700 UsedFlags.V = true;
1701 break;
1702 }
1703 return UsedFlags;
1704}
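// For example, getUsedNZCV(AArch64CC::GT) reports Z, N and V as used, while
// getUsedNZCV(AArch64CC::EQ) reports only Z.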
1705
1706/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1707/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1708/// \returns std::nullopt otherwise.
1709///
1710/// Collect instructions using that flags in \p CCUseInstrs if provided.
1711std::optional<UsedNZCV>
1712llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1713 const TargetRegisterInfo &TRI,
1714 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1715 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1716 if (MI.getParent() != CmpParent)
1717 return std::nullopt;
1718
1719 if (areCFlagsAliveInSuccessors(CmpParent))
1720 return std::nullopt;
1721
1722 UsedNZCV NZCVUsedAfterCmp;
1723 for (MachineInstr &Instr : instructionsWithoutDebug(
1724 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1725 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1726 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1727 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1728 return std::nullopt;
1729 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1730 if (CCUseInstrs)
1731 CCUseInstrs->push_back(&Instr);
1732 }
1733 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1734 break;
1735 }
1736 return NZCVUsedAfterCmp;
1737}
1738
1739static bool isADDSRegImm(unsigned Opcode) {
1740 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1741}
1742
1743static bool isSUBSRegImm(unsigned Opcode) {
1744 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1745}
1746
1747/// Check if CmpInstr can be substituted by MI.
1748///
1749/// CmpInstr can be substituted:
1750/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1751/// - and, MI and CmpInstr are from the same MachineBB
1752/// - and, condition flags are not alive in successors of the CmpInstr parent
1753/// - and, if MI opcode is the S form there must be no defs of flags between
1754/// MI and CmpInstr
1755/// or if MI opcode is not the S form there must be neither defs of flags
1756/// nor uses of flags between MI and CmpInstr.
1757/// - and, if C/V flags are not used after CmpInstr
1758/// or if N flag is used but MI produces poison value if signed overflow
1759/// occurs.
1760static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1761 const TargetRegisterInfo &TRI) {
1762 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1763 // that may or may not set flags.
1764 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1765
1766 const unsigned CmpOpcode = CmpInstr.getOpcode();
1767 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1768 return false;
1769
1770 assert((CmpInstr.getOperand(2).isImm() &&
1771 CmpInstr.getOperand(2).getImm() == 0) &&
1772 "Caller guarantees that CmpInstr compares with constant 0");
1773
1774 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1775 if (!NZVCUsed || NZVCUsed->C)
1776 return false;
1777
1778 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1779 // '%vreg = add ...' or '%vreg = sub ...'.
1780 // Condition flag V is used to indicate signed overflow.
1781 // 1) MI and CmpInstr set N and V to the same value.
1782 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1783 // signed overflow occurs, so CmpInstr could still be simplified away.
1784 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1785 return false;
1786
1787 AccessKind AccessToCheck = AK_Write;
1788 if (sForm(MI) != MI.getOpcode())
1789 AccessToCheck = AK_All;
1790 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1791}
1792
1793/// Substitute an instruction comparing to zero with another instruction
1794/// which produces needed condition flags.
1795///
1796/// Return true on success.
1797bool AArch64InstrInfo::substituteCmpToZero(
1798 MachineInstr &CmpInstr, unsigned SrcReg,
1799 const MachineRegisterInfo &MRI) const {
1800 // Get the unique definition of SrcReg.
1801 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1802 if (!MI)
1803 return false;
1804
1805 const TargetRegisterInfo &TRI = getRegisterInfo();
1806
1807 unsigned NewOpc = sForm(*MI);
1808 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1809 return false;
1810
1811 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1812 return false;
1813
1814 // Update the instruction to set NZCV.
1815 MI->setDesc(get(NewOpc));
1816 CmpInstr.eraseFromParent();
1817 bool succeeded = UpdateOperandRegClass(*MI);
1818 (void)succeeded;
1819 assert(succeeded && "Some operands reg class are incompatible!");
1820 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1821 return true;
1822}
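// Illustrative transformation (in assembly form, not from a specific test):
//   sub  w8, w0, w1
//   cmp  w8, #0
//   b.eq <bb>
// becomes
//   subs w8, w0, w1
//   b.eq <bb>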
1823
1824/// \returns True if \p CmpInstr can be removed.
1825///
1826/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1827/// codes used in \p CCUseInstrs must be inverted.
1828static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1829 int CmpValue, const TargetRegisterInfo &TRI,
1830 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1831 bool &IsInvertCC) {
1832 assert((CmpValue == 0 || CmpValue == 1) &&
1833 "Only comparisons to 0 or 1 considered for removal!");
1834
1835 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1836 unsigned MIOpc = MI.getOpcode();
1837 if (MIOpc == AArch64::CSINCWr) {
1838 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1839 MI.getOperand(2).getReg() != AArch64::WZR)
1840 return false;
1841 } else if (MIOpc == AArch64::CSINCXr) {
1842 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1843 MI.getOperand(2).getReg() != AArch64::XZR)
1844 return false;
1845 } else {
1846 return false;
1847 }
1848  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1849  if (MICC == AArch64CC::Invalid)
1850 return false;
1851
1852 // NZCV needs to be defined
1853 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1854 return false;
1855
1856 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1857 const unsigned CmpOpcode = CmpInstr.getOpcode();
1858 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1859 if (CmpValue && !IsSubsRegImm)
1860 return false;
1861 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1862 return false;
1863
1864 // MI conditions allowed: eq, ne, mi, pl
1865 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1866 if (MIUsedNZCV.C || MIUsedNZCV.V)
1867 return false;
1868
1869 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1870 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1871 // Condition flags are not used in CmpInstr basic block successors and only
1872 // Z or N flags allowed to be used after CmpInstr within its basic block
1873 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1874 return false;
1875 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1876 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1877 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1878 return false;
1879 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
1880 if (MIUsedNZCV.N && !CmpValue)
1881 return false;
1882
1883 // There must be no defs of flags between MI and CmpInstr
1884 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1885 return false;
1886
1887 // Condition code is inverted in the following cases:
1888 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1889 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1890 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1891 (!CmpValue && MICC == AArch64CC::NE);
1892 return true;
1893}
1894
1895/// Remove comparison in csinc-cmp sequence
1896///
1897/// Examples:
1898/// 1. \code
1899/// csinc w9, wzr, wzr, ne
1900/// cmp w9, #0
1901/// b.eq
1902/// \endcode
1903/// to
1904/// \code
1905/// csinc w9, wzr, wzr, ne
1906/// b.ne
1907/// \endcode
1908///
1909/// 2. \code
1910/// csinc x2, xzr, xzr, mi
1911/// cmp x2, #1
1912/// b.pl
1913/// \endcode
1914/// to
1915/// \code
1916/// csinc x2, xzr, xzr, mi
1917/// b.pl
1918/// \endcode
1919///
1920/// \param CmpInstr comparison instruction
1921/// \return True when comparison removed
1922bool AArch64InstrInfo::removeCmpToZeroOrOne(
1923 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1924 const MachineRegisterInfo &MRI) const {
1925 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1926 if (!MI)
1927 return false;
1928  const TargetRegisterInfo &TRI = getRegisterInfo();
1929  SmallVector<MachineInstr *, 4> CCUseInstrs;
1930  bool IsInvertCC = false;
1931 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1932 IsInvertCC))
1933 return false;
1934 // Make transformation
1935 CmpInstr.eraseFromParent();
1936 if (IsInvertCC) {
1937 // Invert condition codes in CmpInstr CC users
1938 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1939      int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1940      assert(Idx >= 0 && "Unexpected instruction using CC.");
1941 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1942      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1943          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1944 CCOperand.setImm(CCUse);
1945 }
1946 }
1947 return true;
1948}
1949
1950bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1951  if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1952 MI.getOpcode() != AArch64::CATCHRET)
1953 return false;
1954
1955 MachineBasicBlock &MBB = *MI.getParent();
1956 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1957 auto TRI = Subtarget.getRegisterInfo();
1958 DebugLoc DL = MI.getDebugLoc();
1959
1960 if (MI.getOpcode() == AArch64::CATCHRET) {
1961 // Skip to the first instruction before the epilog.
1962 const TargetInstrInfo *TII =
1963        MBB.getParent()->getSubtarget().getInstrInfo();
1964    MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1965    MachineBasicBlock::iterator MBBI(MI);
1966    MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1967 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1968 FirstEpilogSEH != MBB.begin())
1969 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1970 if (FirstEpilogSEH != MBB.begin())
1971 FirstEpilogSEH = std::next(FirstEpilogSEH);
1972 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1973 .addReg(AArch64::X0, RegState::Define)
1974 .addMBB(TargetMBB);
1975 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1976 .addReg(AArch64::X0, RegState::Define)
1977 .addReg(AArch64::X0)
1978 .addMBB(TargetMBB)
1979 .addImm(0);
1980 return true;
1981 }
1982
1983 Register Reg = MI.getOperand(0).getReg();
1984  const Module &M = *MBB.getParent()->getFunction().getParent();
1985  if (M.getStackProtectorGuard() == "sysreg") {
1986 const AArch64SysReg::SysReg *SrcReg =
1987 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
1988 if (!SrcReg)
1989 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
1990
1991 // mrs xN, sysreg
1992 BuildMI(MBB, MI, DL, get(AArch64::MRS))
1993        .addDef(Reg)
1994        .addImm(SrcReg->Encoding);
1995 int Offset = M.getStackProtectorGuardOffset();
1996 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
1997 // ldr xN, [xN, #offset]
1998 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
1999 .addDef(Reg)
2000 .addUse(Reg, RegState::Kill)
2001 .addImm(Offset / 8);
2002 } else if (Offset >= -256 && Offset <= 255) {
2003 // ldur xN, [xN, #offset]
2004 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2005 .addDef(Reg)
2006 .addUse(Reg, RegState::Kill)
2007 .addImm(Offset);
2008 } else if (Offset >= -4095 && Offset <= 4095) {
2009 if (Offset > 0) {
2010 // add xN, xN, #offset
2011 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2012 .addDef(Reg)
2013 .addUse(Reg, RegState::Kill)
2014 .addImm(Offset)
2015 .addImm(0);
2016 } else {
2017 // sub xN, xN, #offset
2018 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2019 .addDef(Reg)
2020 .addUse(Reg, RegState::Kill)
2021 .addImm(-Offset)
2022 .addImm(0);
2023 }
2024 // ldr xN, [xN]
2025 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2026 .addDef(Reg)
2027 .addUse(Reg, RegState::Kill)
2028 .addImm(0);
2029 } else {
2030      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2031      // than 32760.
2032 // It might be nice to use AArch64::MOVi32imm here, which would get
2033 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2034 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2035 // AArch64FrameLowering might help us find such a scratch register
2036 // though. If we failed to find a scratch register, we could emit a
2037 // stream of add instructions to build up the immediate. Or, we could try
2038 // to insert a AArch64::MOVi32imm before register allocation so that we
2039 // didn't need to scavenge for a scratch register.
2040 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2041 }
2042 MBB.erase(MI);
2043 return true;
2044 }
2045
2046 const GlobalValue *GV =
2047 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2048 const TargetMachine &TM = MBB.getParent()->getTarget();
2049 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2050 const unsigned char MO_NC = AArch64II::MO_NC;
2051
2052 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2053 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2054 .addGlobalAddress(GV, 0, OpFlags);
2055 if (Subtarget.isTargetILP32()) {
2056 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2057 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2058 .addDef(Reg32, RegState::Dead)
2059 .addUse(Reg, RegState::Kill)
2060 .addImm(0)
2061 .addMemOperand(*MI.memoperands_begin())
2062          .addDef(Reg, RegState::Implicit);
2063    } else {
2064 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2065 .addReg(Reg, RegState::Kill)
2066 .addImm(0)
2067 .addMemOperand(*MI.memoperands_begin());
2068 }
2069 } else if (TM.getCodeModel() == CodeModel::Large) {
2070 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2071 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2072 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2073 .addImm(0);
2074 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2075 .addReg(Reg, RegState::Kill)
2076 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2077 .addImm(16);
2078 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2079 .addReg(Reg, RegState::Kill)
2080 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2081 .addImm(32);
2082 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2083 .addReg(Reg, RegState::Kill)
2084        .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2085        .addImm(48);
2086 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2087 .addReg(Reg, RegState::Kill)
2088 .addImm(0)
2089 .addMemOperand(*MI.memoperands_begin());
2090 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2091 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2092 .addGlobalAddress(GV, 0, OpFlags);
2093 } else {
2094 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2095 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2096 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2097 if (Subtarget.isTargetILP32()) {
2098 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2099 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2100 .addDef(Reg32, RegState::Dead)
2101 .addUse(Reg, RegState::Kill)
2102 .addGlobalAddress(GV, 0, LoFlags)
2103 .addMemOperand(*MI.memoperands_begin())
2104          .addDef(Reg, RegState::Implicit);
2105    } else {
2106 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2107 .addReg(Reg, RegState::Kill)
2108 .addGlobalAddress(GV, 0, LoFlags)
2109 .addMemOperand(*MI.memoperands_begin());
2110 }
2111 }
2112
2113 MBB.erase(MI);
2114
2115 return true;
2116}
2117
2118// Return true if this instruction simply sets its single destination register
2119// to zero. This is equivalent to a register rename of the zero-register.
2120bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2121  switch (MI.getOpcode()) {
2122 default:
2123 break;
2124 case AArch64::MOVZWi:
2125 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2126 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2127 assert(MI.getDesc().getNumOperands() == 3 &&
2128 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2129 return true;
2130 }
2131 break;
2132 case AArch64::ANDWri: // and Rd, Rzr, #imm
2133 return MI.getOperand(1).getReg() == AArch64::WZR;
2134 case AArch64::ANDXri:
2135 return MI.getOperand(1).getReg() == AArch64::XZR;
2136 case TargetOpcode::COPY:
2137 return MI.getOperand(1).getReg() == AArch64::WZR;
2138 }
2139 return false;
2140}
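// For example (illustrative), `movz w0, #0`, `and w0, wzr, #0xff` and a COPY
// from $wzr all satisfy isGPRZero: each merely materialises zero in its
// destination register.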
2141
2142// Return true if this instruction simply renames a general register without
2143// modifying bits.
2144bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2145  switch (MI.getOpcode()) {
2146 default:
2147 break;
2148 case TargetOpcode::COPY: {
2149    // GPR32 copies will be lowered to ORRXrs
2150 Register DstReg = MI.getOperand(0).getReg();
2151 return (AArch64::GPR32RegClass.contains(DstReg) ||
2152 AArch64::GPR64RegClass.contains(DstReg));
2153 }
2154 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2155 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2156 assert(MI.getDesc().getNumOperands() == 4 &&
2157 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2158 return true;
2159 }
2160 break;
2161 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2162 if (MI.getOperand(2).getImm() == 0) {
2163 assert(MI.getDesc().getNumOperands() == 4 &&
2164 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2165 return true;
2166 }
2167 break;
2168 }
2169 return false;
2170}
2171
2172// Return true if this instruction simply renames an FPR without modifying
2173// bits.
2174bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2176 default:
2177 break;
2178 case TargetOpcode::COPY: {
2179 Register DstReg = MI.getOperand(0).getReg();
2180 return AArch64::FPR128RegClass.contains(DstReg);
2181 }
2182 case AArch64::ORRv16i8:
2183 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2184 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2185 "invalid ORRv16i8 operands");
2186 return true;
2187 }
2188 break;
2189 }
2190 return false;
2191}
2192
2193unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2194                                               int &FrameIndex) const {
2195 switch (MI.getOpcode()) {
2196 default:
2197 break;
2198 case AArch64::LDRWui:
2199 case AArch64::LDRXui:
2200 case AArch64::LDRBui:
2201 case AArch64::LDRHui:
2202 case AArch64::LDRSui:
2203 case AArch64::LDRDui:
2204 case AArch64::LDRQui:
2205 case AArch64::LDR_PXI:
2206 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2207 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2208 FrameIndex = MI.getOperand(1).getIndex();
2209 return MI.getOperand(0).getReg();
2210 }
2211 break;
2212 }
2213
2214 return 0;
2215}
2216
2217unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2218                                              int &FrameIndex) const {
2219 switch (MI.getOpcode()) {
2220 default:
2221 break;
2222 case AArch64::STRWui:
2223 case AArch64::STRXui:
2224 case AArch64::STRBui:
2225 case AArch64::STRHui:
2226 case AArch64::STRSui:
2227 case AArch64::STRDui:
2228 case AArch64::STRQui:
2229 case AArch64::STR_PXI:
2230 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2231 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2232 FrameIndex = MI.getOperand(1).getIndex();
2233 return MI.getOperand(0).getReg();
2234 }
2235 break;
2236 }
2237 return 0;
2238}
2239
2240/// Check all MachineMemOperands for a hint to suppress pairing.
2241bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2242  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2243 return MMO->getFlags() & MOSuppressPair;
2244 });
2245}
2246
2247/// Set a flag on the first MachineMemOperand to suppress pairing.
2248void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2249  if (MI.memoperands_empty())
2250 return;
2251 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2252}
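// Usage sketch (illustrative, not from the source): a pass that wants to keep
// a particular load/store out of an LDP/STP could do something like
//   if (!AArch64InstrInfo::isLdStPairSuppressed(MI))
//     AArch64InstrInfo::suppressLdStPair(MI);
// Since the hint lives on the first MachineMemOperand, it only works for
// instructions that carry memory operands.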
2253
2254/// Check all MachineMemOperands for a hint that the load/store is strided.
2255bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2256  return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2257 return MMO->getFlags() & MOStridedAccess;
2258 });
2259}
2260
2261bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2262  switch (Opc) {
2263 default:
2264 return false;
2265 case AArch64::STURSi:
2266 case AArch64::STRSpre:
2267 case AArch64::STURDi:
2268 case AArch64::STRDpre:
2269 case AArch64::STURQi:
2270 case AArch64::STRQpre:
2271 case AArch64::STURBBi:
2272 case AArch64::STURHHi:
2273 case AArch64::STURWi:
2274 case AArch64::STRWpre:
2275 case AArch64::STURXi:
2276 case AArch64::STRXpre:
2277 case AArch64::LDURSi:
2278 case AArch64::LDRSpre:
2279 case AArch64::LDURDi:
2280 case AArch64::LDRDpre:
2281 case AArch64::LDURQi:
2282 case AArch64::LDRQpre:
2283 case AArch64::LDURWi:
2284 case AArch64::LDRWpre:
2285 case AArch64::LDURXi:
2286 case AArch64::LDRXpre:
2287 case AArch64::LDRSWpre:
2288 case AArch64::LDURSWi:
2289 case AArch64::LDURHHi:
2290 case AArch64::LDURBBi:
2291 case AArch64::LDURSBWi:
2292 case AArch64::LDURSHWi:
2293 return true;
2294 }
2295}
2296
2297std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2298 switch (Opc) {
2299 default: return {};
2300 case AArch64::PRFMui: return AArch64::PRFUMi;
2301 case AArch64::LDRXui: return AArch64::LDURXi;
2302 case AArch64::LDRWui: return AArch64::LDURWi;
2303 case AArch64::LDRBui: return AArch64::LDURBi;
2304 case AArch64::LDRHui: return AArch64::LDURHi;
2305 case AArch64::LDRSui: return AArch64::LDURSi;
2306 case AArch64::LDRDui: return AArch64::LDURDi;
2307 case AArch64::LDRQui: return AArch64::LDURQi;
2308 case AArch64::LDRBBui: return AArch64::LDURBBi;
2309 case AArch64::LDRHHui: return AArch64::LDURHHi;
2310 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2311 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2312 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2313 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2314 case AArch64::LDRSWui: return AArch64::LDURSWi;
2315 case AArch64::STRXui: return AArch64::STURXi;
2316 case AArch64::STRWui: return AArch64::STURWi;
2317 case AArch64::STRBui: return AArch64::STURBi;
2318 case AArch64::STRHui: return AArch64::STURHi;
2319 case AArch64::STRSui: return AArch64::STURSi;
2320 case AArch64::STRDui: return AArch64::STURDi;
2321 case AArch64::STRQui: return AArch64::STURQi;
2322 case AArch64::STRBBui: return AArch64::STURBBi;
2323 case AArch64::STRHHui: return AArch64::STURHHi;
2324 }
2325}
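// For example, the scaled store
//   str x0, [x1, #8]        ; STRXui, immediate operand holds 8 / 8 = 1
// maps to the unscaled form
//   stur x0, [x1, #8]       ; STURXi, byte offset in [-256, 255]
// which is what getUnscaledLdSt(AArch64::STRXui) returns above.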
2326
2327unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2328  switch (Opc) {
2329 default:
2330 return 2;
2331 case AArch64::LDPXi:
2332 case AArch64::LDPDi:
2333 case AArch64::STPXi:
2334 case AArch64::STPDi:
2335 case AArch64::LDNPXi:
2336 case AArch64::LDNPDi:
2337 case AArch64::STNPXi:
2338 case AArch64::STNPDi:
2339 case AArch64::LDPQi:
2340 case AArch64::STPQi:
2341 case AArch64::LDNPQi:
2342 case AArch64::STNPQi:
2343 case AArch64::LDPWi:
2344 case AArch64::LDPSi:
2345 case AArch64::STPWi:
2346 case AArch64::STPSi:
2347 case AArch64::LDNPWi:
2348 case AArch64::LDNPSi:
2349 case AArch64::STNPWi:
2350 case AArch64::STNPSi:
2351 case AArch64::LDG:
2352 case AArch64::STGPi:
2353
2354 case AArch64::LD1B_IMM:
2355 case AArch64::LD1B_H_IMM:
2356 case AArch64::LD1B_S_IMM:
2357 case AArch64::LD1B_D_IMM:
2358 case AArch64::LD1SB_H_IMM:
2359 case AArch64::LD1SB_S_IMM:
2360 case AArch64::LD1SB_D_IMM:
2361 case AArch64::LD1H_IMM:
2362 case AArch64::LD1H_S_IMM:
2363 case AArch64::LD1H_D_IMM:
2364 case AArch64::LD1SH_S_IMM:
2365 case AArch64::LD1SH_D_IMM:
2366 case AArch64::LD1W_IMM:
2367 case AArch64::LD1W_D_IMM:
2368 case AArch64::LD1SW_D_IMM:
2369 case AArch64::LD1D_IMM:
2370
2371 case AArch64::LD2B_IMM:
2372 case AArch64::LD2H_IMM:
2373 case AArch64::LD2W_IMM:
2374 case AArch64::LD2D_IMM:
2375 case AArch64::LD3B_IMM:
2376 case AArch64::LD3H_IMM:
2377 case AArch64::LD3W_IMM:
2378 case AArch64::LD3D_IMM:
2379 case AArch64::LD4B_IMM:
2380 case AArch64::LD4H_IMM:
2381 case AArch64::LD4W_IMM:
2382 case AArch64::LD4D_IMM:
2383
2384 case AArch64::ST1B_IMM:
2385 case AArch64::ST1B_H_IMM:
2386 case AArch64::ST1B_S_IMM:
2387 case AArch64::ST1B_D_IMM:
2388 case AArch64::ST1H_IMM:
2389 case AArch64::ST1H_S_IMM:
2390 case AArch64::ST1H_D_IMM:
2391 case AArch64::ST1W_IMM:
2392 case AArch64::ST1W_D_IMM:
2393 case AArch64::ST1D_IMM:
2394
2395 case AArch64::ST2B_IMM:
2396 case AArch64::ST2H_IMM:
2397 case AArch64::ST2W_IMM:
2398 case AArch64::ST2D_IMM:
2399 case AArch64::ST3B_IMM:
2400 case AArch64::ST3H_IMM:
2401 case AArch64::ST3W_IMM:
2402 case AArch64::ST3D_IMM:
2403 case AArch64::ST4B_IMM:
2404 case AArch64::ST4H_IMM:
2405 case AArch64::ST4W_IMM:
2406 case AArch64::ST4D_IMM:
2407
2408 case AArch64::LD1RB_IMM:
2409 case AArch64::LD1RB_H_IMM:
2410 case AArch64::LD1RB_S_IMM:
2411 case AArch64::LD1RB_D_IMM:
2412 case AArch64::LD1RSB_H_IMM:
2413 case AArch64::LD1RSB_S_IMM:
2414 case AArch64::LD1RSB_D_IMM:
2415 case AArch64::LD1RH_IMM:
2416 case AArch64::LD1RH_S_IMM:
2417 case AArch64::LD1RH_D_IMM:
2418 case AArch64::LD1RSH_S_IMM:
2419 case AArch64::LD1RSH_D_IMM:
2420 case AArch64::LD1RW_IMM:
2421 case AArch64::LD1RW_D_IMM:
2422 case AArch64::LD1RSW_IMM:
2423 case AArch64::LD1RD_IMM:
2424
2425 case AArch64::LDNT1B_ZRI:
2426 case AArch64::LDNT1H_ZRI:
2427 case AArch64::LDNT1W_ZRI:
2428 case AArch64::LDNT1D_ZRI:
2429 case AArch64::STNT1B_ZRI:
2430 case AArch64::STNT1H_ZRI:
2431 case AArch64::STNT1W_ZRI:
2432 case AArch64::STNT1D_ZRI:
2433
2434 case AArch64::LDNF1B_IMM:
2435 case AArch64::LDNF1B_H_IMM:
2436 case AArch64::LDNF1B_S_IMM:
2437 case AArch64::LDNF1B_D_IMM:
2438 case AArch64::LDNF1SB_H_IMM:
2439 case AArch64::LDNF1SB_S_IMM:
2440 case AArch64::LDNF1SB_D_IMM:
2441 case AArch64::LDNF1H_IMM:
2442 case AArch64::LDNF1H_S_IMM:
2443 case AArch64::LDNF1H_D_IMM:
2444 case AArch64::LDNF1SH_S_IMM:
2445 case AArch64::LDNF1SH_D_IMM:
2446 case AArch64::LDNF1W_IMM:
2447 case AArch64::LDNF1W_D_IMM:
2448 case AArch64::LDNF1SW_D_IMM:
2449 case AArch64::LDNF1D_IMM:
2450 return 3;
2451 case AArch64::ADDG:
2452 case AArch64::STGi:
2453 case AArch64::LDR_PXI:
2454 case AArch64::STR_PXI:
2455 return 2;
2456 }
2457}
2458
2459bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2460  switch (MI.getOpcode()) {
2461 default:
2462 return false;
2463 // Scaled instructions.
2464 case AArch64::STRSui:
2465 case AArch64::STRDui:
2466 case AArch64::STRQui:
2467 case AArch64::STRXui:
2468 case AArch64::STRWui:
2469 case AArch64::LDRSui:
2470 case AArch64::LDRDui:
2471 case AArch64::LDRQui:
2472 case AArch64::LDRXui:
2473 case AArch64::LDRWui:
2474 case AArch64::LDRSWui:
2475 // Unscaled instructions.
2476 case AArch64::STURSi:
2477 case AArch64::STRSpre:
2478 case AArch64::STURDi:
2479 case AArch64::STRDpre:
2480 case AArch64::STURQi:
2481 case AArch64::STRQpre:
2482 case AArch64::STURWi:
2483 case AArch64::STRWpre:
2484 case AArch64::STURXi:
2485 case AArch64::STRXpre:
2486 case AArch64::LDURSi:
2487 case AArch64::LDRSpre:
2488 case AArch64::LDURDi:
2489 case AArch64::LDRDpre:
2490 case AArch64::LDURQi:
2491 case AArch64::LDRQpre:
2492 case AArch64::LDURWi:
2493 case AArch64::LDRWpre:
2494 case AArch64::LDURXi:
2495 case AArch64::LDRXpre:
2496 case AArch64::LDURSWi:
2497 case AArch64::LDRSWpre:
2498 return true;
2499 }
2500}
2501
2502bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2503  switch (MI.getOpcode()) {
2504 default:
2505 assert((!MI.isCall() || !MI.isReturn()) &&
2506 "Unexpected instruction - was a new tail call opcode introduced?");
2507 return false;
2508 case AArch64::TCRETURNdi:
2509 case AArch64::TCRETURNri:
2510 case AArch64::TCRETURNrix16x17:
2511 case AArch64::TCRETURNrix17:
2512 case AArch64::TCRETURNrinotx16:
2513 case AArch64::TCRETURNriALL:
2514 return true;
2515 }
2516}
2517
2518unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2519  switch (Opc) {
2520 default:
2521 llvm_unreachable("Opcode has no flag setting equivalent!");
2522 // 32-bit cases:
2523 case AArch64::ADDWri:
2524 return AArch64::ADDSWri;
2525 case AArch64::ADDWrr:
2526 return AArch64::ADDSWrr;
2527 case AArch64::ADDWrs:
2528 return AArch64::ADDSWrs;
2529 case AArch64::ADDWrx:
2530 return AArch64::ADDSWrx;
2531 case AArch64::ANDWri:
2532 return AArch64::ANDSWri;
2533 case AArch64::ANDWrr:
2534 return AArch64::ANDSWrr;
2535 case AArch64::ANDWrs:
2536 return AArch64::ANDSWrs;
2537 case AArch64::BICWrr:
2538 return AArch64::BICSWrr;
2539 case AArch64::BICWrs:
2540 return AArch64::BICSWrs;
2541 case AArch64::SUBWri:
2542 return AArch64::SUBSWri;
2543 case AArch64::SUBWrr:
2544 return AArch64::SUBSWrr;
2545 case AArch64::SUBWrs:
2546 return AArch64::SUBSWrs;
2547 case AArch64::SUBWrx:
2548 return AArch64::SUBSWrx;
2549 // 64-bit cases:
2550 case AArch64::ADDXri:
2551 return AArch64::ADDSXri;
2552 case AArch64::ADDXrr:
2553 return AArch64::ADDSXrr;
2554 case AArch64::ADDXrs:
2555 return AArch64::ADDSXrs;
2556 case AArch64::ADDXrx:
2557 return AArch64::ADDSXrx;
2558 case AArch64::ANDXri:
2559 return AArch64::ANDSXri;
2560 case AArch64::ANDXrr:
2561 return AArch64::ANDSXrr;
2562 case AArch64::ANDXrs:
2563 return AArch64::ANDSXrs;
2564 case AArch64::BICXrr:
2565 return AArch64::BICSXrr;
2566 case AArch64::BICXrs:
2567 return AArch64::BICSXrs;
2568 case AArch64::SUBXri:
2569 return AArch64::SUBSXri;
2570 case AArch64::SUBXrr:
2571 return AArch64::SUBSXrr;
2572 case AArch64::SUBXrs:
2573 return AArch64::SUBSXrs;
2574 case AArch64::SUBXrx:
2575 return AArch64::SUBSXrx;
2576 // SVE instructions:
2577 case AArch64::AND_PPzPP:
2578 return AArch64::ANDS_PPzPP;
2579 case AArch64::BIC_PPzPP:
2580 return AArch64::BICS_PPzPP;
2581 case AArch64::EOR_PPzPP:
2582 return AArch64::EORS_PPzPP;
2583 case AArch64::NAND_PPzPP:
2584 return AArch64::NANDS_PPzPP;
2585 case AArch64::NOR_PPzPP:
2586 return AArch64::NORS_PPzPP;
2587 case AArch64::ORN_PPzPP:
2588 return AArch64::ORNS_PPzPP;
2589 case AArch64::ORR_PPzPP:
2590 return AArch64::ORRS_PPzPP;
2591 case AArch64::BRKA_PPzP:
2592 return AArch64::BRKAS_PPzP;
2593 case AArch64::BRKPA_PPzPP:
2594 return AArch64::BRKPAS_PPzPP;
2595 case AArch64::BRKB_PPzP:
2596 return AArch64::BRKBS_PPzP;
2597 case AArch64::BRKPB_PPzPP:
2598 return AArch64::BRKPBS_PPzPP;
2599 case AArch64::BRKN_PPzP:
2600 return AArch64::BRKNS_PPzP;
2601 case AArch64::RDFFR_PPz:
2602 return AArch64::RDFFRS_PPz;
2603 case AArch64::PTRUE_B:
2604 return AArch64::PTRUES_B;
2605 }
2606}
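// Illustrative use of this mapping: an optimisation that wants
// `add w8, w0, w1` (ADDWrr) to also set NZCV can rewrite it to
// `adds w8, w0, w1`, since convertToFlagSettingOpc(AArch64::ADDWrr) is
// AArch64::ADDSWrr, similar in spirit to the compare elimination above.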
2607
2608// Is this a candidate for ld/st merging or pairing? For example, we don't
2609// touch volatiles or load/stores that have a hint to avoid pair formation.
2610bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2611
2612 bool IsPreLdSt = isPreLdSt(MI);
2613
2614 // If this is a volatile load/store, don't mess with it.
2615 if (MI.hasOrderedMemoryRef())
2616 return false;
2617
2618 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2619 // For Pre-inc LD/ST, the operand is shifted by one.
2620 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2621 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2622 "Expected a reg or frame index operand.");
2623
2624 // For Pre-indexed addressing quadword instructions, the third operand is the
2625 // immediate value.
2626 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2627
2628 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2629 return false;
2630
2631 // Can't merge/pair if the instruction modifies the base register.
2632 // e.g., ldr x0, [x0]
2633 // This case will never occur with an FI base.
2634 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2635 // STR<S,D,Q,W,X>pre, it can be merged.
2636 // For example:
2637 // ldr q0, [x11, #32]!
2638 // ldr q1, [x11, #16]
2639 // to
2640 // ldp q0, q1, [x11, #32]!
2641 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2642 Register BaseReg = MI.getOperand(1).getReg();
2643    const TargetRegisterInfo *TRI = &getRegisterInfo();
2644    if (MI.modifiesRegister(BaseReg, TRI))
2645 return false;
2646 }
2647
2648 // Check if this load/store has a hint to avoid pair formation.
2649 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2650  if (isLdStPairSuppressed(MI))
2651    return false;
2652
2653 // Do not pair any callee-save store/reload instructions in the
2654 // prologue/epilogue if the CFI information encoded the operations as separate
2655 // instructions, as that will cause the size of the actual prologue to mismatch
2656 // with the prologue size recorded in the Windows CFI.
2657 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2658 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2659 MI.getMF()->getFunction().needsUnwindTableEntry();
2660 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2661                      MI.getFlag(MachineInstr::FrameDestroy)))
2662    return false;
2663
2664 // On some CPUs quad load/store pairs are slower than two single load/stores.
2665 if (Subtarget.isPaired128Slow()) {
2666 switch (MI.getOpcode()) {
2667 default:
2668 break;
2669 case AArch64::LDURQi:
2670 case AArch64::STURQi:
2671 case AArch64::LDRQui:
2672 case AArch64::STRQui:
2673 return false;
2674 }
2675 }
2676
2677 return true;
2678}
2679
2680bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2681    const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2682    int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2683 const TargetRegisterInfo *TRI) const {
2684 if (!LdSt.mayLoadOrStore())
2685 return false;
2686
2687 const MachineOperand *BaseOp;
2688 TypeSize WidthN(0, false);
2689 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2690 WidthN, TRI))
2691 return false;
2692  // The maximum vscale is 16 under AArch64; return the maximal extent for
2693  // the vector.
2694 Width = LocationSize::precise(WidthN);
2695 BaseOps.push_back(BaseOp);
2696 return true;
2697}
2698
2699std::optional<ExtAddrMode>
2700AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2701                                          const TargetRegisterInfo *TRI) const {
2702 const MachineOperand *Base; // Filled with the base operand of MI.
2703 int64_t Offset; // Filled with the offset of MI.
2704 bool OffsetIsScalable;
2705 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2706 return std::nullopt;
2707
2708 if (!Base->isReg())
2709 return std::nullopt;
2710 ExtAddrMode AM;
2711 AM.BaseReg = Base->getReg();
2712 AM.Displacement = Offset;
2713 AM.ScaledReg = 0;
2714 AM.Scale = 0;
2715 return AM;
2716}
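// Example (illustrative): for `ldr x1, [x0, #16]` (LDRXui with an immediate of
// 2, scaled by 8) the returned ExtAddrMode is roughly
//   { BaseReg = x0, ScaledReg = 0, Scale = 0, Displacement = 16 }.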
2717
2718bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2719                                           Register Reg,
2720 const MachineInstr &AddrI,
2721 ExtAddrMode &AM) const {
2722 // Filter out instructions into which we cannot fold.
2723 unsigned NumBytes;
2724 int64_t OffsetScale = 1;
2725 switch (MemI.getOpcode()) {
2726 default:
2727 return false;
2728
2729 case AArch64::LDURQi:
2730 case AArch64::STURQi:
2731 NumBytes = 16;
2732 break;
2733
2734 case AArch64::LDURDi:
2735 case AArch64::STURDi:
2736 case AArch64::LDURXi:
2737 case AArch64::STURXi:
2738 NumBytes = 8;
2739 break;
2740
2741 case AArch64::LDURWi:
2742 case AArch64::LDURSWi:
2743 case AArch64::STURWi:
2744 NumBytes = 4;
2745 break;
2746
2747 case AArch64::LDURHi:
2748 case AArch64::STURHi:
2749 case AArch64::LDURHHi:
2750 case AArch64::STURHHi:
2751 case AArch64::LDURSHXi:
2752 case AArch64::LDURSHWi:
2753 NumBytes = 2;
2754 break;
2755
2756 case AArch64::LDRBroX:
2757 case AArch64::LDRBBroX:
2758 case AArch64::LDRSBXroX:
2759 case AArch64::LDRSBWroX:
2760 case AArch64::STRBroX:
2761 case AArch64::STRBBroX:
2762 case AArch64::LDURBi:
2763 case AArch64::LDURBBi:
2764 case AArch64::LDURSBXi:
2765 case AArch64::LDURSBWi:
2766 case AArch64::STURBi:
2767 case AArch64::STURBBi:
2768 case AArch64::LDRBui:
2769 case AArch64::LDRBBui:
2770 case AArch64::LDRSBXui:
2771 case AArch64::LDRSBWui:
2772 case AArch64::STRBui:
2773 case AArch64::STRBBui:
2774 NumBytes = 1;
2775 break;
2776
2777 case AArch64::LDRQroX:
2778 case AArch64::STRQroX:
2779 case AArch64::LDRQui:
2780 case AArch64::STRQui:
2781 NumBytes = 16;
2782 OffsetScale = 16;
2783 break;
2784
2785 case AArch64::LDRDroX:
2786 case AArch64::STRDroX:
2787 case AArch64::LDRXroX:
2788 case AArch64::STRXroX:
2789 case AArch64::LDRDui:
2790 case AArch64::STRDui:
2791 case AArch64::LDRXui:
2792 case AArch64::STRXui:
2793 NumBytes = 8;
2794 OffsetScale = 8;
2795 break;
2796
2797 case AArch64::LDRWroX:
2798 case AArch64::LDRSWroX:
2799 case AArch64::STRWroX:
2800 case AArch64::LDRWui:
2801 case AArch64::LDRSWui:
2802 case AArch64::STRWui:
2803 NumBytes = 4;
2804 OffsetScale = 4;
2805 break;
2806
2807 case AArch64::LDRHroX:
2808 case AArch64::STRHroX:
2809 case AArch64::LDRHHroX:
2810 case AArch64::STRHHroX:
2811 case AArch64::LDRSHXroX:
2812 case AArch64::LDRSHWroX:
2813 case AArch64::LDRHui:
2814 case AArch64::STRHui:
2815 case AArch64::LDRHHui:
2816 case AArch64::STRHHui:
2817 case AArch64::LDRSHXui:
2818 case AArch64::LDRSHWui:
2819 NumBytes = 2;
2820 OffsetScale = 2;
2821 break;
2822 }
2823
2824 // Check the fold operand is not the loaded/stored value.
2825 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2826 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2827 return false;
2828
2829 // Handle memory instructions with a [Reg, Reg] addressing mode.
2830 if (MemI.getOperand(2).isReg()) {
2831 // Bail if the addressing mode already includes extension of the offset
2832 // register.
2833 if (MemI.getOperand(3).getImm())
2834 return false;
2835
2836 // Check if we actually have a scaled offset.
2837 if (MemI.getOperand(4).getImm() == 0)
2838 OffsetScale = 1;
2839
2840 // If the address instructions is folded into the base register, then the
2841 // addressing mode must not have a scale. Then we can swap the base and the
2842 // scaled registers.
2843 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2844 return false;
2845
2846 switch (AddrI.getOpcode()) {
2847 default:
2848 return false;
2849
2850 case AArch64::SBFMXri:
2851 // sxtw Xa, Wm
2852 // ldr Xd, [Xn, Xa, lsl #N]
2853 // ->
2854 // ldr Xd, [Xn, Wm, sxtw #N]
2855 if (AddrI.getOperand(2).getImm() != 0 ||
2856 AddrI.getOperand(3).getImm() != 31)
2857 return false;
2858
2859 AM.BaseReg = MemI.getOperand(1).getReg();
2860 if (AM.BaseReg == Reg)
2861 AM.BaseReg = MemI.getOperand(2).getReg();
2862 AM.ScaledReg = AddrI.getOperand(1).getReg();
2863 AM.Scale = OffsetScale;
2864 AM.Displacement = 0;
2865      AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2866      return true;
2867
2868 case TargetOpcode::SUBREG_TO_REG: {
2869 // mov Wa, Wm
2870 // ldr Xd, [Xn, Xa, lsl #N]
2871 // ->
2872 // ldr Xd, [Xn, Wm, uxtw #N]
2873
2874 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2875 if (AddrI.getOperand(1).getImm() != 0 ||
2876 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2877 return false;
2878
2879 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2880 Register OffsetReg = AddrI.getOperand(2).getReg();
2881 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2882 return false;
2883
2884 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2885 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2886 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2887 DefMI.getOperand(3).getImm() != 0)
2888 return false;
2889
2890 AM.BaseReg = MemI.getOperand(1).getReg();
2891 if (AM.BaseReg == Reg)
2892 AM.BaseReg = MemI.getOperand(2).getReg();
2893 AM.ScaledReg = DefMI.getOperand(2).getReg();
2894 AM.Scale = OffsetScale;
2895 AM.Displacement = 0;
2896      AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2897      return true;
2898 }
2899 }
2900 }
2901
2902 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2903
2904 // Check we are not breaking a potential conversion to an LDP.
2905 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2906 int64_t NewOffset) -> bool {
2907 int64_t MinOffset, MaxOffset;
2908 switch (NumBytes) {
2909 default:
2910 return true;
2911 case 4:
2912 MinOffset = -256;
2913 MaxOffset = 252;
2914 break;
2915 case 8:
2916 MinOffset = -512;
2917 MaxOffset = 504;
2918 break;
2919 case 16:
2920 MinOffset = -1024;
2921 MaxOffset = 1008;
2922 break;
2923 }
2924 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2925 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2926 };
2927 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2928 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2929 int64_t NewOffset = OldOffset + Disp;
2930 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2931 return false;
2932 // If the old offset would fit into an LDP, but the new offset wouldn't,
2933 // bail out.
2934 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2935 return false;
2936 AM.BaseReg = AddrI.getOperand(1).getReg();
2937 AM.ScaledReg = 0;
2938 AM.Scale = 0;
2939 AM.Displacement = NewOffset;
2940    AM.Form = ExtAddrMode::Formula::Basic;
2941    return true;
2942 };
2943
2944 auto canFoldAddRegIntoAddrMode =
2945 [&](int64_t Scale,
2946          ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2947        if (MemI.getOperand(2).getImm() != 0)
2948 return false;
2949 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2950 return false;
2951 AM.BaseReg = AddrI.getOperand(1).getReg();
2952 AM.ScaledReg = AddrI.getOperand(2).getReg();
2953 AM.Scale = Scale;
2954 AM.Displacement = 0;
2955 AM.Form = Form;
2956 return true;
2957 };
2958
2959 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2960 unsigned Opcode = MemI.getOpcode();
2961 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2962 Subtarget.isSTRQroSlow();
2963 };
2964
2965 int64_t Disp = 0;
2966 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2967 switch (AddrI.getOpcode()) {
2968 default:
2969 return false;
2970
2971 case AArch64::ADDXri:
2972 // add Xa, Xn, #N
2973 // ldr Xd, [Xa, #M]
2974 // ->
2975 // ldr Xd, [Xn, #N'+M]
2976 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2977 return canFoldAddSubImmIntoAddrMode(Disp);
2978
2979 case AArch64::SUBXri:
2980 // sub Xa, Xn, #N
2981 // ldr Xd, [Xa, #M]
2982 // ->
2983 // ldr Xd, [Xn, #N'+M]
2984 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2985 return canFoldAddSubImmIntoAddrMode(-Disp);
2986
2987 case AArch64::ADDXrs: {
2988 // add Xa, Xn, Xm, lsl #N
2989 // ldr Xd, [Xa]
2990 // ->
2991 // ldr Xd, [Xn, Xm, lsl #N]
2992
2993 // Don't fold the add if the result would be slower, unless optimising for
2994 // size.
2995 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
2996    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
2997      return false;
2998 Shift = AArch64_AM::getShiftValue(Shift);
2999 if (!OptSize) {
3000 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3001 return false;
3002 if (avoidSlowSTRQ(MemI))
3003 return false;
3004 }
3005 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3006 }
3007
3008 case AArch64::ADDXrr:
3009 // add Xa, Xn, Xm
3010 // ldr Xd, [Xa]
3011 // ->
3012 // ldr Xd, [Xn, Xm, lsl #0]
3013
3014 // Don't fold the add if the result would be slower, unless optimising for
3015 // size.
3016 if (!OptSize && avoidSlowSTRQ(MemI))
3017 return false;
3018 return canFoldAddRegIntoAddrMode(1);
3019
3020 case AArch64::ADDXrx:
3021 // add Xa, Xn, Wm, {s,u}xtw #N
3022 // ldr Xd, [Xa]
3023 // ->
3024 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3025
3026 // Don't fold the add if the result would be slower, unless optimising for
3027 // size.
3028 if (!OptSize && avoidSlowSTRQ(MemI))
3029 return false;
3030
3031 // Can fold only sign-/zero-extend of a word.
3032 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3033    AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3034    if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3035 return false;
3036
3037 return canFoldAddRegIntoAddrMode(
3038 1ULL << AArch64_AM::getArithShiftValue(Imm),
3039        (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3040                                     : ExtAddrMode::Formula::ZExtScaledReg);
3041  }
3042}
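// Worked example (illustrative) for the ADDXri path above:
//   add x8, x0, #1, lsl #12   ; ADDXri, imm = 1, shift = 12  => Disp = 4096
//   ldr x1, [x8, #8]          ; LDRXui, NumBytes = 8, OffsetScale = 8
// OldOffset = 8 and NewOffset = 4104. The new immediate is still encodable
// for LDRXui (4104 / 8 = 513 <= 4095), but validateOffsetForLDP rejects the
// fold: the old offset fits the 8-byte LDP range [-512, 504] while the new
// one does not, so the fold would destroy a potential pairing opportunity.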
3043
3044// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3045// return the opcode of an instruction performing the same operation, but using
3046// the [Reg, Reg] addressing mode.
3047static unsigned regOffsetOpcode(unsigned Opcode) {
3048 switch (Opcode) {
3049 default:
3050 llvm_unreachable("Address folding not implemented for instruction");
3051
3052 case AArch64::LDURQi:
3053 case AArch64::LDRQui:
3054 return AArch64::LDRQroX;
3055 case AArch64::STURQi:
3056 case AArch64::STRQui:
3057 return AArch64::STRQroX;
3058 case AArch64::LDURDi:
3059 case AArch64::LDRDui:
3060 return AArch64::LDRDroX;
3061 case AArch64::STURDi:
3062 case AArch64::STRDui:
3063 return AArch64::STRDroX;
3064 case AArch64::LDURXi:
3065 case AArch64::LDRXui:
3066 return AArch64::LDRXroX;
3067 case AArch64::STURXi:
3068 case AArch64::STRXui:
3069 return AArch64::STRXroX;
3070 case AArch64::LDURWi:
3071 case AArch64::LDRWui:
3072 return AArch64::LDRWroX;
3073 case AArch64::LDURSWi:
3074 case AArch64::LDRSWui:
3075 return AArch64::LDRSWroX;
3076 case AArch64::STURWi:
3077 case AArch64::STRWui:
3078 return AArch64::STRWroX;
3079 case AArch64::LDURHi:
3080 case AArch64::LDRHui:
3081 return AArch64::LDRHroX;
3082 case AArch64::STURHi:
3083 case AArch64::STRHui:
3084 return AArch64::STRHroX;
3085 case AArch64::LDURHHi:
3086 case AArch64::LDRHHui:
3087 return AArch64::LDRHHroX;
3088 case AArch64::STURHHi:
3089 case AArch64::STRHHui:
3090 return AArch64::STRHHroX;
3091 case AArch64::LDURSHXi:
3092 case AArch64::LDRSHXui:
3093 return AArch64::LDRSHXroX;
3094 case AArch64::LDURSHWi:
3095 case AArch64::LDRSHWui:
3096 return AArch64::LDRSHWroX;
3097 case AArch64::LDURBi:
3098 case AArch64::LDRBui:
3099 return AArch64::LDRBroX;
3100 case AArch64::LDURBBi:
3101 case AArch64::LDRBBui:
3102 return AArch64::LDRBBroX;
3103 case AArch64::LDURSBXi:
3104 case AArch64::LDRSBXui:
3105 return AArch64::LDRSBXroX;
3106 case AArch64::LDURSBWi:
3107 case AArch64::LDRSBWui:
3108 return AArch64::LDRSBWroX;
3109 case AArch64::STURBi:
3110 case AArch64::STRBui:
3111 return AArch64::STRBroX;
3112 case AArch64::STURBBi:
3113 case AArch64::STRBBui:
3114 return AArch64::STRBBroX;
3115 }
3116}
3117
3118// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3119// the opcode of an instruction performing the same operation, but using the
3120// [Reg, #Imm] addressing mode with scaled offset.
3121unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3122 switch (Opcode) {
3123 default:
3124 llvm_unreachable("Address folding not implemented for instruction");
3125
3126 case AArch64::LDURQi:
3127 Scale = 16;
3128 return AArch64::LDRQui;
3129 case AArch64::STURQi:
3130 Scale = 16;
3131 return AArch64::STRQui;
3132 case AArch64::LDURDi:
3133 Scale = 8;
3134 return AArch64::LDRDui;
3135 case AArch64::STURDi:
3136 Scale = 8;
3137 return AArch64::STRDui;
3138 case AArch64::LDURXi:
3139 Scale = 8;
3140 return AArch64::LDRXui;
3141 case AArch64::STURXi:
3142 Scale = 8;
3143 return AArch64::STRXui;
3144 case AArch64::LDURWi:
3145 Scale = 4;
3146 return AArch64::LDRWui;
3147 case AArch64::LDURSWi:
3148 Scale = 4;
3149 return AArch64::LDRSWui;
3150 case AArch64::STURWi:
3151 Scale = 4;
3152 return AArch64::STRWui;
3153 case AArch64::LDURHi:
3154 Scale = 2;
3155 return AArch64::LDRHui;
3156 case AArch64::STURHi:
3157 Scale = 2;
3158 return AArch64::STRHui;
3159 case AArch64::LDURHHi:
3160 Scale = 2;
3161 return AArch64::LDRHHui;
3162 case AArch64::STURHHi:
3163 Scale = 2;
3164 return AArch64::STRHHui;
3165 case AArch64::LDURSHXi:
3166 Scale = 2;
3167 return AArch64::LDRSHXui;
3168 case AArch64::LDURSHWi:
3169 Scale = 2;
3170 return AArch64::LDRSHWui;
3171 case AArch64::LDURBi:
3172 Scale = 1;
3173 return AArch64::LDRBui;
3174 case AArch64::LDURBBi:
3175 Scale = 1;
3176 return AArch64::LDRBBui;
3177 case AArch64::LDURSBXi:
3178 Scale = 1;
3179 return AArch64::LDRSBXui;
3180 case AArch64::LDURSBWi:
3181 Scale = 1;
3182 return AArch64::LDRSBWui;
3183 case AArch64::STURBi:
3184 Scale = 1;
3185 return AArch64::STRBui;
3186 case AArch64::STURBBi:
3187 Scale = 1;
3188 return AArch64::STRBBui;
3189 case AArch64::LDRQui:
3190 case AArch64::STRQui:
3191 Scale = 16;
3192 return Opcode;
3193 case AArch64::LDRDui:
3194 case AArch64::STRDui:
3195 case AArch64::LDRXui:
3196 case AArch64::STRXui:
3197 Scale = 8;
3198 return Opcode;
3199 case AArch64::LDRWui:
3200 case AArch64::LDRSWui:
3201 case AArch64::STRWui:
3202 Scale = 4;
3203 return Opcode;
3204 case AArch64::LDRHui:
3205 case AArch64::STRHui:
3206 case AArch64::LDRHHui:
3207 case AArch64::STRHHui:
3208 case AArch64::LDRSHXui:
3209 case AArch64::LDRSHWui:
3210 Scale = 2;
3211 return Opcode;
3212 case AArch64::LDRBui:
3213 case AArch64::LDRBBui:
3214 case AArch64::LDRSBXui:
3215 case AArch64::LDRSBWui:
3216 case AArch64::STRBui:
3217 case AArch64::STRBBui:
3218 Scale = 1;
3219 return Opcode;
3220 }
3221}
3222
3223// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3224// the opcode of an instruction performing the same operation, but using the
3225// [Reg, #Imm] addressing mode with unscaled offset.
3226unsigned unscaledOffsetOpcode(unsigned Opcode) {
3227 switch (Opcode) {
3228 default:
3229 llvm_unreachable("Address folding not implemented for instruction");
3230
3231 case AArch64::LDURQi:
3232 case AArch64::STURQi:
3233 case AArch64::LDURDi:
3234 case AArch64::STURDi:
3235 case AArch64::LDURXi:
3236 case AArch64::STURXi:
3237 case AArch64::LDURWi:
3238 case AArch64::LDURSWi:
3239 case AArch64::STURWi:
3240 case AArch64::LDURHi:
3241 case AArch64::STURHi:
3242 case AArch64::LDURHHi:
3243 case AArch64::STURHHi:
3244 case AArch64::LDURSHXi:
3245 case AArch64::LDURSHWi:
3246 case AArch64::LDURBi:
3247 case AArch64::STURBi:
3248 case AArch64::LDURBBi:
3249 case AArch64::STURBBi:
3250 case AArch64::LDURSBWi:
3251 case AArch64::LDURSBXi:
3252 return Opcode;
3253 case AArch64::LDRQui:
3254 return AArch64::LDURQi;
3255 case AArch64::STRQui:
3256 return AArch64::STURQi;
3257 case AArch64::LDRDui:
3258 return AArch64::LDURDi;
3259 case AArch64::STRDui:
3260 return AArch64::STURDi;
3261 case AArch64::LDRXui:
3262 return AArch64::LDURXi;
3263 case AArch64::STRXui:
3264 return AArch64::STURXi;
3265 case AArch64::LDRWui:
3266 return AArch64::LDURWi;
3267 case AArch64::LDRSWui:
3268 return AArch64::LDURSWi;
3269 case AArch64::STRWui:
3270 return AArch64::STURWi;
3271 case AArch64::LDRHui:
3272 return AArch64::LDURHi;
3273 case AArch64::STRHui:
3274 return AArch64::STURHi;
3275 case AArch64::LDRHHui:
3276 return AArch64::LDURHHi;
3277 case AArch64::STRHHui:
3278 return AArch64::STURHHi;
3279 case AArch64::LDRSHXui:
3280 return AArch64::LDURSHXi;
3281 case AArch64::LDRSHWui:
3282 return AArch64::LDURSHWi;
3283 case AArch64::LDRBBui:
3284 return AArch64::LDURBBi;
3285 case AArch64::LDRBui:
3286 return AArch64::LDURBi;
3287 case AArch64::STRBBui:
3288 return AArch64::STURBBi;
3289 case AArch64::STRBui:
3290 return AArch64::STURBi;
3291 case AArch64::LDRSBWui:
3292 return AArch64::LDURSBWi;
3293 case AArch64::LDRSBXui:
3294 return AArch64::LDURSBXi;
3295 }
3296}
3297
3298// Given the opcode of a memory load/store instruction, return the opcode of an
3299// instruction performing the same operation, but using
3300// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3301// offset register.
3302static unsigned offsetExtendOpcode(unsigned Opcode) {
3303 switch (Opcode) {
3304 default:
3305 llvm_unreachable("Address folding not implemented for instruction");
3306
3307 case AArch64::LDRQroX:
3308 case AArch64::LDURQi:
3309 case AArch64::LDRQui:
3310 return AArch64::LDRQroW;
3311 case AArch64::STRQroX:
3312 case AArch64::STURQi:
3313 case AArch64::STRQui:
3314 return AArch64::STRQroW;
3315 case AArch64::LDRDroX:
3316 case AArch64::LDURDi:
3317 case AArch64::LDRDui:
3318 return AArch64::LDRDroW;
3319 case AArch64::STRDroX:
3320 case AArch64::STURDi:
3321 case AArch64::STRDui:
3322 return AArch64::STRDroW;
3323 case AArch64::LDRXroX:
3324 case AArch64::LDURXi:
3325 case AArch64::LDRXui:
3326 return AArch64::LDRXroW;
3327 case AArch64::STRXroX:
3328 case AArch64::STURXi:
3329 case AArch64::STRXui:
3330 return AArch64::STRXroW;
3331 case AArch64::LDRWroX:
3332 case AArch64::LDURWi:
3333 case AArch64::LDRWui:
3334 return AArch64::LDRWroW;
3335 case AArch64::LDRSWroX:
3336 case AArch64::LDURSWi:
3337 case AArch64::LDRSWui:
3338 return AArch64::LDRSWroW;
3339 case AArch64::STRWroX:
3340 case AArch64::STURWi:
3341 case AArch64::STRWui:
3342 return AArch64::STRWroW;
3343 case AArch64::LDRHroX:
3344 case AArch64::LDURHi:
3345 case AArch64::LDRHui:
3346 return AArch64::LDRHroW;
3347 case AArch64::STRHroX:
3348 case AArch64::STURHi:
3349 case AArch64::STRHui:
3350 return AArch64::STRHroW;
3351 case AArch64::LDRHHroX:
3352 case AArch64::LDURHHi:
3353 case AArch64::LDRHHui:
3354 return AArch64::LDRHHroW;
3355 case AArch64::STRHHroX:
3356 case AArch64::STURHHi:
3357 case AArch64::STRHHui:
3358 return AArch64::STRHHroW;
3359 case AArch64::LDRSHXroX:
3360 case AArch64::LDURSHXi:
3361 case AArch64::LDRSHXui:
3362 return AArch64::LDRSHXroW;
3363 case AArch64::LDRSHWroX:
3364 case AArch64::LDURSHWi:
3365 case AArch64::LDRSHWui:
3366 return AArch64::LDRSHWroW;
3367 case AArch64::LDRBroX:
3368 case AArch64::LDURBi:
3369 case AArch64::LDRBui:
3370 return AArch64::LDRBroW;
3371 case AArch64::LDRBBroX:
3372 case AArch64::LDURBBi:
3373 case AArch64::LDRBBui:
3374 return AArch64::LDRBBroW;
3375 case AArch64::LDRSBXroX:
3376 case AArch64::LDURSBXi:
3377 case AArch64::LDRSBXui:
3378 return AArch64::LDRSBXroW;
3379 case AArch64::LDRSBWroX:
3380 case AArch64::LDURSBWi:
3381 case AArch64::LDRSBWui:
3382 return AArch64::LDRSBWroW;
3383 case AArch64::STRBroX:
3384 case AArch64::STURBi:
3385 case AArch64::STRBui:
3386 return AArch64::STRBroW;
3387 case AArch64::STRBBroX:
3388 case AArch64::STURBBi:
3389 case AArch64::STRBBui:
3390 return AArch64::STRBBroW;
3391 }
3392}
3393
3394MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3395                                                 const ExtAddrMode &AM) const {
3396
3397 const DebugLoc &DL = MemI.getDebugLoc();
3398 MachineBasicBlock &MBB = *MemI.getParent();
3399  MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3400
3401  if (AM.Form == ExtAddrMode::Formula::Basic) {
3402 if (AM.ScaledReg) {
3403 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3404 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3405 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3406 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3407 .addReg(MemI.getOperand(0).getReg(),
3408 MemI.mayLoad() ? RegState::Define : 0)
3409 .addReg(AM.BaseReg)
3410 .addReg(AM.ScaledReg)
3411 .addImm(0)
3412 .addImm(AM.Scale > 1)
3413 .setMemRefs(MemI.memoperands())
3414 .setMIFlags(MemI.getFlags());
3415 return B.getInstr();
3416 }
3417
3418 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3419 "Addressing mode not supported for folding");
3420
3421 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3422 unsigned Scale = 1;
3423 unsigned Opcode = MemI.getOpcode();
3424 if (isInt<9>(AM.Displacement))
3425 Opcode = unscaledOffsetOpcode(Opcode);
3426 else
3427 Opcode = scaledOffsetOpcode(Opcode, Scale);
3428
3429 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3430 .addReg(MemI.getOperand(0).getReg(),
3431 MemI.mayLoad() ? RegState::Define : 0)
3432 .addReg(AM.BaseReg)
3433 .addImm(AM.Displacement / Scale)
3434 .setMemRefs(MemI.memoperands())
3435 .setMIFlags(MemI.getFlags());
3436 return B.getInstr();
3437 }
3438
3439  if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3440      AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3441 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3442 assert(AM.ScaledReg && !AM.Displacement &&
3443 "Address offset can be a register or an immediate, but not both");
3444 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3445 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3446 // Make sure the offset register is in the correct register class.
3447 Register OffsetReg = AM.ScaledReg;
3448 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3449 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3450 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3451 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3452 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3453 }
3454 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3455 .addReg(MemI.getOperand(0).getReg(),
3456 MemI.mayLoad() ? RegState::Define : 0)
3457 .addReg(AM.BaseReg)
3458 .addReg(OffsetReg)
3459                 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3460                 .addImm(AM.Scale != 1)
3461 .setMemRefs(MemI.memoperands())
3462 .setMIFlags(MemI.getFlags());
3463
3464 return B.getInstr();
3465 }
3466
3468 "Function must not be called with an addressing mode it can't handle");
3469}
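// Example (illustrative): with AM = { BaseReg = x0, ScaledReg = x1, Scale = 8,
// Displacement = 0 } and MemI an LDRXui of x2, the code above emits
//   ldr x2, [x0, x1, lsl #3]  ; LDRXroX from regOffsetOpcode(LDRXui)
// in front of MemI and returns the new instruction; erasing the original
// load is left to the caller.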
3470
3471bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3472    const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3473 bool &OffsetIsScalable, TypeSize &Width,
3474 const TargetRegisterInfo *TRI) const {
3475 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3476 // Handle only loads/stores with base register followed by immediate offset.
3477 if (LdSt.getNumExplicitOperands() == 3) {
3478 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3479 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3480 !LdSt.getOperand(2).isImm())
3481 return false;
3482 } else if (LdSt.getNumExplicitOperands() == 4) {
3483 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3484 if (!LdSt.getOperand(1).isReg() ||
3485 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3486 !LdSt.getOperand(3).isImm())
3487 return false;
3488 } else
3489 return false;
3490
3491 // Get the scaling factor for the instruction and set the width for the
3492 // instruction.
3493 TypeSize Scale(0U, false);
3494 int64_t Dummy1, Dummy2;
3495
3496 // If this returns false, then it's an instruction we don't want to handle.
3497 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3498 return false;
3499
3500 // Compute the offset. Offset is calculated as the immediate operand
3501 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3502 // set to 1.
3503 if (LdSt.getNumExplicitOperands() == 3) {
3504 BaseOp = &LdSt.getOperand(1);
3505 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3506 } else {
3507 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3508 BaseOp = &LdSt.getOperand(2);
3509 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3510 }
3511 OffsetIsScalable = Scale.isScalable();
3512
3513 if (!BaseOp->isReg() && !BaseOp->isFI())
3514 return false;
3515
3516 return true;
3517}
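// Example (illustrative): for the paired load `ldp x1, x2, [x0, #16]` (LDPXi
// with immediate 2 and scale 8) this sets BaseOp to the x0 operand, Offset to
// 16, OffsetIsScalable to false and Width to 16 bytes.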
3518
3521 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3522 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3523 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3524 return OfsOp;
3525}
3526
3527bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3528 TypeSize &Width, int64_t &MinOffset,
3529 int64_t &MaxOffset) {
3530 switch (Opcode) {
3531 // Not a memory operation or something we want to handle.
3532 default:
3533 Scale = TypeSize::getFixed(0);
3534 Width = TypeSize::getFixed(0);
3535 MinOffset = MaxOffset = 0;
3536 return false;
3537 case AArch64::STRWpost:
3538 case AArch64::LDRWpost:
3539 Width = TypeSize::getFixed(32);
3540 Scale = TypeSize::getFixed(4);
3541 MinOffset = -256;
3542 MaxOffset = 255;
3543 break;
3544 case AArch64::LDURQi:
3545 case AArch64::STURQi:
3546 Width = TypeSize::getFixed(16);
3547 Scale = TypeSize::getFixed(1);
3548 MinOffset = -256;
3549 MaxOffset = 255;
3550 break;
3551 case AArch64::PRFUMi:
3552 case AArch64::LDURXi:
3553 case AArch64::LDURDi:
3554 case AArch64::LDAPURXi:
3555 case AArch64::STURXi:
3556 case AArch64::STURDi:
3557 case AArch64::STLURXi:
3558 Width = TypeSize::getFixed(8);
3559 Scale = TypeSize::getFixed(1);
3560 MinOffset = -256;
3561 MaxOffset = 255;
3562 break;
3563 case AArch64::LDURWi:
3564 case AArch64::LDURSi:
3565 case AArch64::LDURSWi:
3566 case AArch64::LDAPURi:
3567 case AArch64::LDAPURSWi:
3568 case AArch64::STURWi:
3569 case AArch64::STURSi:
3570 case AArch64::STLURWi:
3571 Width = TypeSize::getFixed(4);
3572 Scale = TypeSize::getFixed(1);
3573 MinOffset = -256;
3574 MaxOffset = 255;
3575 break;
3576 case AArch64::LDURHi:
3577 case AArch64::LDURHHi:
3578 case AArch64::LDURSHXi:
3579 case AArch64::LDURSHWi:
3580 case AArch64::LDAPURHi:
3581 case AArch64::LDAPURSHWi:
3582 case AArch64::LDAPURSHXi:
3583 case AArch64::STURHi:
3584 case AArch64::STURHHi:
3585 case AArch64::STLURHi:
3586 Width = TypeSize::getFixed(2);
3587 Scale = TypeSize::getFixed(1);
3588 MinOffset = -256;
3589 MaxOffset = 255;
3590 break;
3591 case AArch64::LDURBi:
3592 case AArch64::LDURBBi:
3593 case AArch64::LDURSBXi:
3594 case AArch64::LDURSBWi:
3595 case AArch64::LDAPURBi:
3596 case AArch64::LDAPURSBWi:
3597 case AArch64::LDAPURSBXi:
3598 case AArch64::STURBi:
3599 case AArch64::STURBBi:
3600 case AArch64::STLURBi:
3601 Width = TypeSize::getFixed(1);
3602 Scale = TypeSize::getFixed(1);
3603 MinOffset = -256;
3604 MaxOffset = 255;
3605 break;
3606 case AArch64::LDPQi:
3607 case AArch64::LDNPQi:
3608 case AArch64::STPQi:
3609 case AArch64::STNPQi:
3610 Scale = TypeSize::getFixed(16);
3611 Width = TypeSize::getFixed(32);
3612 MinOffset = -64;
3613 MaxOffset = 63;
3614 break;
3615 case AArch64::LDRQui:
3616 case AArch64::STRQui:
3617 Scale = TypeSize::getFixed(16);
3618 Width = TypeSize::getFixed(16);
3619 MinOffset = 0;
3620 MaxOffset = 4095;
3621 break;
3622 case AArch64::LDPXi:
3623 case AArch64::LDPDi:
3624 case AArch64::LDNPXi:
3625 case AArch64::LDNPDi:
3626 case AArch64::STPXi:
3627 case AArch64::STPDi:
3628 case AArch64::STNPXi:
3629 case AArch64::STNPDi:
3630 Scale = TypeSize::getFixed(8);
3631 Width = TypeSize::getFixed(16);
3632 MinOffset = -64;
3633 MaxOffset = 63;
3634 break;
3635 case AArch64::PRFMui:
3636 case AArch64::LDRXui:
3637 case AArch64::LDRDui:
3638 case AArch64::STRXui:
3639 case AArch64::STRDui:
3640 Scale = TypeSize::getFixed(8);
3641 Width = TypeSize::getFixed(8);
3642 MinOffset = 0;
3643 MaxOffset = 4095;
3644 break;
3645 case AArch64::StoreSwiftAsyncContext:
3646 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3647 Scale = TypeSize::getFixed(1);
3648 Width = TypeSize::getFixed(8);
3649 MinOffset = 0;
3650 MaxOffset = 4095;
3651 break;
3652 case AArch64::LDPWi:
3653 case AArch64::LDPSi:
3654 case AArch64::LDNPWi:
3655 case AArch64::LDNPSi:
3656 case AArch64::STPWi:
3657 case AArch64::STPSi:
3658 case AArch64::STNPWi:
3659 case AArch64::STNPSi:
3660 Scale = TypeSize::getFixed(4);
3661 Width = TypeSize::getFixed(8);
3662 MinOffset = -64;
3663 MaxOffset = 63;
3664 break;
3665 case AArch64::LDRWui:
3666 case AArch64::LDRSui:
3667 case AArch64::LDRSWui:
3668 case AArch64::STRWui:
3669 case AArch64::STRSui:
3670 Scale = TypeSize::getFixed(4);
3671 Width = TypeSize::getFixed(4);
3672 MinOffset = 0;
3673 MaxOffset = 4095;
3674 break;
3675 case AArch64::LDRHui:
3676 case AArch64::LDRHHui:
3677 case AArch64::LDRSHWui:
3678 case AArch64::LDRSHXui:
3679 case AArch64::STRHui:
3680 case AArch64::STRHHui:
3681 Scale = TypeSize::getFixed(2);
3682 Width = TypeSize::getFixed(2);
3683 MinOffset = 0;
3684 MaxOffset = 4095;
3685 break;
3686 case AArch64::LDRBui:
3687 case AArch64::LDRBBui:
3688 case AArch64::LDRSBWui:
3689 case AArch64::LDRSBXui:
3690 case AArch64::STRBui:
3691 case AArch64::STRBBui:
3692 Scale = TypeSize::getFixed(1);
3693 Width = TypeSize::getFixed(1);
3694 MinOffset = 0;
3695 MaxOffset = 4095;
3696 break;
3697 case AArch64::STPXpre:
3698 case AArch64::LDPXpost:
3699 case AArch64::STPDpre:
3700 case AArch64::LDPDpost:
3701 Scale = TypeSize::getFixed(8);
3702 Width = TypeSize::getFixed(8);
3703 MinOffset = -512;
3704 MaxOffset = 504;
3705 break;
3706 case AArch64::STPQpre:
3707 case AArch64::LDPQpost:
3708 Scale = TypeSize::getFixed(16);
3709 Width = TypeSize::getFixed(16);
3710 MinOffset = -1024;
3711 MaxOffset = 1008;
3712 break;
3713 case AArch64::STRXpre:
3714 case AArch64::STRDpre:
3715 case AArch64::LDRXpost:
3716 case AArch64::LDRDpost:
3717 Scale = TypeSize::getFixed(1);
3718 Width = TypeSize::getFixed(8);
3719 MinOffset = -256;
3720 MaxOffset = 255;
3721 break;
3722 case AArch64::STRQpre:
3723 case AArch64::LDRQpost:
3724 Scale = TypeSize::getFixed(1);
3725 Width = TypeSize::getFixed(16);
3726 MinOffset = -256;
3727 MaxOffset = 255;
3728 break;
3729 case AArch64::ADDG:
3730 Scale = TypeSize::getFixed(16);
3731 Width = TypeSize::getFixed(0);
3732 MinOffset = 0;
3733 MaxOffset = 63;
3734 break;
3735 case AArch64::TAGPstack:
3736 Scale = TypeSize::getFixed(16);
3737 Width = TypeSize::getFixed(0);
3738 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3739 // of 63 (not 64!).
3740 MinOffset = -63;
3741 MaxOffset = 63;
3742 break;
3743 case AArch64::LDG:
3744 case AArch64::STGi:
3745 case AArch64::STZGi:
3746 Scale = TypeSize::getFixed(16);
3747 Width = TypeSize::getFixed(16);
3748 MinOffset = -256;
3749 MaxOffset = 255;
3750 break;
3751 case AArch64::STR_ZZZZXI:
3752 case AArch64::LDR_ZZZZXI:
3753 Scale = TypeSize::getScalable(16);
3754 Width = TypeSize::getScalable(16 * 4);
3755 MinOffset = -256;
3756 MaxOffset = 252;
3757 break;
3758 case AArch64::STR_ZZZXI:
3759 case AArch64::LDR_ZZZXI:
3760 Scale = TypeSize::getScalable(16);
3761 Width = TypeSize::getScalable(16 * 3);
3762 MinOffset = -256;
3763 MaxOffset = 253;
3764 break;
3765 case AArch64::STR_ZZXI:
3766 case AArch64::LDR_ZZXI:
3767 Scale = TypeSize::getScalable(16);
3768 Width = TypeSize::getScalable(16 * 2);
3769 MinOffset = -256;
3770 MaxOffset = 254;
3771 break;
3772 case AArch64::LDR_PXI:
3773 case AArch64::STR_PXI:
3774 Scale = TypeSize::getScalable(2);
3775 Width = TypeSize::getScalable(2);
3776 MinOffset = -256;
3777 MaxOffset = 255;
3778 break;
3779 case AArch64::LDR_PPXI:
3780 case AArch64::STR_PPXI:
3781 Scale = TypeSize::getScalable(2);
3782 Width = TypeSize::getScalable(2 * 2);
3783 MinOffset = -256;
3784 MaxOffset = 254;
3785 break;
3786 case AArch64::LDR_ZXI:
3787 case AArch64::STR_ZXI:
3788 Scale = TypeSize::getScalable(16);
3789 Width = TypeSize::getScalable(16);
3790 MinOffset = -256;
3791 MaxOffset = 255;
3792 break;
3793 case AArch64::LD1B_IMM:
3794 case AArch64::LD1H_IMM:
3795 case AArch64::LD1W_IMM:
3796 case AArch64::LD1D_IMM:
3797 case AArch64::LDNT1B_ZRI:
3798 case AArch64::LDNT1H_ZRI:
3799 case AArch64::LDNT1W_ZRI:
3800 case AArch64::LDNT1D_ZRI:
3801 case AArch64::ST1B_IMM:
3802 case AArch64::ST1H_IMM:
3803 case AArch64::ST1W_IMM:
3804 case AArch64::ST1D_IMM:
3805 case AArch64::STNT1B_ZRI:
3806 case AArch64::STNT1H_ZRI:
3807 case AArch64::STNT1W_ZRI:
3808 case AArch64::STNT1D_ZRI:
3809 case AArch64::LDNF1B_IMM:
3810 case AArch64::LDNF1H_IMM:
3811 case AArch64::LDNF1W_IMM:
3812 case AArch64::LDNF1D_IMM:
3813 // A full vector's worth of data
3814 // Width = mbytes * elements
3815 Scale = TypeSize::getScalable(16);
3816 Width = TypeSize::getScalable(16);
3817 MinOffset = -8;
3818 MaxOffset = 7;
3819 break;
3820 case AArch64::LD2B_IMM:
3821 case AArch64::LD2H_IMM:
3822 case AArch64::LD2W_IMM:
3823 case AArch64::LD2D_IMM:
3824 case AArch64::ST2B_IMM:
3825 case AArch64::ST2H_IMM:
3826 case AArch64::ST2W_IMM:
3827 case AArch64::ST2D_IMM:
3828 Scale = TypeSize::getScalable(32);
3829 Width = TypeSize::getScalable(16 * 2);
3830 MinOffset = -8;
3831 MaxOffset = 7;
3832 break;
3833 case AArch64::LD3B_IMM:
3834 case AArch64::LD3H_IMM:
3835 case AArch64::LD3W_IMM:
3836 case AArch64::LD3D_IMM:
3837 case AArch64::ST3B_IMM:
3838 case AArch64::ST3H_IMM:
3839 case AArch64::ST3W_IMM:
3840 case AArch64::ST3D_IMM:
3841 Scale = TypeSize::getScalable(48);
3842 Width = TypeSize::getScalable(16 * 3);
3843 MinOffset = -8;
3844 MaxOffset = 7;
3845 break;
3846 case AArch64::LD4B_IMM:
3847 case AArch64::LD4H_IMM:
3848 case AArch64::LD4W_IMM:
3849 case AArch64::LD4D_IMM:
3850 case AArch64::ST4B_IMM:
3851 case AArch64::ST4H_IMM:
3852 case AArch64::ST4W_IMM:
3853 case AArch64::ST4D_IMM:
3854 Scale = TypeSize::getScalable(64);
3855 Width = TypeSize::getScalable(16 * 4);
3856 MinOffset = -8;
3857 MaxOffset = 7;
3858 break;
3859 case AArch64::LD1B_H_IMM:
3860 case AArch64::LD1SB_H_IMM:
3861 case AArch64::LD1H_S_IMM:
3862 case AArch64::LD1SH_S_IMM:
3863 case AArch64::LD1W_D_IMM:
3864 case AArch64::LD1SW_D_IMM:
3865 case AArch64::ST1B_H_IMM:
3866 case AArch64::ST1H_S_IMM:
3867 case AArch64::ST1W_D_IMM:
3868 case AArch64::LDNF1B_H_IMM:
3869 case AArch64::LDNF1SB_H_IMM:
3870 case AArch64::LDNF1H_S_IMM:
3871 case AArch64::LDNF1SH_S_IMM:
3872 case AArch64::LDNF1W_D_IMM:
3873 case AArch64::LDNF1SW_D_IMM:
3874 // A half vector's worth of data
3875 // Width = mbytes * elements
3876 Scale = TypeSize::getScalable(8);
3877 Width = TypeSize::getScalable(8);
3878 MinOffset = -8;
3879 MaxOffset = 7;
3880 break;
3881 case AArch64::LD1B_S_IMM:
3882 case AArch64::LD1SB_S_IMM:
3883 case AArch64::LD1H_D_IMM:
3884 case AArch64::LD1SH_D_IMM:
3885 case AArch64::ST1B_S_IMM:
3886 case AArch64::ST1H_D_IMM:
3887 case AArch64::LDNF1B_S_IMM:
3888 case AArch64::LDNF1SB_S_IMM:
3889 case AArch64::LDNF1H_D_IMM:
3890 case AArch64::LDNF1SH_D_IMM:
3891 // A quarter vector's worth of data
3892 // Width = mbytes * elements
3893 Scale = TypeSize::getScalable(4);
3894 Width = TypeSize::getScalable(4);
3895 MinOffset = -8;
3896 MaxOffset = 7;
3897 break;
3898 case AArch64::LD1B_D_IMM:
3899 case AArch64::LD1SB_D_IMM:
3900 case AArch64::ST1B_D_IMM:
3901 case AArch64::LDNF1B_D_IMM:
3902 case AArch64::LDNF1SB_D_IMM:
3903 // An eighth vector's worth of data
3904 // Width = mbytes * elements
3905 Scale = TypeSize::getScalable(2);
3906 Width = TypeSize::getScalable(2);
3907 MinOffset = -8;
3908 MaxOffset = 7;
3909 break;
3910 case AArch64::ST2Gi:
3911 case AArch64::STZ2Gi:
3912 Scale = TypeSize::getFixed(16);
3913 Width = TypeSize::getFixed(32);
3914 MinOffset = -256;
3915 MaxOffset = 255;
3916 break;
3917 case AArch64::STGPi:
3918 Scale = TypeSize::getFixed(16);
3919 Width = TypeSize::getFixed(16);
3920 MinOffset = -64;
3921 MaxOffset = 63;
3922 break;
3923 case AArch64::LD1RB_IMM:
3924 case AArch64::LD1RB_H_IMM:
3925 case AArch64::LD1RB_S_IMM:
3926 case AArch64::LD1RB_D_IMM:
3927 case AArch64::LD1RSB_H_IMM:
3928 case AArch64::LD1RSB_S_IMM:
3929 case AArch64::LD1RSB_D_IMM:
3930 Scale = TypeSize::getFixed(1);
3931 Width = TypeSize::getFixed(1);
3932 MinOffset = 0;
3933 MaxOffset = 63;
3934 break;
3935 case AArch64::LD1RH_IMM:
3936 case AArch64::LD1RH_S_IMM:
3937 case AArch64::LD1RH_D_IMM:
3938 case AArch64::LD1RSH_S_IMM:
3939 case AArch64::LD1RSH_D_IMM:
3940 Scale = TypeSize::getFixed(2);
3941 Width = TypeSize::getFixed(2);
3942 MinOffset = 0;
3943 MaxOffset = 63;
3944 break;
3945 case AArch64::LD1RW_IMM:
3946 case AArch64::LD1RW_D_IMM:
3947 case AArch64::LD1RSW_IMM:
3948 Scale = TypeSize::getFixed(4);
3949 Width = TypeSize::getFixed(4);
3950 MinOffset = 0;
3951 MaxOffset = 63;
3952 break;
3953 case AArch64::LD1RD_IMM:
3954 Scale = TypeSize::getFixed(8);
3955 Width = TypeSize::getFixed(8);
3956 MinOffset = 0;
3957 MaxOffset = 63;
3958 break;
3959 }
3960
3961 return true;
3962}
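// Worked example (illustrative): the byte range reachable through one of these
// addressing modes is Scale * [MinOffset, MaxOffset]. For LDPQi that is
// 16 * [-64, 63] = [-1024, 1008] in 16-byte steps, while the unsigned form
// LDRQui covers 16 * [0, 4095] = [0, 65520].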
3963
3964// Scaling factor for unscaled load or store.
3965int AArch64InstrInfo::getMemScale(unsigned Opc) {
3966 switch (Opc) {
3967 default:
3968 llvm_unreachable("Opcode has unknown scale!");
3969 case AArch64::LDRBBui:
3970 case AArch64::LDURBBi:
3971 case AArch64::LDRSBWui:
3972 case AArch64::LDURSBWi:
3973 case AArch64::STRBBui:
3974 case AArch64::STURBBi:
3975 return 1;
3976 case AArch64::LDRHHui:
3977 case AArch64::LDURHHi:
3978 case AArch64::LDRSHWui:
3979 case AArch64::LDURSHWi:
3980 case AArch64::STRHHui:
3981 case AArch64::STURHHi:
3982 return 2;
3983 case AArch64::LDRSui:
3984 case AArch64::LDURSi:
3985 case AArch64::LDRSpre:
3986 case AArch64::LDRSWui:
3987 case AArch64::LDURSWi:
3988 case AArch64::LDRSWpre:
3989 case AArch64::LDRWpre:
3990 case AArch64::LDRWui:
3991 case AArch64::LDURWi:
3992 case AArch64::STRSui:
3993 case AArch64::STURSi:
3994 case AArch64::STRSpre:
3995 case AArch64::STRWui:
3996 case AArch64::STURWi:
3997 case AArch64::STRWpre:
3998 case AArch64::LDPSi:
3999 case AArch64::LDPSWi:
4000 case AArch64::LDPWi:
4001 case AArch64::STPSi:
4002 case AArch64::STPWi:
4003 return 4;
4004 case AArch64::LDRDui:
4005 case AArch64::LDURDi:
4006 case AArch64::LDRDpre:
4007 case AArch64::LDRXui:
4008 case AArch64::LDURXi:
4009 case AArch64::LDRXpre:
4010 case AArch64::STRDui:
4011 case AArch64::STURDi:
4012 case AArch64::STRDpre:
4013 case AArch64::STRXui:
4014 case AArch64::STURXi:
4015 case AArch64::STRXpre:
4016 case AArch64::LDPDi:
4017 case AArch64::LDPXi:
4018 case AArch64::STPDi:
4019 case AArch64::STPXi:
4020 return 8;
4021 case AArch64::LDRQui:
4022 case AArch64::LDURQi:
4023 case AArch64::STRQui:
4024 case AArch64::STURQi:
4025 case AArch64::STRQpre:
4026 case AArch64::LDPQi:
4027 case AArch64::LDRQpre:
4028 case AArch64::STPQi:
4029 case AArch64::STGi:
4030 case AArch64::STZGi:
4031 case AArch64::ST2Gi:
4032 case AArch64::STZ2Gi:
4033 case AArch64::STGPi:
4034 return 16;
4035 }
4036}
4037
4038bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4039 switch (MI.getOpcode()) {
4040 default:
4041 return false;
4042 case AArch64::LDRWpre:
4043 case AArch64::LDRXpre:
4044 case AArch64::LDRSWpre:
4045 case AArch64::LDRSpre:
4046 case AArch64::LDRDpre:
4047 case AArch64::LDRQpre:
4048 return true;
4049 }
4050}
4051
4052bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4053 switch (MI.getOpcode()) {
4054 default:
4055 return false;
4056 case AArch64::STRWpre:
4057 case AArch64::STRXpre:
4058 case AArch64::STRSpre:
4059 case AArch64::STRDpre:
4060 case AArch64::STRQpre:
4061 return true;
4062 }
4063}
4064
4065bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4066 return isPreLd(MI) || isPreSt(MI);
4067}
4068
4069bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4070 switch (MI.getOpcode()) {
4071 default:
4072 return false;
4073 case AArch64::LDPSi:
4074 case AArch64::LDPSWi:
4075 case AArch64::LDPDi:
4076 case AArch64::LDPQi:
4077 case AArch64::LDPWi:
4078 case AArch64::LDPXi:
4079 case AArch64::STPSi:
4080 case AArch64::STPDi:
4081 case AArch64::STPQi:
4082 case AArch64::STPWi:
4083 case AArch64::STPXi:
4084 case AArch64::STGPi:
4085 return true;
4086 }
4087}
4088
4089const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4090 unsigned Idx =
4091 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4092 : 1;
4093 return MI.getOperand(Idx);
4094}
4095
4096const MachineOperand &
4097AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4098 unsigned Idx =
4099 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4100 : 2;
4101 return MI.getOperand(Idx);
4102}
4103
4104static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4105 Register Reg) {
4106 if (MI.getParent() == nullptr)
4107 return nullptr;
4108 const MachineFunction *MF = MI.getParent()->getParent();
4109 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4110}
4111
4113 auto IsHFPR = [&](const MachineOperand &Op) {
4114 if (!Op.isReg())
4115 return false;
4116 auto Reg = Op.getReg();
4117 if (Reg.isPhysical())
4118 return AArch64::FPR16RegClass.contains(Reg);
4119 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4120 return TRC == &AArch64::FPR16RegClass ||
4121 TRC == &AArch64::FPR16_loRegClass;
4122 };
4123 return llvm::any_of(MI.operands(), IsHFPR);
4124}
4125
4127 auto IsQFPR = [&](const MachineOperand &Op) {
4128 if (!Op.isReg())
4129 return false;
4130 auto Reg = Op.getReg();
4131 if (Reg.isPhysical())
4132 return AArch64::FPR128RegClass.contains(Reg);
4133 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4134 return TRC == &AArch64::FPR128RegClass ||
4135 TRC == &AArch64::FPR128_loRegClass;
4136 };
4137 return llvm::any_of(MI.operands(), IsQFPR);
4138}
4139
4141 switch (MI.getOpcode()) {
4142 case AArch64::BRK:
4143 case AArch64::HLT:
4144 case AArch64::PACIASP:
4145 case AArch64::PACIBSP:
4146 // Implicit BTI behavior.
4147 return true;
4148 case AArch64::PAUTH_PROLOGUE:
4149 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4150 return true;
4151 case AArch64::HINT: {
4152 unsigned Imm = MI.getOperand(0).getImm();
4153 // Explicit BTI instruction.
4154 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4155 return true;
4156 // PACI(A|B)SP instructions.
4157 if (Imm == 25 || Imm == 27)
4158 return true;
4159 return false;
4160 }
4161 default:
4162 return false;
4163 }
4164}
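// For reference, the HINT immediates checked above encode: 32 = BTI,
// 34 = BTI C, 36 = BTI J, 38 = BTI JC, 25 = PACIASP and 27 = PACIBSP.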
4165
4167 auto IsFPR = [&](const MachineOperand &Op) {
4168 if (!Op.isReg())
4169 return false;
4170 auto Reg = Op.getReg();
4171 if (Reg.isPhysical())
4172 return AArch64::FPR128RegClass.contains(Reg) ||
4173 AArch64::FPR64RegClass.contains(Reg) ||
4174 AArch64::FPR32RegClass.contains(Reg) ||
4175 AArch64::FPR16RegClass.contains(Reg) ||
4176 AArch64::FPR8RegClass.contains(Reg);
4177
4178 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4179 return TRC == &AArch64::FPR128RegClass ||
4180 TRC == &AArch64::FPR128_loRegClass ||
4181 TRC == &AArch64::FPR64RegClass ||
4182 TRC == &AArch64::FPR64_loRegClass ||
4183 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4184 TRC == &AArch64::FPR8RegClass;
4185 };
4186 return llvm::any_of(MI.operands(), IsFPR);
4187}
4188
4189// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4190// scaled.
4191static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4192 int Scale = AArch64InstrInfo::getMemScale(Opc);
4193
4194 // If the byte-offset isn't a multiple of the stride, we can't scale this
4195 // offset.
4196 if (Offset % Scale != 0)
4197 return false;
4198
4199 // Convert the byte offset used by the unscaled instruction into an "element"
4200 // offset used by the scaled pair load/store instructions.
4201 Offset /= Scale;
4202 return true;
4203}
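// Worked example (illustrative): an STURXi at byte offset 24 has a scale of 8,
// and 24 % 8 == 0, so the scaled "element" offset becomes 3. A byte offset of
// 20 is rejected because 20 % 8 != 0, and no pairing is attempted.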
4204
4205static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4206 if (FirstOpc == SecondOpc)
4207 return true;
4208 // We can also pair sign-ext and zero-ext instructions.
4209 switch (FirstOpc) {
4210 default:
4211 return false;
4212 case AArch64::STRSui:
4213 case AArch64::STURSi:
4214 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4215 case AArch64::STRDui:
4216 case AArch64::STURDi:
4217 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4218 case AArch64::STRQui:
4219 case AArch64::STURQi:
4220 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4221 case AArch64::STRWui:
4222 case AArch64::STURWi:
4223 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4224 case AArch64::STRXui:
4225 case AArch64::STURXi:
4226 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4227 case AArch64::LDRSui:
4228 case AArch64::LDURSi:
4229 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4230 case AArch64::LDRDui:
4231 case AArch64::LDURDi:
4232 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4233 case AArch64::LDRQui:
4234 case AArch64::LDURQi:
4235 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4236 case AArch64::LDRWui:
4237 case AArch64::LDURWi:
4238 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4239 case AArch64::LDRSWui:
4240 case AArch64::LDURSWi:
4241 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4242 case AArch64::LDRXui:
4243 case AArch64::LDURXi:
4244 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4245 }
4246 // These instructions can't be paired based on their opcodes.
4247 return false;
4248}
4249
4250static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4251 int64_t Offset1, unsigned Opcode1, int FI2,
4252 int64_t Offset2, unsigned Opcode2) {
4253 // Accesses through distinct fixed stack object frame indices may refer to
4254 // adjacent fixed stack slots. Check that the object offsets plus the
4255 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4256 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4257 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4258 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4259 // Convert to scaled object offsets.
4260 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4261 if (ObjectOffset1 % Scale1 != 0)
4262 return false;
4263 ObjectOffset1 /= Scale1;
4264 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4265 if (ObjectOffset2 % Scale2 != 0)
4266 return false;
4267 ObjectOffset2 /= Scale2;
4268 ObjectOffset1 += Offset1;
4269 ObjectOffset2 += Offset2;
4270 return ObjectOffset1 + 1 == ObjectOffset2;
4271 }
4272
4273 return FI1 == FI2;
4274}
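// Worked example (illustrative): two fixed 8-byte objects at object offsets 16
// and 24, each accessed by an LDRXui with instruction offset 0, scale to
// 16/8 + 0 = 2 and 24/8 + 0 = 3. Since 2 + 1 == 3 the slots are consecutive
// and the accesses may be clustered.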
4275
4276/// Detect opportunities for ldp/stp formation.
4277///
4278/// Only called for LdSt for which getMemOperandWithOffset returns true.
4279bool AArch64InstrInfo::shouldClusterMemOps(
4280 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4281 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4282 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4283 unsigned NumBytes) const {
4284 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4285 const MachineOperand &BaseOp1 = *BaseOps1.front();
4286 const MachineOperand &BaseOp2 = *BaseOps2.front();
4287 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4288 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4289 if (BaseOp1.getType() != BaseOp2.getType())
4290 return false;
4291
4292 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4293 "Only base registers and frame indices are supported.");
4294
4295 // Check for both base regs and base FI.
4296 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4297 return false;
4298
4299 // Only cluster up to a single pair.
4300 if (ClusterSize > 2)
4301 return false;
4302
4303 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4304 return false;
4305
4306 // Can we pair these instructions based on their opcodes?
4307 unsigned FirstOpc = FirstLdSt.getOpcode();
4308 unsigned SecondOpc = SecondLdSt.getOpcode();
4309 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4310 return false;
4311
4312 // Can't merge volatiles or load/stores that have a hint to avoid pair
4313 // formation, for example.
4314 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4315 !isCandidateToMergeOrPair(SecondLdSt))
4316 return false;
4317
4318 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4319 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4320 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4321 return false;
4322
4323 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4324 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4325 return false;
4326
4327 // Pairwise instructions have a 7-bit signed offset field.
4328 if (Offset1 > 63 || Offset1 < -64)
4329 return false;
4330
4331 // The caller should already have ordered First/SecondLdSt by offset.
4332 // Note: this does not hold for non-equal frame index bases.
4333 if (BaseOp1.isFI()) {
4334 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4335 "Caller should have ordered offsets.");
4336
4337 const MachineFrameInfo &MFI =
4338 FirstLdSt.getParent()->getParent()->getFrameInfo();
4339 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4340 BaseOp2.getIndex(), Offset2, SecondOpc);
4341 }
4342
4343 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4344
4345 return Offset1 + 1 == Offset2;
4346}
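// Illustrative effect (sketch): for a common base register and consecutive
// scaled offsets, e.g.
//   ldr x0, [x1, #8]
//   ldr x2, [x1, #16]
// returning true keeps the two loads adjacent in the schedule so that the
// load/store optimizer can later rewrite them as "ldp x0, x2, [x1, #8]".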
4347
4349 unsigned Reg, unsigned SubIdx,
4350 unsigned State,
4351 const TargetRegisterInfo *TRI) {
4352 if (!SubIdx)
4353 return MIB.addReg(Reg, State);
4354
4356 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4357 return MIB.addReg(Reg, State, SubIdx);
4358}
4359
4360static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4361 unsigned NumRegs) {
4362 // We really want the positive remainder mod 32 here, which happens to be
4363 // easily obtainable with a mask.
4364 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4365}
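// Worked example (illustrative): copying the tuple D1-D3 to D2-D4 gives
// (DestEncoding - SrcEncoding) & 0x1f == 1, which is < 3, so a forward
// sub-register copy would overwrite D2 and D3 before they are read; the tuple
// copy below therefore iterates over the sub-registers in reverse.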
4366
4367void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
4368 MachineBasicBlock::iterator I,
4369 const DebugLoc &DL, MCRegister DestReg,
4370 MCRegister SrcReg, bool KillSrc,
4371 unsigned Opcode,
4372 ArrayRef<unsigned> Indices) const {
4373 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4374 const TargetRegisterInfo *TRI = &getRegisterInfo();
4375 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4376 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4377 unsigned NumRegs = Indices.size();
4378
4379 int SubReg = 0, End = NumRegs, Incr = 1;
4380 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4381 SubReg = NumRegs - 1;
4382 End = -1;
4383 Incr = -1;
4384 }
4385
4386 for (; SubReg != End; SubReg += Incr) {
4387 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4388 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4389 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4390 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4391 }
4392}
4393
4394void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
4395 MachineBasicBlock::iterator I,
4396 DebugLoc DL, unsigned DestReg,
4397 unsigned SrcReg, bool KillSrc,
4398 unsigned Opcode, unsigned ZeroReg,
4399 llvm::ArrayRef<unsigned> Indices) const {
4400 const TargetRegisterInfo *TRI = &getRegisterInfo();
4401 unsigned NumRegs = Indices.size();
4402
4403#ifndef NDEBUG
4404 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4405 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4406 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4407 "GPR reg sequences should not be able to overlap");
4408#endif
4409
4410 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4411 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4412 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4413 MIB.addReg(ZeroReg);
4414 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4415 MIB.addImm(0);
4416 }
4417}
4418
4419void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
4420 MachineBasicBlock::iterator I,
4421 const DebugLoc &DL, MCRegister DestReg,
4422 MCRegister SrcReg, bool KillSrc) const {
4423 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4424 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4425 const TargetRegisterInfo *TRI = &getRegisterInfo();
4426
4427 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4428 // If either operand is WSP, expand to ADD #0.
4429 if (Subtarget.hasZeroCycleRegMove()) {
4430 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4431 MCRegister DestRegX = TRI->getMatchingSuperReg(
4432 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4433 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4434 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4435 // This instruction is reading and writing X registers. This may upset
4436 // the register scavenger and machine verifier, so we need to indicate
4437 // that we are reading an undefined value from SrcRegX, but a proper
4438 // value from SrcReg.
4439 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4440 .addReg(SrcRegX, RegState::Undef)
4441 .addImm(0)
4443 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4444 } else {
4445 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4446 .addReg(SrcReg, getKillRegState(KillSrc))
4447 .addImm(0)
4449 }
4450 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4451 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4452 .addImm(0)
4454 } else {
4455 if (Subtarget.hasZeroCycleRegMove()) {
4456 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4457 MCRegister DestRegX = TRI->getMatchingSuperReg(
4458 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4459 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4460 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4461 // This instruction is reading and writing X registers. This may upset
4462 // the register scavenger and machine verifier, so we need to indicate
4463 // that we are reading an undefined value from SrcRegX, but a proper
4464 // value from SrcReg.
4465 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4466 .addReg(AArch64::XZR)
4467 .addReg(SrcRegX, RegState::Undef)
4468 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4469 } else {
4470 // Otherwise, expand to ORR WZR.
4471 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4472 .addReg(AArch64::WZR)
4473 .addReg(SrcReg, getKillRegState(KillSrc));
4474 }
4475 }
4476 return;
4477 }
4478
4479 // Copy a Predicate register by ORRing with itself.
4480 if (AArch64::PPRRegClass.contains(DestReg) &&
4481 AArch64::PPRRegClass.contains(SrcReg)) {
4482 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4483 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4484 .addReg(SrcReg) // Pg
4485 .addReg(SrcReg)
4486 .addReg(SrcReg, getKillRegState(KillSrc));
4487 return;
4488 }
4489
4490 // Copy a predicate-as-counter register by ORRing with itself as if it
4491 // were a regular predicate (mask) register.
4492 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4493 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4494 if (DestIsPNR || SrcIsPNR) {
4495 assert((Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4496 "Unexpected predicate-as-counter register.");
4497 auto ToPPR = [](MCRegister R) -> MCRegister {
4498 return (R - AArch64::PN0) + AArch64::P0;
4499 };
4500 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4501 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4502
4503 if (PPRSrcReg != PPRDestReg) {
4504 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4505 .addReg(PPRSrcReg) // Pg
4506 .addReg(PPRSrcReg)
4507 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4508 if (DestIsPNR)
4509 NewMI.addDef(DestReg, RegState::Implicit);
4510 }
4511 return;
4512 }
4513
4514 // Copy a Z register by ORRing with itself.
4515 if (AArch64::ZPRRegClass.contains(DestReg) &&
4516 AArch64::ZPRRegClass.contains(SrcReg)) {
4517 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4518 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4519 .addReg(SrcReg)
4520 .addReg(SrcReg, getKillRegState(KillSrc));
4521 return;
4522 }
4523
4524 // Copy a Z register pair by copying the individual sub-registers.
4525 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4526 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4527 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4528 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4529 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4530 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4531 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4532 Indices);
4533 return;
4534 }
4535
4536 // Copy a Z register triple by copying the individual sub-registers.
4537 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4538 AArch64::ZPR3RegClass.contains(SrcReg)) {
4539 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4540 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4541 AArch64::zsub2};
4542 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4543 Indices);
4544 return;
4545 }
4546
4547 // Copy a Z register quad by copying the individual sub-registers.
4548 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4549 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4550 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4551 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4552 assert(Subtarget.hasSVEorSME() && "Unexpected SVE register.");
4553 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4554 AArch64::zsub2, AArch64::zsub3};
4555 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4556 Indices);
4557 return;
4558 }
4559
4560 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4561 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4562 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4563 // If either operand is SP, expand to ADD #0.
4564 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4565 .addReg(SrcReg, getKillRegState(KillSrc))
4566 .addImm(0)
4568 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4569 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4570 .addImm(0)
4572 } else {
4573 // Otherwise, expand to ORR XZR.
4574 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4575 .addReg(AArch64::XZR)
4576 .addReg(SrcReg, getKillRegState(KillSrc));
4577 }
4578 return;
4579 }
4580
4581 // Copy a DDDD register quad by copying the individual sub-registers.
4582 if (AArch64::DDDDRegClass.contains(DestReg) &&
4583 AArch64::DDDDRegClass.contains(SrcReg)) {
4584 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4585 AArch64::dsub2, AArch64::dsub3};
4586 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4587 Indices);
4588 return;
4589 }
4590
4591 // Copy a DDD register triple by copying the individual sub-registers.
4592 if (AArch64::DDDRegClass.contains(DestReg) &&
4593 AArch64::DDDRegClass.contains(SrcReg)) {
4594 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4595 AArch64::dsub2};
4596 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4597 Indices);
4598 return;
4599 }
4600
4601 // Copy a DD register pair by copying the individual sub-registers.
4602 if (AArch64::DDRegClass.contains(DestReg) &&
4603 AArch64::DDRegClass.contains(SrcReg)) {
4604 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4605 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4606 Indices);
4607 return;
4608 }
4609
4610 // Copy a QQQQ register quad by copying the individual sub-registers.
4611 if (AArch64::QQQQRegClass.contains(DestReg) &&
4612 AArch64::QQQQRegClass.contains(SrcReg)) {
4613 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4614 AArch64::qsub2, AArch64::qsub3};
4615 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4616 Indices);
4617 return;
4618 }
4619
4620 // Copy a QQQ register triple by copying the individual sub-registers.
4621 if (AArch64::QQQRegClass.contains(DestReg) &&
4622 AArch64::QQQRegClass.contains(SrcReg)) {
4623 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4624 AArch64::qsub2};
4625 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4626 Indices);
4627 return;
4628 }
4629
4630 // Copy a QQ register pair by copying the individual sub-registers.
4631 if (AArch64::QQRegClass.contains(DestReg) &&
4632 AArch64::QQRegClass.contains(SrcReg)) {
4633 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4634 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4635 Indices);
4636 return;
4637 }
4638
4639 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4640 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4641 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4642 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4643 AArch64::XZR, Indices);
4644 return;
4645 }
4646
4647 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4648 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4649 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4650 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4651 AArch64::WZR, Indices);
4652 return;
4653 }
4654
4655 if (AArch64::FPR128RegClass.contains(DestReg) &&
4656 AArch64::FPR128RegClass.contains(SrcReg)) {
4657 if (Subtarget.hasSVEorSME() && !Subtarget.isNeonAvailable())
4658 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4659 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4660 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4661 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4662 else if (Subtarget.hasNEON())
4663 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4664 .addReg(SrcReg)
4665 .addReg(SrcReg, getKillRegState(KillSrc));
4666 else {
4667 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4668 .addReg(AArch64::SP, RegState::Define)
4669 .addReg(SrcReg, getKillRegState(KillSrc))
4670 .addReg(AArch64::SP)
4671 .addImm(-16);
4672 BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
4673 .addReg(AArch64::SP, RegState::Define)
4674 .addReg(DestReg, RegState::Define)
4675 .addReg(AArch64::SP)
4676 .addImm(16);
4677 }
4678 return;
4679 }
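// Illustrative note: when neither the SVE nor the NEON copy above is usable,
// the fallback bounces the value through the stack, roughly
//   str qS, [sp, #-16]!
//   ldr qD, [sp, #16]!
// restoring SP to its original value.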
4680
4681 if (AArch64::FPR64RegClass.contains(DestReg) &&
4682 AArch64::FPR64RegClass.contains(SrcReg)) {
4683 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4684 .addReg(SrcReg, getKillRegState(KillSrc));
4685 return;
4686 }
4687
4688 if (AArch64::FPR32RegClass.contains(DestReg) &&
4689 AArch64::FPR32RegClass.contains(SrcReg)) {
4690 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4691 .addReg(SrcReg, getKillRegState(KillSrc));
4692 return;
4693 }
4694
4695 if (AArch64::FPR16RegClass.contains(DestReg) &&
4696 AArch64::FPR16RegClass.contains(SrcReg)) {
4697 DestReg =
4698 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4699 SrcReg =
4700 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4701 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4702 .addReg(SrcReg, getKillRegState(KillSrc));
4703 return;
4704 }
4705
4706 if (AArch64::FPR8RegClass.contains(DestReg) &&
4707 AArch64::FPR8RegClass.contains(SrcReg)) {
4708 DestReg =
4709 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4710 SrcReg =
4711 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4712 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4713 .addReg(SrcReg, getKillRegState(KillSrc));
4714 return;
4715 }
4716
4717 // Copies between GPR64 and FPR64.
4718 if (AArch64::FPR64RegClass.contains(DestReg) &&
4719 AArch64::GPR64RegClass.contains(SrcReg)) {
4720 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4721 .addReg(SrcReg, getKillRegState(KillSrc));
4722 return;
4723 }
4724 if (AArch64::GPR64RegClass.contains(DestReg) &&
4725 AArch64::FPR64RegClass.contains(SrcReg)) {
4726 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4727 .addReg(SrcReg, getKillRegState(KillSrc));
4728 return;
4729 }
4730 // Copies between GPR32 and FPR32.
4731 if (AArch64::FPR32RegClass.contains(DestReg) &&
4732 AArch64::GPR32RegClass.contains(SrcReg)) {
4733 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4734 .addReg(SrcReg, getKillRegState(KillSrc));
4735 return;
4736 }
4737 if (AArch64::GPR32RegClass.contains(DestReg) &&
4738 AArch64::FPR32RegClass.contains(SrcReg)) {
4739 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4740 .addReg(SrcReg, getKillRegState(KillSrc));
4741 return;
4742 }
4743
4744 if (DestReg == AArch64::NZCV) {
4745 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4746 BuildMI(MBB, I, DL, get(AArch64::MSR))
4747 .addImm(AArch64SysReg::NZCV)
4748 .addReg(SrcReg, getKillRegState(KillSrc))
4749 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4750 return;
4751 }
4752
4753 if (SrcReg == AArch64::NZCV) {
4754 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4755 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4756 .addImm(AArch64SysReg::NZCV)
4757 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4758 return;
4759 }
4760
4761#ifndef NDEBUG
4763 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4764 << TRI.getRegAsmName(SrcReg) << "\n";
4765#endif
4766 llvm_unreachable("unimplemented reg-to-reg copy");
4767}
4768
4771 MachineBasicBlock::iterator InsertBefore,
4772 const MCInstrDesc &MCID,
4773 Register SrcReg, bool IsKill,
4774 unsigned SubIdx0, unsigned SubIdx1, int FI,
4775 MachineMemOperand *MMO) {
4776 Register SrcReg0 = SrcReg;
4777 Register SrcReg1 = SrcReg;
4778 if (SrcReg.isPhysical()) {
4779 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4780 SubIdx0 = 0;
4781 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4782 SubIdx1 = 0;
4783 }
4784 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4785 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4786 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4787 .addFrameIndex(FI)
4788 .addImm(0)
4789 .addMemOperand(MMO);
4790}
4791
4792void AArch64InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
4793 MachineBasicBlock::iterator MBBI,
4794 Register SrcReg, bool isKill, int FI,
4795 const TargetRegisterClass *RC,
4796 const TargetRegisterInfo *TRI,
4797 Register VReg) const {
4798 MachineFunction &MF = *MBB.getParent();
4799 MachineFrameInfo &MFI = MF.getFrameInfo();
4800
4802 MachineMemOperand *MMO =
4804 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4805 unsigned Opc = 0;
4806 bool Offset = true;
4808 unsigned StackID = TargetStackID::Default;
4809 switch (TRI->getSpillSize(*RC)) {
4810 case 1:
4811 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4812 Opc = AArch64::STRBui;
4813 break;
4814 case 2: {
4815 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4816 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4817 Opc = AArch64::STRHui;
4818 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4819 assert(Subtarget.hasSVEorSME() &&
4820 "Unexpected register store without SVE store instructions");
4821 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4822 "Unexpected register store without SVE2p1 or SME2");
4823 Opc = AArch64::STR_PXI;
4825 }
4826 break;
4827 }
4828 case 4:
4829 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4830 Opc = AArch64::STRWui;
4831 if (SrcReg.isVirtual())
4832 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4833 else
4834 assert(SrcReg != AArch64::WSP);
4835 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4836 Opc = AArch64::STRSui;
4837 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4838 Opc = AArch64::STR_PPXI;
4840 }
4841 break;
4842 case 8:
4843 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4844 Opc = AArch64::STRXui;
4845 if (SrcReg.isVirtual())
4846 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4847 else
4848 assert(SrcReg != AArch64::SP);
4849 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4850 Opc = AArch64::STRDui;
4851 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4853 get(AArch64::STPWi), SrcReg, isKill,
4854 AArch64::sube32, AArch64::subo32, FI, MMO);
4855 return;
4856 }
4857 break;
4858 case 16:
4859 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4860 Opc = AArch64::STRQui;
4861 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4862 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4863 Opc = AArch64::ST1Twov1d;
4864 Offset = false;
4865 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4867 get(AArch64::STPXi), SrcReg, isKill,
4868 AArch64::sube64, AArch64::subo64, FI, MMO);
4869 return;
4870 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4871 assert(Subtarget.hasSVEorSME() &&
4872 "Unexpected register store without SVE store instructions");
4873 Opc = AArch64::STR_ZXI;
4875 }
4876 break;
4877 case 24:
4878 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4879 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4880 Opc = AArch64::ST1Threev1d;
4881 Offset = false;
4882 }
4883 break;
4884 case 32:
4885 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4886 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4887 Opc = AArch64::ST1Fourv1d;
4888 Offset = false;
4889 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4890 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4891 Opc = AArch64::ST1Twov2d;
4892 Offset = false;
4893 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4894 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4895 assert(Subtarget.hasSVEorSME() &&
4896 "Unexpected register store without SVE store instructions");
4897 Opc = AArch64::STR_ZZXI;
4899 }
4900 break;
4901 case 48:
4902 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4903 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4904 Opc = AArch64::ST1Threev2d;
4905 Offset = false;
4906 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4907 assert(Subtarget.hasSVEorSME() &&
4908 "Unexpected register store without SVE store instructions");
4909 Opc = AArch64::STR_ZZZXI;
4911 }
4912 break;
4913 case 64:
4914 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4915 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4916 Opc = AArch64::ST1Fourv2d;
4917 Offset = false;
4918 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4919 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4920 assert(Subtarget.hasSVEorSME() &&
4921 "Unexpected register store without SVE store instructions");
4922 Opc = AArch64::STR_ZZZZXI;
4924 }
4925 break;
4926 }
4927 assert(Opc && "Unknown register class");
4928 MFI.setStackID(FI, StackID);
4929
4930 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4931 .addReg(SrcReg, getKillRegState(isKill))
4932 .addFrameIndex(FI);
4933
4934 if (Offset)
4935 MI.addImm(0);
4936 if (PNRReg.isValid())
4937 MI.addDef(PNRReg, RegState::Implicit);
4938 MI.addMemOperand(MMO);
4939}
4940
4943 MachineBasicBlock::iterator InsertBefore,
4944 const MCInstrDesc &MCID,
4945 Register DestReg, unsigned SubIdx0,
4946 unsigned SubIdx1, int FI,
4947 MachineMemOperand *MMO) {
4948 Register DestReg0 = DestReg;
4949 Register DestReg1 = DestReg;
4950 bool IsUndef = true;
4951 if (DestReg.isPhysical()) {
4952 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4953 SubIdx0 = 0;
4954 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4955 SubIdx1 = 0;
4956 IsUndef = false;
4957 }
4958 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4959 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4960 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4961 .addFrameIndex(FI)
4962 .addImm(0)
4963 .addMemOperand(MMO);
4964}
4965
4966void AArch64InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
4967 MachineBasicBlock::iterator MBBI,
4968 Register DestReg, int FI,
4969 const TargetRegisterClass *RC,
4970 const TargetRegisterInfo *TRI,
4971 Register VReg) const {
4972 MachineFunction &MF = *MBB.getParent();
4973 MachineFrameInfo &MFI = MF.getFrameInfo();
4975 MachineMemOperand *MMO =
4977 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4978
4979 unsigned Opc = 0;
4980 bool Offset = true;
4981 unsigned StackID = TargetStackID::Default;
4983 switch (TRI->getSpillSize(*RC)) {
4984 case 1:
4985 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4986 Opc = AArch64::LDRBui;
4987 break;
4988 case 2: {
4989 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
4990 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4991 Opc = AArch64::LDRHui;
4992 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
4993 assert(Subtarget.hasSVEorSME() &&
4994 "Unexpected register load without SVE load instructions");
4995 assert((!IsPNR || Subtarget.hasSVE2p1() || Subtarget.hasSME2()) &&
4996 "Unexpected register load without SVE2p1 or SME2");
4997 if (IsPNR)
4998 PNRReg = DestReg;
4999 Opc = AArch64::LDR_PXI;
5001 }
5002 break;
5003 }
5004 case 4:
5005 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5006 Opc = AArch64::LDRWui;
5007 if (DestReg.isVirtual())
5008 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5009 else
5010 assert(DestReg != AArch64::WSP);
5011 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5012 Opc = AArch64::LDRSui;
5013 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5014 Opc = AArch64::LDR_PPXI;
5016 }
5017 break;
5018 case 8:
5019 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5020 Opc = AArch64::LDRXui;
5021 if (DestReg.isVirtual())
5022 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5023 else
5024 assert(DestReg != AArch64::SP);
5025 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5026 Opc = AArch64::LDRDui;
5027 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5029 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5030 AArch64::subo32, FI, MMO);
5031 return;
5032 }
5033 break;
5034 case 16:
5035 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5036 Opc = AArch64::LDRQui;
5037 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5038 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5039 Opc = AArch64::LD1Twov1d;
5040 Offset = false;
5041 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5043 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5044 AArch64::subo64, FI, MMO);
5045 return;
5046 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5047 assert(Subtarget.hasSVEorSME() &&
5048 "Unexpected register load without SVE load instructions");
5049 Opc = AArch64::LDR_ZXI;
5051 }
5052 break;
5053 case 24:
5054 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5055 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5056 Opc = AArch64::LD1Threev1d;
5057 Offset = false;
5058 }
5059 break;
5060 case 32:
5061 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5062 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5063 Opc = AArch64::LD1Fourv1d;
5064 Offset = false;
5065 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5066 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5067 Opc = AArch64::LD1Twov2d;
5068 Offset = false;
5069 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5070 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5071 assert(Subtarget.hasSVEorSME() &&
5072 "Unexpected register load without SVE load instructions");
5073 Opc = AArch64::LDR_ZZXI;
5075 }
5076 break;
5077 case 48:
5078 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5079 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5080 Opc = AArch64::LD1Threev2d;
5081 Offset = false;
5082 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5083 assert(Subtarget.hasSVEorSME() &&
5084 "Unexpected register load without SVE load instructions");
5085 Opc = AArch64::LDR_ZZZXI;
5087 }
5088 break;
5089 case 64:
5090 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5091 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5092 Opc = AArch64::LD1Fourv2d;
5093 Offset = false;
5094 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5095 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5096 assert(Subtarget.hasSVEorSME() &&
5097 "Unexpected register load without SVE load instructions");
5098 Opc = AArch64::LDR_ZZZZXI;
5100 }
5101 break;
5102 }
5103
5104 assert(Opc && "Unknown register class");
5105 MFI.setStackID(FI, StackID);
5106
5107 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5108 .addReg(DestReg, getDefRegState(true))
5109 .addFrameIndex(FI);
5110 if (Offset)
5111 MI.addImm(0);
5112 if (PNRReg.isValid() && !PNRReg.isVirtual())
5113 MI.addDef(PNRReg, RegState::Implicit);
5114 MI.addMemOperand(MMO);
5115
5116 if (PNRReg.isValid() && PNRReg.isVirtual())
5117 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5118 .addReg(DestReg);
5119}
5120
5122 const MachineInstr &UseMI,
5123 const TargetRegisterInfo *TRI) {
5124 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5125 UseMI.getIterator()),
5126 [TRI](const MachineInstr &I) {
5127 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5128 I.readsRegister(AArch64::NZCV, TRI);
5129 });
5130}
5131
5133 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5134 // The smallest scalable elements supported by scaled SVE addressing
5135 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5136 // byte offset must always be a multiple of 2.
5137 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5138
5139 // VGSized offsets are divided by '2', because the VG register is the
5140 // number of 64-bit granules as opposed to 128-bit vector chunks,
5141 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5142 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5143 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5144 ByteSized = Offset.getFixed();
5145 VGSized = Offset.getScalable() / 2;
5146}
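// Worked example (illustrative): an offset of 16 fixed bytes plus 32 scalable
// bytes decomposes into ByteSized = 16 and VGSized = 32 / 2 = 16, which a
// DWARF consumer evaluates as 16 + 16 * VG bytes.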
5147
5148/// Returns the offset in parts to which this frame offset can be
5149/// decomposed for the purpose of describing a frame offset.
5150/// For non-scalable offsets this is simply its byte size.
5152 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5153 int64_t &NumDataVectors) {
5154 // The smallest scalable elements supported by scaled SVE addressing
5155 // modes are predicates, which are 2 scalable bytes in size. So the scalable
5156 // byte offset must always be a multiple of 2.
5157 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5158
5159 NumBytes = Offset.getFixed();
5160 NumDataVectors = 0;
5161 NumPredicateVectors = Offset.getScalable() / 2;
5162 // This method is used to get the offsets to adjust the frame offset.
5163 // If the function requires ADDPL to be used and needs more than two ADDPL
5164 // instructions, part of the offset is folded into NumDataVectors so that it
5165 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5166 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5167 NumPredicateVectors > 62) {
5168 NumDataVectors = NumPredicateVectors / 8;
5169 NumPredicateVectors -= NumDataVectors * 8;
5170 }
5171}
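// Worked example (illustrative): a scalable offset of 132 bytes gives
// 132 / 2 = 66 predicate vectors. Since 66 > 62, it is split into
// 66 / 8 = 8 data vectors (materialised with ADDVL) and 66 - 8 * 8 = 2
// remaining predicate vectors (materialised with ADDPL).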
5172
5173// Convenience function to create a DWARF expression for
5174// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5175static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5176 int NumVGScaledBytes, unsigned VG,
5177 llvm::raw_string_ostream &Comment) {
5178 uint8_t buffer[16];
5179
5180 if (NumBytes) {
5181 Expr.push_back(dwarf::DW_OP_consts);
5182 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5183 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5184 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5185 }
5186
5187 if (NumVGScaledBytes) {
5188 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5189 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5190
5191 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5192 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5193 Expr.push_back(0);
5194
5195 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5196 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5197
5198 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5199 << std::abs(NumVGScaledBytes) << " * VG";
5200 }
5201}
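// Illustrative result (sketch): for NumBytes = -16 and NumVGScaledBytes = -8
// the appended DWARF expression evaluates to "Expr - 16 - 8 * VG", and the
// comment stream receives " - 16 - 8 * VG" for the emitted .cfi_escape.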
5202
5203// Creates an MCCFIInstruction:
5204// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5206 unsigned Reg,
5207 const StackOffset &Offset) {
5208 int64_t NumBytes, NumVGScaledBytes;
5210 NumVGScaledBytes);
5211 std::string CommentBuffer;
5212 llvm::raw_string_ostream Comment(CommentBuffer);
5213
5214 if (Reg == AArch64::SP)
5215 Comment << "sp";
5216 else if (Reg == AArch64::FP)
5217 Comment << "fp";
5218 else
5219 Comment << printReg(Reg, &TRI);
5220
5221 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5222 SmallString<64> Expr;
5223 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5224 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5225 Expr.push_back(0);
5226 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5227 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5228
5229 // Wrap this into DW_CFA_def_cfa.
5230 SmallString<64> DefCfaExpr;
5231 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5232 uint8_t buffer[16];
5233 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5234 DefCfaExpr.append(Expr.str());
5235 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5236 Comment.str());
5237}
5238
5240 unsigned FrameReg, unsigned Reg,
5241 const StackOffset &Offset,
5242 bool LastAdjustmentWasScalable) {
5243 if (Offset.getScalable())
5244 return createDefCFAExpression(TRI, Reg, Offset);
5245
5246 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5247 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5248
5249 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5250 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5251}
5252
5254 unsigned Reg,
5255 const StackOffset &OffsetFromDefCFA) {
5256 int64_t NumBytes, NumVGScaledBytes;
5258 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5259
5260 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5261
5262 // Non-scalable offsets can use DW_CFA_offset directly.
5263 if (!NumVGScaledBytes)
5264 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5265
5266 std::string CommentBuffer;
5267 llvm::raw_string_ostream Comment(CommentBuffer);
5268 Comment << printReg(Reg, &TRI) << " @ cfa";
5269
5270 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5271 SmallString<64> OffsetExpr;
5272 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5273 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5274
5275 // Wrap this into DW_CFA_expression
5276 SmallString<64> CfaExpr;
5277 CfaExpr.push_back(dwarf::DW_CFA_expression);
5278 uint8_t buffer[16];
5279 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5280 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5281 CfaExpr.append(OffsetExpr.str());
5282
5283 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5284 Comment.str());
5285}
5286
5287// Helper function to emit a frame offset adjustment from a given
5288// pointer (SrcReg), stored into DestReg. This function is explicit
5289// in that it requires the opcode.
5290static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
5291 MachineBasicBlock::iterator MBBI,
5292 const DebugLoc &DL, unsigned DestReg,
5293 unsigned SrcReg, int64_t Offset, unsigned Opc,
5294 const TargetInstrInfo *TII,
5295 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5296 bool *HasWinCFI, bool EmitCFAOffset,
5297 StackOffset CFAOffset, unsigned FrameReg) {
5298 int Sign = 1;
5299 unsigned MaxEncoding, ShiftSize;
5300 switch (Opc) {
5301 case AArch64::ADDXri:
5302 case AArch64::ADDSXri:
5303 case AArch64::SUBXri:
5304 case AArch64::SUBSXri:
5305 MaxEncoding = 0xfff;
5306 ShiftSize = 12;
5307 break;
5308 case AArch64::ADDVL_XXI:
5309 case AArch64::ADDPL_XXI:
5310 case AArch64::ADDSVL_XXI:
5311 case AArch64::ADDSPL_XXI:
5312 MaxEncoding = 31;
5313 ShiftSize = 0;
5314 if (Offset < 0) {
5315 MaxEncoding = 32;
5316 Sign = -1;
5317 Offset = -Offset;
5318 }
5319 break;
5320 default:
5321 llvm_unreachable("Unsupported opcode");
5322 }
5323
5324 // `Offset` can be in bytes or in "scalable bytes".
5325 int VScale = 1;
5326 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5327 VScale = 16;
5328 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5329 VScale = 2;
5330
5331 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5332 // scratch register. If DestReg is a virtual register, use it as the
5333 // scratch register; otherwise, create a new virtual register (to be
5334 // replaced by the scavenger at the end of PEI). That case can be optimized
5335 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5336 // register can be loaded with offset%8 and the add/sub can use an extending
5337 // instruction with LSL#3.
5338 // Currently the function handles any offsets but generates a poor sequence
5339 // of code.
5340 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5341
5342 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5343 Register TmpReg = DestReg;
5344 if (TmpReg == AArch64::XZR)
5345 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
5346 &AArch64::GPR64RegClass);
5347 do {
5348 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5349 unsigned LocalShiftSize = 0;
5350 if (ThisVal > MaxEncoding) {
5351 ThisVal = ThisVal >> ShiftSize;
5352 LocalShiftSize = ShiftSize;
5353 }
5354 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5355 "Encoding cannot handle value that big");
5356
5357 Offset -= ThisVal << LocalShiftSize;
5358 if (Offset == 0)
5359 TmpReg = DestReg;
5360 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5361 .addReg(SrcReg)
5362 .addImm(Sign * (int)ThisVal);
5363 if (ShiftSize)
5364 MBI = MBI.addImm(
5365 AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
5366 MBI = MBI.setMIFlag(Flag);
5367
5368 auto Change =
5369 VScale == 1
5370 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5371 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5372 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5373 CFAOffset += Change;
5374 else
5375 CFAOffset -= Change;
5376 if (EmitCFAOffset && DestReg == TmpReg) {
5377 MachineFunction &MF = *MBB.getParent();
5378 const TargetSubtargetInfo &STI = MF.getSubtarget();
5379 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5380
5381 unsigned CFIIndex = MF.addFrameInst(
5382 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5383 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5384 .addCFIIndex(CFIIndex)
5385 .setMIFlags(Flag);
5386 }
5387
5388 if (NeedsWinCFI) {
5389 assert(Sign == 1 && "SEH directives should always have a positive sign");
5390 int Imm = (int)(ThisVal << LocalShiftSize);
5391 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5392 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5393 if (HasWinCFI)
5394 *HasWinCFI = true;
5395 if (Imm == 0)
5396 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5397 else
5398 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5399 .addImm(Imm)
5400 .setMIFlag(Flag);
5401 assert(Offset == 0 && "Expected remaining offset to be zero to "
5402 "emit a single SEH directive");
5403 } else if (DestReg == AArch64::SP) {
5404 if (HasWinCFI)
5405 *HasWinCFI = true;
5406 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5407 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5408 .addImm(Imm)
5409 .setMIFlag(Flag);
5410 }
5411 }
5412
5413 SrcReg = TmpReg;
5414 } while (Offset);
5415}
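// Worked example (illustrative): adjusting SP by 0x12345 bytes with ADDXri
// does not fit a single 12-bit immediate, so the loop above first emits
// "add sp, sp, #0x12, lsl #12" (0x12000 bytes) and then "add sp, sp, #0x345"
// for the remainder.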
5416
5419 unsigned DestReg, unsigned SrcReg,
5421 MachineInstr::MIFlag Flag, bool SetNZCV,
5422 bool NeedsWinCFI, bool *HasWinCFI,
5423 bool EmitCFAOffset, StackOffset CFAOffset,
5424 unsigned FrameReg) {
5425 // If a function is marked as arm_locally_streaming, then the runtime value of
5426 // vscale in the prologue/epilogue is different from the runtime value of vscale
5427 // in the function's body. To avoid having to consider multiple vscales,
5428 // we can use `addsvl` to allocate any scalable stack-slots, which under
5429 // most circumstances will be only locals, not callee-save slots.
5430 const Function &F = MBB.getParent()->getFunction();
5431 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5432
5433 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5435 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5436
5437 // First emit non-scalable frame offsets, or a simple 'mov'.
5438 if (Bytes || (!Offset && SrcReg != DestReg)) {
5439 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5440 "SP increment/decrement not 8-byte aligned");
5441 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5442 if (Bytes < 0) {
5443 Bytes = -Bytes;
5444 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5445 }
5446 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5447 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5448 FrameReg);
5449 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5450 ? StackOffset::getFixed(-Bytes)
5451 : StackOffset::getFixed(Bytes);
5452 SrcReg = DestReg;
5453 FrameReg = DestReg;
5454 }
5455
5456 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5457 "SetNZCV not supported with SVE vectors");
5458 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5459 "WinCFI not supported with SVE vectors");
5460
5461 if (NumDataVectors) {
5462 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5463 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5464 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5465 CFAOffset, FrameReg);
5466 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5467 SrcReg = DestReg;
5468 }
5469
5470 if (NumPredicateVectors) {
5471 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5472 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5473 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5474 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5475 CFAOffset, FrameReg);
5476 }
5477}
5478
5479 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5480 MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5481 MachineBasicBlock::iterator InsertPt, int FrameIndex,
5482 LiveIntervals *LIS, VirtRegMap *VRM) const {
5483 // This is a bit of a hack. Consider this instruction:
5484 //
5485 // %0 = COPY %sp; GPR64all:%0
5486 //
5487 // We explicitly chose GPR64all for the virtual register so such a copy might
5488 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5489 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5490 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5491 //
5492 // To prevent that, we are going to constrain the %0 register class here.
5493 if (MI.isFullCopy()) {
5494 Register DstReg = MI.getOperand(0).getReg();
5495 Register SrcReg = MI.getOperand(1).getReg();
5496 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5497 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5498 return nullptr;
5499 }
5500 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5501 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5502 return nullptr;
5503 }
5504 // Nothing can be folded with a copy from/to NZCV.
5505 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5506 return nullptr;
5507 }
5508
5509 // Handle the case where a copy is being spilled or filled but the source
5510 // and destination register class don't match. For example:
5511 //
5512 // %0 = COPY %xzr; GPR64common:%0
5513 //
5514 // In this case we can still safely fold away the COPY and generate the
5515 // following spill code:
5516 //
5517 // STRXui %xzr, %stack.0
5518 //
5519 // This also eliminates spilled cross register class COPYs (e.g. between x and
5520 // d regs) of the same size. For example:
5521 //
5522 // %0 = COPY %1; GPR64:%0, FPR64:%1
5523 //
5524 // will be filled as
5525 //
5526 // LDRDui %0, fi<#0>
5527 //
5528 // instead of
5529 //
5530 // LDRXui %Temp, fi<#0>
5531 // %0 = FMOV %Temp
5532 //
5533 if (MI.isCopy() && Ops.size() == 1 &&
5534 // Make sure we're only folding the explicit COPY defs/uses.
5535 (Ops[0] == 0 || Ops[0] == 1)) {
5536 bool IsSpill = Ops[0] == 0;
5537 bool IsFill = !IsSpill;
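// Operand 0 of a COPY is its def and operand 1 its use, so folding the def
// spills the COPY's source register and folding the use fills its
// destination register.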
5538 const TargetRegisterInfo &TRI = getRegisterInfo();
5539 const MachineRegisterInfo &MRI = MF.getRegInfo();
5540 MachineBasicBlock &MBB = *MI.getParent();
5541 const MachineOperand &DstMO = MI.getOperand(0);
5542 const MachineOperand &SrcMO = MI.getOperand(1);
5543 Register DstReg = DstMO.getReg();
5544 Register SrcReg = SrcMO.getReg();
5545 // This is slightly expensive to compute for physical regs since
5546 // getMinimalPhysRegClass is slow.
5547 auto getRegClass = [&](unsigned Reg) {
5548 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5549 : TRI.getMinimalPhysRegClass(Reg);
5550 };
5551
5552 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5553 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5554 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5555 "Mismatched register size in non subreg COPY");
5556 if (IsSpill)
5557 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5558 getRegClass(SrcReg), &TRI, Register());
5559 else
5560 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5561 getRegClass(DstReg), &TRI, Register());
5562 return &*--InsertPt;
5563 }
5564
5565 // Handle cases like spilling def of:
5566 //
5567 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5568 //
5569 // where the physical register source can be widened and stored to the full
5570 // virtual reg destination stack slot, in this case producing:
5571 //
5572 // STRXui %xzr, %stack.0
5573 //
5574 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5575 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5576 assert(SrcMO.getSubReg() == 0 &&
5577 "Unexpected subreg on physical register");
5578 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5579 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5580 Register());
5581 return &*--InsertPt;
5582 }
5583
5584 // Handle cases like filling use of:
5585 //
5586 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5587 //
5588 // where we can load the full virtual reg source stack slot, into the subreg
5589 // destination, in this case producing:
5590 //
5591 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5592 //
5593 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5594 const TargetRegisterClass *FillRC;
5595 switch (DstMO.getSubReg()) {
5596 default:
5597 FillRC = nullptr;
5598 break;
5599 case AArch64::sub_32:
5600 FillRC = &AArch64::GPR32RegClass;
5601 break;
5602 case AArch64::ssub:
5603 FillRC = &AArch64::FPR32RegClass;
5604 break;
5605 case AArch64::dsub:
5606 FillRC = &AArch64::FPR64RegClass;
5607 break;
5608 }
5609
5610 if (FillRC) {
5611 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5612 TRI.getRegSizeInBits(*FillRC) &&
5613 "Mismatched regclass size on folded subreg COPY");
5614 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5615 Register());
5616 MachineInstr &LoadMI = *--InsertPt;
5617 MachineOperand &LoadDst = LoadMI.getOperand(0);
5618 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5619 LoadDst.setSubReg(DstMO.getSubReg());
5620 LoadDst.setIsUndef();
5621 return &LoadMI;
5622 }
5623 }
5624 }
5625
5626 // Cannot fold.
5627 return nullptr;
5628}
5629
5630 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5631 StackOffset &SOffset,
5632 bool *OutUseUnscaledOp,
5633 unsigned *OutUnscaledOp,
5634 int64_t *EmittableOffset) {
5635 // Set output values in case of early exit.
5636 if (EmittableOffset)
5637 *EmittableOffset = 0;
5638 if (OutUseUnscaledOp)
5639 *OutUseUnscaledOp = false;
5640 if (OutUnscaledOp)
5641 *OutUnscaledOp = 0;
5642
5643 // Exit early for structured vector spills/fills as they can't take an
5644 // immediate offset.
5645 switch (MI.getOpcode()) {
5646 default:
5647 break;
5648 case AArch64::LD1Rv1d:
5649 case AArch64::LD1Rv2s:
5650 case AArch64::LD1Rv2d:
5651 case AArch64::LD1Rv4h:
5652 case AArch64::LD1Rv4s:
5653 case AArch64::LD1Rv8b:
5654 case AArch64::LD1Rv8h:
5655 case AArch64::LD1Rv16b:
5656 case AArch64::LD1Twov2d:
5657 case AArch64::LD1Threev2d:
5658 case AArch64::LD1Fourv2d:
5659 case AArch64::LD1Twov1d:
5660 case AArch64::LD1Threev1d:
5661 case AArch64::LD1Fourv1d:
5662 case AArch64::ST1Twov2d:
5663 case AArch64::ST1Threev2d:
5664 case AArch64::ST1Fourv2d:
5665 case AArch64::ST1Twov1d:
5666 case AArch64::ST1Threev1d:
5667 case AArch64::ST1Fourv1d:
5668 case AArch64::ST1i8:
5669 case AArch64::ST1i16:
5670 case AArch64::ST1i32:
5671 case AArch64::ST1i64:
5672 case AArch64::IRG:
5673 case AArch64::IRGstack:
5674 case AArch64::STGloop:
5675 case AArch64::STZGloop:
5676 return AArch64FrameOffsetCannotUpdate;
5677 }
5678
5679 // Get the min/max offset and the scale.
5680 TypeSize ScaleValue(0U, false), Width(0U, false);
5681 int64_t MinOff, MaxOff;
5682 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5683 MaxOff))
5684 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5685
5686 // Construct the complete offset.
5687 bool IsMulVL = ScaleValue.isScalable();
5688 unsigned Scale = ScaleValue.getKnownMinValue();
5689 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5690
5691 const MachineOperand &ImmOpnd =
5692 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5693 Offset += ImmOpnd.getImm() * Scale;
5694
5695 // If the offset doesn't match the scale, we rewrite the instruction to
5696 // use the unscaled instruction instead. Likewise, if we have a negative
5697 // offset and there is an unscaled op to use.
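// Illustrative example: LDRXui scales its immediate by 8, so a byte offset
// of 12 is not directly encodable and would be rewritten to the unscaled
// LDURXi form, which takes the byte offset unscaled.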
5698 std::optional<unsigned> UnscaledOp =
5699 AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5700 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5701 if (useUnscaledOp &&
5702 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5703 MaxOff))
5704 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5705
5706 Scale = ScaleValue.getKnownMinValue();
5707 assert(IsMulVL == ScaleValue.isScalable() &&
5708 "Unscaled opcode has different value for scalable");
5709
5710 int64_t Remainder = Offset % Scale;
5711 assert(!(Remainder && useUnscaledOp) &&
5712 "Cannot have remainder when using unscaled op");
5713
5714 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5715 int64_t NewOffset = Offset / Scale;
5716 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5717 Offset = Remainder;
5718 else {
5719 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5720 Offset = Offset - NewOffset * Scale;
5721 }
5722
5723 if (EmittableOffset)
5724 *EmittableOffset = NewOffset;
5725 if (OutUseUnscaledOp)
5726 *OutUseUnscaledOp = useUnscaledOp;
5727 if (OutUnscaledOp && UnscaledOp)
5728 *OutUnscaledOp = *UnscaledOp;
5729
5730 if (IsMulVL)
5731 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5732 else
5733 SOffset = StackOffset::get(Offset, SOffset.getScalable());
5734 return AArch64FrameOffsetCanUpdate |
5735 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5736}
5737
5738 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5739 unsigned FrameReg, StackOffset &Offset,
5740 const AArch64InstrInfo *TII) {
5741 unsigned Opcode = MI.getOpcode();
5742 unsigned ImmIdx = FrameRegIdx + 1;
5743
5744 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5745 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5746 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5747 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5748 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5749 MI.eraseFromParent();
5750 Offset = StackOffset();
5751 return true;
5752 }
5753
5754 int64_t NewOffset;
5755 unsigned UnscaledOp;
5756 bool UseUnscaledOp;
5757 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5758 &UnscaledOp, &NewOffset);
5759 if (Status & AArch64FrameOffsetCanUpdate) {
5760 if (Status & AArch64FrameOffsetIsLegal)
5761 // Replace the FrameIndex with FrameReg.
5762 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5763 if (UseUnscaledOp)
5764 MI.setDesc(TII->get(UnscaledOp));
5765
5766 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5767 return !Offset;
5768 }
5769
5770 return false;
5771}
5772
5773 void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5774 MachineBasicBlock::iterator MI) const {
5775 DebugLoc DL;
5776 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5777}
5778
5779 MCInst AArch64InstrInfo::getNop() const {
5780 return MCInstBuilder(AArch64::HINT).addImm(0);
5781}
5782
5783// AArch64 supports MachineCombiner.
5784bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5785
5786 // True when Opc sets the condition flags (NZCV).
5787static bool isCombineInstrSettingFlag(unsigned Opc) {
5788 switch (Opc) {
5789 case AArch64::ADDSWrr:
5790 case AArch64::ADDSWri:
5791 case AArch64::ADDSXrr:
5792 case AArch64::ADDSXri:
5793 case AArch64::SUBSWrr:
5794 case AArch64::SUBSXrr:
5795 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5796 case AArch64::SUBSWri:
5797 case AArch64::SUBSXri:
5798 return true;
5799 default:
5800 break;
5801 }
5802 return false;
5803}
5804
5805// 32b Opcodes that can be combined with a MUL
5806static bool isCombineInstrCandidate32(unsigned Opc) {
5807 switch (Opc) {
5808 case AArch64::ADDWrr:
5809 case AArch64::ADDWri:
5810 case AArch64::SUBWrr:
5811 case AArch64::ADDSWrr:
5812 case AArch64::ADDSWri:
5813 case AArch64::SUBSWrr:
5814 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5815 case AArch64::SUBWri:
5816 case AArch64::SUBSWri:
5817 return true;
5818 default:
5819 break;
5820 }
5821 return false;
5822}
5823
5824// 64b Opcodes that can be combined with a MUL
5825static bool isCombineInstrCandidate64(unsigned Opc) {
5826 switch (Opc) {
5827 case AArch64::ADDXrr:
5828 case AArch64::ADDXri:
5829 case AArch64::SUBXrr:
5830 case AArch64::ADDSXrr:
5831 case AArch64::ADDSXri:
5832 case AArch64::SUBSXrr:
5833 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5834 case AArch64::SUBXri:
5835 case AArch64::SUBSXri:
5836 case AArch64::ADDv8i8:
5837 case AArch64::ADDv16i8:
5838 case AArch64::ADDv4i16:
5839 case AArch64::ADDv8i16:
5840 case AArch64::ADDv2i32:
5841 case AArch64::ADDv4i32:
5842 case AArch64::SUBv8i8:
5843 case AArch64::SUBv16i8:
5844 case AArch64::SUBv4i16:
5845 case AArch64::SUBv8i16:
5846 case AArch64::SUBv2i32:
5847 case AArch64::SUBv4i32:
5848 return true;
5849 default:
5850 break;
5851 }
5852 return false;
5853}
5854
5855// FP Opcodes that can be combined with a FMUL.
5856static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5857 switch (Inst.getOpcode()) {
5858 default:
5859 break;
5860 case AArch64::FADDHrr:
5861 case AArch64::FADDSrr:
5862 case AArch64::FADDDrr:
5863 case AArch64::FADDv4f16:
5864 case AArch64::FADDv8f16:
5865 case AArch64::FADDv2f32:
5866 case AArch64::FADDv2f64:
5867 case AArch64::FADDv4f32:
5868 case AArch64::FSUBHrr:
5869 case AArch64::FSUBSrr:
5870 case AArch64::FSUBDrr:
5871 case AArch64::FSUBv4f16:
5872 case AArch64::FSUBv8f16:
5873 case AArch64::FSUBv2f32:
5874 case AArch64::FSUBv2f64:
5875 case AArch64::FSUBv4f32:
5876 TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5877 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5878 // the target options or if FADD/FSUB has the contract fast-math flag.
5879 return Options.UnsafeFPMath ||
5880 Options.AllowFPOpFusion == FPOpFusion::Fast ||
5881 Inst.getFlag(MachineInstr::FmContract);
5882 return true;
5883 }
5884 return false;
5885}
5886
5887// Opcodes that can be combined with a MUL
5888static bool isCombineInstrCandidate(unsigned Opc) {
5890}
5891
5892//
5893// Utility routine that checks if \param MO is defined by an
5894// \param CombineOpc instruction in the basic block \param MBB
5895 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5896 unsigned CombineOpc, unsigned ZeroReg = 0,
5897 bool CheckZeroReg = false) {
5898 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5899 MachineInstr *MI = nullptr;
5900
5901 if (MO.isReg() && MO.getReg().isVirtual())
5902 MI = MRI.getUniqueVRegDef(MO.getReg());
5903 // And it needs to be in the trace (otherwise, it won't have a depth).
5904 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5905 return false;
5906 // Must only be used by the user we combine with.
5907 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5908 return false;
5909
5910 if (CheckZeroReg) {
5911 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5912 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5913 MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5914 // The third input reg must be zero.
5915 if (MI->getOperand(3).getReg() != ZeroReg)
5916 return false;
5917 }
5918
5919 if (isCombineInstrSettingFlag(CombineOpc) &&
5920 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
5921 return false;
5922
5923 return true;
5924}
5925
5926//
5927// Is \param MO defined by an integer multiply and can be combined?
5928 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5929 unsigned MulOpc, unsigned ZeroReg) {
5930 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5931}
5932
5933//
5934// Is \param MO defined by a floating-point multiply and can be combined?
5935 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5936 unsigned MulOpc) {
5937 return canCombine(MBB, MO, MulOpc);
5938}
5939
5940// TODO: There are many more machine instruction opcodes to match:
5941// 1. Other data types (integer, vectors)
5942// 2. Other math / logic operations (xor, or)
5943// 3. Other forms of the same operation (intrinsics and other variants)
5944 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5945 bool Invert) const {
5946 if (Invert)
5947 return false;
5948 switch (Inst.getOpcode()) {
5949 // == Floating-point types ==
5950 // -- Floating-point instructions --
5951 case AArch64::FADDHrr:
5952 case AArch64::FADDSrr:
5953 case AArch64::FADDDrr:
5954 case AArch64::FMULHrr:
5955 case AArch64::FMULSrr:
5956 case AArch64::FMULDrr:
5957 case AArch64::FMULX16:
5958 case AArch64::FMULX32:
5959 case AArch64::FMULX64:
5960 // -- Advanced SIMD instructions --
5961 case AArch64::FADDv4f16:
5962 case AArch64::FADDv8f16:
5963 case AArch64::FADDv2f32:
5964 case AArch64::FADDv4f32:
5965 case AArch64::FADDv2f64:
5966 case AArch64::FMULv4f16:
5967 case AArch64::FMULv8f16:
5968 case AArch64::FMULv2f32:
5969 case AArch64::FMULv4f32:
5970 case AArch64::FMULv2f64:
5971 case AArch64::FMULXv4f16:
5972 case AArch64::FMULXv8f16:
5973 case AArch64::FMULXv2f32:
5974 case AArch64::FMULXv4f32:
5975 case AArch64::FMULXv2f64:
5976 // -- SVE instructions --
5977 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5978 // in the SVE instruction set (though there are predicated ones).
5979 case AArch64::FADD_ZZZ_H:
5980 case AArch64::FADD_ZZZ_S:
5981 case AArch64::FADD_ZZZ_D:
5982 case AArch64::FMUL_ZZZ_H:
5983 case AArch64::FMUL_ZZZ_S:
5984 case AArch64::FMUL_ZZZ_D:
5985 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
5986 (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
5987 Inst.getFlag(MachineInstr::MIFlag::FmNsz));
5988
5989 // == Integer types ==
5990 // -- Base instructions --
5991 // Opcodes MULWrr and MULXrr don't exist because
5992 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
5993 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
5994 // The machine-combiner does not support three-source-operand machine
5995 // instructions, so we cannot reassociate MULs.
5996 case AArch64::ADDWrr:
5997 case AArch64::ADDXrr:
5998 case AArch64::ANDWrr:
5999 case AArch64::ANDXrr:
6000 case AArch64::ORRWrr:
6001 case AArch64::ORRXrr:
6002 case AArch64::EORWrr:
6003 case AArch64::EORXrr:
6004 case AArch64::EONWrr:
6005 case AArch64::EONXrr:
6006 // -- Advanced SIMD instructions --
6007 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6008 // in the Advanced SIMD instruction set.
6009 case AArch64::ADDv8i8:
6010 case AArch64::ADDv16i8:
6011 case AArch64::ADDv4i16:
6012 case AArch64::ADDv8i16:
6013 case AArch64::ADDv2i32:
6014 case AArch64::ADDv4i32:
6015 case AArch64::ADDv1i64:
6016 case AArch64::ADDv2i64:
6017 case AArch64::MULv8i8:
6018 case AArch64::MULv16i8:
6019 case AArch64::MULv4i16:
6020 case AArch64::MULv8i16:
6021 case AArch64::MULv2i32:
6022 case AArch64::MULv4i32:
6023 case AArch64::ANDv8i8:
6024 case AArch64::ANDv16i8:
6025 case AArch64::ORRv8i8:
6026 case AArch64::ORRv16i8:
6027 case AArch64::EORv8i8:
6028 case AArch64::EORv16i8:
6029 // -- SVE instructions --
6030 case AArch64::ADD_ZZZ_B:
6031 case AArch64::ADD_ZZZ_H:
6032 case AArch64::ADD_ZZZ_S:
6033 case AArch64::ADD_ZZZ_D:
6034 case AArch64::MUL_ZZZ_B:
6035 case AArch64::MUL_ZZZ_H:
6036 case AArch64::MUL_ZZZ_S:
6037 case AArch64::MUL_ZZZ_D:
6038 case AArch64::AND_ZZZ:
6039 case AArch64::ORR_ZZZ:
6040 case AArch64::EOR_ZZZ:
6041 return true;
6042
6043 default:
6044 return false;
6045 }
6046}
6047
6048/// Find instructions that can be turned into madd.
6049 static bool getMaddPatterns(MachineInstr &Root,
6050 SmallVectorImpl<unsigned> &Patterns) {
6051 unsigned Opc = Root.getOpcode();
6052 MachineBasicBlock &MBB = *Root.getParent();
6053 bool Found = false;
6054
6055 if (!isCombineInstrCandidate(Opc))
6056 return false;
6057 if (isCombineInstrSettingFlag(Opc)) {
6058 int Cmp_NZCV =
6059 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6060 // When NZCV is live bail out.
6061 if (Cmp_NZCV == -1)
6062 return false;
6063 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6064 // When opcode can't change bail out.
6065 // CHECKME: do we miss any cases for opcode conversion?
6066 if (NewOpc == Opc)
6067 return false;
6068 Opc = NewOpc;
6069 }
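// E.g. an ADDSWrr root whose NZCV def is dead is handled as ADDWrr from here
// on, so a feeding MUL can still be combined into a MADDWrrr below.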
6070
6071 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6072 unsigned Pattern) {
6073 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6074 Patterns.push_back(Pattern);
6075 Found = true;
6076 }
6077 };
6078
6079 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6080 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6081 Patterns.push_back(Pattern);
6082 Found = true;
6083 }
6084 };
6085
6086 typedef AArch64MachineCombinerPattern MCP;
6087
6088 switch (Opc) {
6089 default:
6090 break;
6091 case AArch64::ADDWrr:
6092 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6093 "ADDWrr does not have register operands");
6094 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6095 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6096 break;
6097 case AArch64::ADDXrr:
6098 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6099 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6100 break;
6101 case AArch64::SUBWrr:
6102 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6103 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6104 break;
6105 case AArch64::SUBXrr:
6106 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6107 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6108 break;
6109 case AArch64::ADDWri:
6110 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6111 break;
6112 case AArch64::ADDXri:
6113 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6114 break;
6115 case AArch64::SUBWri:
6116 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6117 break;
6118 case AArch64::SUBXri:
6119 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6120 break;
6121 case AArch64::ADDv8i8:
6122 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6123 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6124 break;
6125 case AArch64::ADDv16i8:
6126 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6127 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6128 break;
6129 case AArch64::ADDv4i16:
6130 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6131 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6132 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6133 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6134 break;
6135 case AArch64::ADDv8i16:
6136 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6137 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6138 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6139 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6140 break;
6141 case AArch64::ADDv2i32:
6142 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6143 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6144 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6145 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6146 break;
6147 case AArch64::ADDv4i32:
6148 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6149 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6150 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6151 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6152 break;
6153 case AArch64::SUBv8i8:
6154 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6155 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6156 break;
6157 case AArch64::SUBv16i8:
6158 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6159 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6160 break;
6161 case AArch64::SUBv4i16:
6162 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6163 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6164 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6165 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6166 break;
6167 case AArch64::SUBv8i16:
6168 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6169 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6170 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6171 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6172 break;
6173 case AArch64::SUBv2i32:
6174 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6175 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6176 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6177 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6178 break;
6179 case AArch64::SUBv4i32:
6180 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6181 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6182 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6183 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6184 break;
6185 }
6186 return Found;
6187}
6188 /// Floating-Point Support
6189
6190/// Find instructions that can be turned into madd.
6191 static bool getFMAPatterns(MachineInstr &Root,
6192 SmallVectorImpl<unsigned> &Patterns) {
6193
6194 if (!isCombineInstrCandidateFP(Root))
6195 return false;
6196
6197 MachineBasicBlock &MBB = *Root.getParent();
6198 bool Found = false;
6199
6200 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6201 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6202 Patterns.push_back(Pattern);
6203 return true;
6204 }
6205 return false;
6206 };
6207
6208 typedef AArch64MachineCombinerPattern MCP;
6209
6210 switch (Root.getOpcode()) {
6211 default:
6212 assert(false && "Unsupported FP instruction in combiner\n");
6213 break;
6214 case AArch64::FADDHrr:
6215 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6216 "FADDHrr does not have register operands");
6217
6218 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6219 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6220 break;
6221 case AArch64::FADDSrr:
6222 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6223 "FADDSrr does not have register operands");
6224
6225 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6226 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6227
6228 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6229 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6230 break;
6231 case AArch64::FADDDrr:
6232 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6233 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6234
6235 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6236 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6237 break;
6238 case AArch64::FADDv4f16:
6239 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6240 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6241
6242 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6243 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6244 break;
6245 case AArch64::FADDv8f16:
6246 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6247 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6248
6249 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6250 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6251 break;
6252 case AArch64::FADDv2f32:
6253 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6254 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6255
6256 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6257 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6258 break;
6259 case AArch64::FADDv2f64:
6260 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6261 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6262
6263 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6264 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6265 break;
6266 case AArch64::FADDv4f32:
6267 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6268 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6269
6270 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6271 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6272 break;
6273 case AArch64::FSUBHrr:
6274 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6275 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6276 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6277 break;
6278 case AArch64::FSUBSrr:
6279 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6280
6281 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6282 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6283
6284 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6285 break;
6286 case AArch64::FSUBDrr:
6287 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6288
6289 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6290 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6291
6292 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6293 break;
6294 case AArch64::FSUBv4f16:
6295 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6296 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6297
6298 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6299 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6300 break;
6301 case AArch64::FSUBv8f16:
6302 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6303 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6304
6305 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6306 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6307 break;
6308 case AArch64::FSUBv2f32:
6309 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6310 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6311
6312 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6313 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6314 break;
6315 case AArch64::FSUBv2f64:
6316 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6317 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6318
6319 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6320 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6321 break;
6322 case AArch64::FSUBv4f32:
6323 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6324 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6325
6326 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6327 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6328 break;
6329 }
6330 return Found;
6331}
6332
6333 static bool getFMULPatterns(MachineInstr &Root,
6334 SmallVectorImpl<unsigned> &Patterns) {
6335 MachineBasicBlock &MBB = *Root.getParent();
6336 bool Found = false;
6337
6338 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6339 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6340 MachineOperand &MO = Root.getOperand(Operand);
6341 MachineInstr *MI = nullptr;
6342 if (MO.isReg() && MO.getReg().isVirtual())
6343 MI = MRI.getUniqueVRegDef(MO.getReg());
6344 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6345 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6346 MI->getOperand(1).getReg().isVirtual())
6347 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6348 if (MI && MI->getOpcode() == Opcode) {
6349 Patterns.push_back(Pattern);
6350 return true;
6351 }
6352 return false;
6353 };
6354
6355 typedef AArch64MachineCombinerPattern MCP;
6356
6357 switch (Root.getOpcode()) {
6358 default:
6359 return false;
6360 case AArch64::FMULv2f32:
6361 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6362 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6363 break;
6364 case AArch64::FMULv2f64:
6365 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6366 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6367 break;
6368 case AArch64::FMULv4f16:
6369 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6370 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6371 break;
6372 case AArch64::FMULv4f32:
6373 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6374 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6375 break;
6376 case AArch64::FMULv8f16:
6377 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6378 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6379 break;
6380 }
6381
6382 return Found;
6383}
6384
6385 static bool getFNEGPatterns(MachineInstr &Root,
6386 SmallVectorImpl<unsigned> &Patterns) {
6387 unsigned Opc = Root.getOpcode();
6388 MachineBasicBlock &MBB = *Root.getParent();
6389 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6390
6391 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6392 MachineOperand &MO = Root.getOperand(1);
6393 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6394 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6395 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6396 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
6397 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
6398 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
6399 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6400 Patterns.push_back(Pattern);
6401 return true;
6402 }
6403 return false;
6404 };
6405
6406 switch (Opc) {
6407 default:
6408 break;
6409 case AArch64::FNEGDr:
6410 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6411 case AArch64::FNEGSr:
6412 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6413 }
6414
6415 return false;
6416}
6417
6418/// Return true when a code sequence can improve throughput. It
6419/// should be called only for instructions in loops.
6420/// \param Pattern - combiner pattern
6421 static bool isThroughputPattern(unsigned Pattern) {
6422 switch (Pattern) {
6423 default:
6424 break;
6530 return true;
6531 } // end switch (Pattern)
6532 return false;
6533}
6534
6535/// Find other MI combine patterns.
6537 SmallVectorImpl<unsigned> &Patterns) {
6538 // A - (B + C) ==> (A - B) - C or (A - C) - B
6539 unsigned Opc = Root.getOpcode();
6540 MachineBasicBlock &MBB = *Root.getParent();
6541
6542 switch (Opc) {
6543 case AArch64::SUBWrr:
6544 case AArch64::SUBSWrr:
6545 case AArch64::SUBXrr:
6546 case AArch64::SUBSXrr:
6547 // Found candidate root.
6548 break;
6549 default:
6550 return false;
6551 }
6552
6553 if (isCombineInstrSettingFlag(Opc) &&
6554 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6555 -1)
6556 return false;
6557
6558 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6559 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6560 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6561 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6562 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
6563 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
6564 return true;
6565 }
6566
6567 return false;
6568}
6569
6570 CombinerObjective
6571 AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
6572 switch (Pattern) {
6573 case AArch64MachineCombinerPattern::SUBADD_OP1:
6574 case AArch64MachineCombinerPattern::SUBADD_OP2:
6575 return CombinerObjective::MustReduceDepth;
6576 default:
6577 return TargetInstrInfo::getCombinerObjective(Pattern);
6578 }
6579}
6580
6581/// Return true when there is potentially a faster code sequence for an
6582/// instruction chain ending in \p Root. All potential patterns are listed in
6583/// the \p Pattern vector. Pattern should be sorted in priority order since the
6584/// pattern evaluator stops checking as soon as it finds a faster sequence.
6585
6586 bool AArch64InstrInfo::getMachineCombinerPatterns(
6587 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6588 bool DoRegPressureReduce) const {
6589 // Integer patterns
6590 if (getMaddPatterns(Root, Patterns))
6591 return true;
6592 // Floating point patterns
6593 if (getFMULPatterns(Root, Patterns))
6594 return true;
6595 if (getFMAPatterns(Root, Patterns))
6596 return true;
6597 if (getFNEGPatterns(Root, Patterns))
6598 return true;
6599
6600 // Other patterns
6601 if (getMiscPatterns(Root, Patterns))
6602 return true;
6603
6604 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6605 DoRegPressureReduce);
6606}
6607
6608 enum class FMAInstKind { Default, Indexed, Accumulator };
6609 /// genFusedMultiply - Generate fused multiply instructions.
6610/// This function supports both integer and floating point instructions.
6611/// A typical example:
6612/// F|MUL I=A,B,0
6613/// F|ADD R,I,C
6614/// ==> F|MADD R,A,B,C
6615/// \param MF Containing MachineFunction
6616/// \param MRI Register information
6617/// \param TII Target information
6618/// \param Root is the F|ADD instruction
6619/// \param [out] InsInstrs is a vector of machine instructions and will
6620/// contain the generated madd instruction
6621/// \param IdxMulOpd is index of operand in Root that is the result of
6622/// the F|MUL. In the example above IdxMulOpd is 1.
6623 /// \param MaddOpc the opcode of the f|madd instruction
6624/// \param RC Register class of operands
6625/// \param kind of fma instruction (addressing mode) to be generated
6626/// \param ReplacedAddend is the result register from the instruction
6627/// replacing the non-combined operand, if any.
6628static MachineInstr *
6629 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
6630 const TargetInstrInfo *TII, MachineInstr &Root,
6631 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6632 unsigned MaddOpc, const TargetRegisterClass *RC,
6633 FMAInstKind kind = FMAInstKind::Default,
6634 const Register *ReplacedAddend = nullptr) {
6635 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6636
6637 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6638 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6639 Register ResultReg = Root.getOperand(0).getReg();
6640 Register SrcReg0 = MUL->getOperand(1).getReg();
6641 bool Src0IsKill = MUL->getOperand(1).isKill();
6642 Register SrcReg1 = MUL->getOperand(2).getReg();
6643 bool Src1IsKill = MUL->getOperand(2).isKill();
6644
6645 Register SrcReg2;
6646 bool Src2IsKill;
6647 if (ReplacedAddend) {
6648 // If we just generated a new addend, this must be its only use.
6649 SrcReg2 = *ReplacedAddend;
6650 Src2IsKill = true;
6651 } else {
6652 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6653 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6654 }
6655
6656 if (ResultReg.isVirtual())
6657 MRI.constrainRegClass(ResultReg, RC);
6658 if (SrcReg0.isVirtual())
6659 MRI.constrainRegClass(SrcReg0, RC);
6660 if (SrcReg1.isVirtual())
6661 MRI.constrainRegClass(SrcReg1, RC);
6662 if (SrcReg2.isVirtual())
6663 MRI.constrainRegClass(SrcReg2, RC);
6664
6665 MachineInstrBuilder MIB;
6666 if (kind == FMAInstKind::Default)
6667 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6668 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6669 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6670 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6671 else if (kind == FMAInstKind::Indexed)
6672 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6673 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6674 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6675 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6676 .addImm(MUL->getOperand(3).getImm());
6677 else if (kind == FMAInstKind::Accumulator)
6678 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6679 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6680 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6681 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6682 else
6683 assert(false && "Invalid FMA instruction kind \n");
6684 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
6685 InsInstrs.push_back(MIB);
6686 return MUL;
6687}
6688
6689static MachineInstr *
6690 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
6691 const TargetInstrInfo *TII, MachineInstr &Root,
6692 SmallVectorImpl<MachineInstr *> &InsInstrs) {
6693 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6694
6695 unsigned Opc = 0;
6696 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6697 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6698 Opc = AArch64::FNMADDSrrr;
6699 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6700 Opc = AArch64::FNMADDDrrr;
6701 else
6702 return nullptr;
6703
6704 Register ResultReg = Root.getOperand(0).getReg();
6705 Register SrcReg0 = MAD->getOperand(1).getReg();
6706 Register SrcReg1 = MAD->getOperand(2).getReg();
6707 Register SrcReg2 = MAD->getOperand(3).getReg();
6708 bool Src0IsKill = MAD->getOperand(1).isKill();
6709 bool Src1IsKill = MAD->getOperand(2).isKill();
6710 bool Src2IsKill = MAD->getOperand(3).isKill();
6711 if (ResultReg.isVirtual())
6712 MRI.constrainRegClass(ResultReg, RC);
6713 if (SrcReg0.isVirtual())
6714 MRI.constrainRegClass(SrcReg0, RC);
6715 if (SrcReg1.isVirtual())
6716 MRI.constrainRegClass(SrcReg1, RC);
6717 if (SrcReg2.isVirtual())
6718 MRI.constrainRegClass(SrcReg2, RC);
6719
6720 MachineInstrBuilder MIB =
6721 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6722 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6723 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6724 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6725 InsInstrs.push_back(MIB);
6726
6727 return MAD;
6728}
6729
6730/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
6731static MachineInstr *
6732 genIndexedMultiply(MachineInstr &Root,
6733 SmallVectorImpl<MachineInstr *> &InsInstrs,
6734 unsigned IdxDupOp, unsigned MulOpc,
6735 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6736 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6737 "Invalid index of FMUL operand");
6738
6739 MachineFunction &MF = *Root.getMF();
6740 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6741
6742 MachineInstr *Dup =
6743 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6744
6745 if (Dup->getOpcode() == TargetOpcode::COPY)
6746 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6747
6748 Register DupSrcReg = Dup->getOperand(1).getReg();
6749 MRI.clearKillFlags(DupSrcReg);
6750 MRI.constrainRegClass(DupSrcReg, RC);
6751
6752 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6753
6754 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6755 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6756
6757 Register ResultReg = Root.getOperand(0).getReg();
6758
6759 MachineInstrBuilder MIB;
6760 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6761 .add(MulOp)
6762 .addReg(DupSrcReg)
6763 .addImm(DupSrcLane);
6764
6765 InsInstrs.push_back(MIB);
6766 return &Root;
6767}
6768
6769/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6770/// instructions.
6771///
6772/// \see genFusedMultiply
6773 static MachineInstr *genFusedMultiplyAcc(
6774 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6775 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6776 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6777 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6778 FMAInstKind::Accumulator);
6779}
6780
6781/// genNeg - Helper to generate an intermediate negation of the second operand
6782/// of Root
6783 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6784 const TargetInstrInfo *TII, MachineInstr &Root,
6785 SmallVectorImpl<MachineInstr *> &InsInstrs,
6786 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6787 unsigned MnegOpc, const TargetRegisterClass *RC) {
6788 Register NewVR = MRI.createVirtualRegister(RC);
6789 MachineInstrBuilder MIB =
6790 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6791 .add(Root.getOperand(2));
6792 InsInstrs.push_back(MIB);
6793
6794 assert(InstrIdxForVirtReg.empty());
6795 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6796
6797 return NewVR;
6798}
6799
6800/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6801/// instructions with an additional negation of the accumulator
6802 static MachineInstr *genFusedMultiplyAccNeg(
6803 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6804 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6805 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6806 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6807 assert(IdxMulOpd == 1);
6808
6809 Register NewVR =
6810 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6811 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6812 FMAInstKind::Accumulator, &NewVR);
6813}
6814
6815 /// genFusedMultiplyIdx - Helper to generate fused multiply indexed
6816/// instructions.
6817///
6818/// \see genFusedMultiply
6819 static MachineInstr *genFusedMultiplyIdx(
6820 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6821 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6822 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6823 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6824 FMAInstKind::Indexed);
6825}
6826
6827 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply indexed
6828/// instructions with an additional negation of the accumulator
6829 static MachineInstr *genFusedMultiplyIdxNeg(
6830 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6831 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6832 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6833 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6834 assert(IdxMulOpd == 1);
6835
6836 Register NewVR =
6837 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6838
6839 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6840 FMAInstKind::Indexed, &NewVR);
6841}
6842
6843/// genMaddR - Generate madd instruction and combine mul and add using
6844/// an extra virtual register
6845/// Example - an ADD intermediate needs to be stored in a register:
6846/// MUL I=A,B,0
6847/// ADD R,I,Imm
6848/// ==> ORR V, ZR, Imm
6849/// ==> MADD R,A,B,V
6850/// \param MF Containing MachineFunction
6851/// \param MRI Register information
6852/// \param TII Target information
6853/// \param Root is the ADD instruction
6854/// \param [out] InsInstrs is a vector of machine instructions and will
6855/// contain the generated madd instruction
6856/// \param IdxMulOpd is index of operand in Root that is the result of
6857/// the MUL. In the example above IdxMulOpd is 1.
6858 /// \param MaddOpc the opcode of the madd instruction
6859/// \param VR is a virtual register that holds the value of an ADD operand
6860/// (V in the example above).
6861/// \param RC Register class of operands
6862 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6863 const TargetInstrInfo *TII, MachineInstr &Root,
6864 SmallVectorImpl<MachineInstr *> &InsInstrs,
6865 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6866 const TargetRegisterClass *RC) {
6867 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6868
6869 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6870 Register ResultReg = Root.getOperand(0).getReg();
6871 Register SrcReg0 = MUL->getOperand(1).getReg();
6872 bool Src0IsKill = MUL->getOperand(1).isKill();
6873 Register SrcReg1 = MUL->getOperand(2).getReg();
6874 bool Src1IsKill = MUL->getOperand(2).isKill();
6875
6876 if (ResultReg.isVirtual())
6877 MRI.constrainRegClass(ResultReg, RC);
6878 if (SrcReg0.isVirtual())
6879 MRI.constrainRegClass(SrcReg0, RC);
6880 if (SrcReg1.isVirtual())
6881 MRI.constrainRegClass(SrcReg1, RC);
6882 if (Register(VR).isVirtual())
6883 MRI.constrainRegClass(VR, RC);
6884
6885 MachineInstrBuilder MIB =
6886 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6887 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6888 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6889 .addReg(VR);
6890 // Insert the MADD
6891 InsInstrs.push_back(MIB);
6892 return MUL;
6893}
6894
6895/// Do the following transformation
6896/// A - (B + C) ==> (A - B) - C
6897/// A - (B + C) ==> (A - C) - B
6898static void
6899 genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6900 const TargetInstrInfo *TII, MachineInstr &Root,
6901 SmallVectorImpl<MachineInstr *> &InsInstrs,
6902 SmallVectorImpl<MachineInstr *> &DelInstrs,
6903 unsigned IdxOpd1,
6904 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6905 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6906 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6907 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6908
6909 Register ResultReg = Root.getOperand(0).getReg();
6910 Register RegA = Root.getOperand(1).getReg();
6911 bool RegAIsKill = Root.getOperand(1).isKill();
6912 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6913 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6914 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6915 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6916 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6917
6918 unsigned Opcode = Root.getOpcode();
6919 if (Opcode == AArch64::SUBSWrr)
6920 Opcode = AArch64::SUBWrr;
6921 else if (Opcode == AArch64::SUBSXrr)
6922 Opcode = AArch64::SUBXrr;
6923 else
6924 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6925 "Unexpected instruction opcode.");
6926
6927 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
6928 Flags &= ~MachineInstr::NoSWrap;
6929 Flags &= ~MachineInstr::NoUWrap;
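// The nsw/nuw flags are dropped because an intermediate (A - B) or (A - C)
// may wrap even when the original A - (B + C) does not.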
6930
6931 MachineInstrBuilder MIB1 =
6932 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6933 .addReg(RegA, getKillRegState(RegAIsKill))
6934 .addReg(RegB, getKillRegState(RegBIsKill))
6935 .setMIFlags(Flags);
6936 MachineInstrBuilder MIB2 =
6937 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6938 .addReg(NewVR, getKillRegState(true))
6939 .addReg(RegC, getKillRegState(RegCIsKill))
6940 .setMIFlags(Flags);
6941
6942 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6943 InsInstrs.push_back(MIB1);
6944 InsInstrs.push_back(MIB2);
6945 DelInstrs.push_back(AddMI);
6946 DelInstrs.push_back(&Root);
6947}
6948
6949/// When getMachineCombinerPatterns() finds potential patterns,
6950/// this function generates the instructions that could replace the
6951/// original code sequence
6952 void AArch64InstrInfo::genAlternativeCodeSequence(
6953 MachineInstr &Root, unsigned Pattern,
6954 SmallVectorImpl<MachineInstr *> &InsInstrs,
6955 SmallVectorImpl<MachineInstr *> &DelInstrs,
6956 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6957 MachineBasicBlock &MBB = *Root.getParent();
6958 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6959 MachineFunction &MF = *MBB.getParent();
6960 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6961
6962 MachineInstr *MUL = nullptr;
6963 const TargetRegisterClass *RC;
6964 unsigned Opc;
6965 switch (Pattern) {
6966 default:
6967 // Reassociate instructions.
6968 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
6969 DelInstrs, InstrIdxForVirtReg);
6970 return;
6971 case AArch64MachineCombinerPattern::SUBADD_OP1:
6972 // A - (B + C)
6973 // ==> (A - B) - C
6974 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
6975 InstrIdxForVirtReg);
6976 return;
6977 case AArch64MachineCombinerPattern::SUBADD_OP2:
6978 // A - (B + C)
6979 // ==> (A - C) - B
6980 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
6981 InstrIdxForVirtReg);
6982 return;
6983 case AArch64MachineCombinerPattern::MULADDW_OP1:
6984 case AArch64MachineCombinerPattern::MULADDX_OP1:
6985 // MUL I=A,B,0
6986 // ADD R,I,C
6987 // ==> MADD R,A,B,C
6988 // --- Create(MADD);
6989 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
6990 Opc = AArch64::MADDWrrr;
6991 RC = &AArch64::GPR32RegClass;
6992 } else {
6993 Opc = AArch64::MADDXrrr;
6994 RC = &AArch64::GPR64RegClass;
6995 }
6996 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
6997 break;
6998 case AArch64MachineCombinerPattern::MULADDW_OP2:
6999 case AArch64MachineCombinerPattern::MULADDX_OP2:
7000 // MUL I=A,B,0
7001 // ADD R,C,I
7002 // ==> MADD R,A,B,C
7003 // --- Create(MADD);
7004 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
7005 Opc = AArch64::MADDWrrr;
7006 RC = &AArch64::GPR32RegClass;
7007 } else {
7008 Opc = AArch64::MADDXrrr;
7009 RC = &AArch64::GPR64RegClass;
7010 }
7011 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7012 break;
7013 case AArch64MachineCombinerPattern::MULADDWI_OP1:
7014 case AArch64MachineCombinerPattern::MULADDXI_OP1: {
7015 // MUL I=A,B,0
7016 // ADD R,I,Imm
7017 // ==> MOV V, Imm
7018 // ==> MADD R,A,B,V
7019 // --- Create(MADD);
7020 const TargetRegisterClass *OrrRC;
7021 unsigned BitSize, OrrOpc, ZeroReg;
7022 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1) {
7023 OrrOpc = AArch64::ORRWri;
7024 OrrRC = &AArch64::GPR32spRegClass;
7025 BitSize = 32;
7026 ZeroReg = AArch64::WZR;
7027 Opc = AArch64::MADDWrrr;
7028 RC = &AArch64::GPR32RegClass;
7029 } else {
7030 OrrOpc = AArch64::ORRXri;
7031 OrrRC = &AArch64::GPR64spRegClass;
7032 BitSize = 64;
7033 ZeroReg = AArch64::XZR;
7034 Opc = AArch64::MADDXrrr;
7035 RC = &AArch64::GPR64RegClass;
7036 }
7037 Register NewVR = MRI.createVirtualRegister(OrrRC);
7038 uint64_t Imm = Root.getOperand(2).getImm();
7039
7040 if (Root.getOperand(3).isImm()) {
7041 unsigned Val = Root.getOperand(3).getImm();
7042 Imm = Imm << Val;
7043 }
7044 uint64_t UImm = SignExtend64(Imm, BitSize);
7045 // The immediate can be composed via a single instruction.
7046 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7047 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7048 if (Insn.size() != 1)
7049 return;
7050 auto MovI = Insn.begin();
7051 MachineInstrBuilder MIB1;
7052 // MOV is an alias for one of three instructions: movz, movn, and orr.
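// Illustrative examples: 0x2a expands to a single MOVZ, 0xffffffffffffff85 to
// a single MOVN, and a repeating pattern such as 0x5555555555555555 to a
// single ORR with a logical immediate; anything needing more than one
// instruction was rejected just above.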
7053 if (MovI->Opcode == OrrOpc)
7054 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7055 .addReg(ZeroReg)
7056 .addImm(MovI->Op2);
7057 else {
7058 if (BitSize == 32)
7059 assert((MovI->Opcode == AArch64::MOVNWi ||
7060 MovI->Opcode == AArch64::MOVZWi) &&
7061 "Expected opcode");
7062 else
7063 assert((MovI->Opcode == AArch64::MOVNXi ||
7064 MovI->Opcode == AArch64::MOVZXi) &&
7065 "Expected opcode");
7066 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7067 .addImm(MovI->Op1)
7068 .addImm(MovI->Op2);
7069 }
7070 InsInstrs.push_back(MIB1);
7071 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7072 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7073 break;
7074 }
7075 case AArch64MachineCombinerPattern::MULSUBW_OP1:
7076 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
7077 // MUL I=A,B,0
7078 // SUB R,I, C
7079 // ==> SUB V, 0, C
7080 // ==> MADD R,A,B,V // = -C + A*B
7081 // --- Create(MADD);
7082 const TargetRegisterClass *SubRC;
7083 unsigned SubOpc, ZeroReg;
7084 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
7085 SubOpc = AArch64::SUBWrr;
7086 SubRC = &AArch64::GPR32spRegClass;
7087 ZeroReg = AArch64::WZR;
7088 Opc = AArch64::MADDWrrr;
7089 RC = &AArch64::GPR32RegClass;
7090 } else {
7091 SubOpc = AArch64::SUBXrr;
7092 SubRC = &AArch64::GPR64spRegClass;
7093 ZeroReg = AArch64::XZR;
7094 Opc = AArch64::MADDXrrr;
7095 RC = &AArch64::GPR64RegClass;
7096 }
7097 Register NewVR = MRI.createVirtualRegister(SubRC);
7098 // SUB NewVR, 0, C
7099 MachineInstrBuilder MIB1 =
7100 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7101 .addReg(ZeroReg)
7102 .add(Root.getOperand(2));
7103 InsInstrs.push_back(MIB1);
7104 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7105 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7106 break;
7107 }
7108 case AArch64MachineCombinerPattern::MULSUBW_OP2:
7109 case AArch64MachineCombinerPattern::MULSUBX_OP2:
7110 // MUL I=A,B,0
7111 // SUB R,C,I
7112 // ==> MSUB R,A,B,C (computes C - A*B)
7113 // --- Create(MSUB);
7114 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
7115 Opc = AArch64::MSUBWrrr;
7116 RC = &AArch64::GPR32RegClass;
7117 } else {
7118 Opc = AArch64::MSUBXrrr;
7119 RC = &AArch64::GPR64RegClass;
7120 }
7121 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7122 break;
7123 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
7124 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
7125 // MUL I=A,B,0
7126 // SUB R,I, Imm
7127 // ==> MOV V, -Imm
7128 // ==> MADD R,A,B,V // = -Imm + A*B
7129 // --- Create(MADD);
7130 const TargetRegisterClass *OrrRC;
7131 unsigned BitSize, OrrOpc, ZeroReg;
7132 if (Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
7133 OrrOpc = AArch64::ORRWri;
7134 OrrRC = &AArch64::GPR32spRegClass;
7135 BitSize = 32;
7136 ZeroReg = AArch64::WZR;
7137 Opc = AArch64::MADDWrrr;
7138 RC = &AArch64::GPR32RegClass;
7139 } else {
7140 OrrOpc = AArch64::ORRXri;
7141 OrrRC = &AArch64::GPR64spRegClass;
7142 BitSize = 64;
7143 ZeroReg = AArch64::XZR;
7144 Opc = AArch64::MADDXrrr;
7145 RC = &AArch64::GPR64RegClass;
7146 }
7147 Register NewVR = MRI.createVirtualRegister(OrrRC);
7148 uint64_t Imm = Root.getOperand(2).getImm();
7149 if (Root.getOperand(3).isImm()) {
7150 unsigned Val = Root.getOperand(3).getImm();
7151 Imm = Imm << Val;
7152 }
7153 uint64_t UImm = SignExtend64(-Imm, BitSize);
7154 // The immediate can be composed via a single instruction.
7155 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
7156 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7157 if (Insn.size() != 1)
7158 return;
7159 auto MovI = Insn.begin();
7160 MachineInstrBuilder MIB1;
7161 // MOV is an alias for one of three instructions: movz, movn, and orr.
7162 if (MovI->Opcode == OrrOpc)
7163 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7164 .addReg(ZeroReg)
7165 .addImm(MovI->Op2);
7166 else {
7167 if (BitSize == 32)
7168 assert((MovI->Opcode == AArch64::MOVNWi ||
7169 MovI->Opcode == AArch64::MOVZWi) &&
7170 "Expected opcode");
7171 else
7172 assert((MovI->Opcode == AArch64::MOVNXi ||
7173 MovI->Opcode == AArch64::MOVZXi) &&
7174 "Expected opcode");
7175 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7176 .addImm(MovI->Op1)
7177 .addImm(MovI->Op2);
7178 }
7179 InsInstrs.push_back(MIB1);
7180 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7181 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7182 break;
7183 }
7184
7186 Opc = AArch64::MLAv8i8;
7187 RC = &AArch64::FPR64RegClass;
7188 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7189 break;
7191 Opc = AArch64::MLAv8i8;
7192 RC = &AArch64::FPR64RegClass;
7193 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7194 break;
7196 Opc = AArch64::MLAv16i8;
7197 RC = &AArch64::FPR128RegClass;
7198 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7199 break;
7201 Opc = AArch64::MLAv16i8;
7202 RC = &AArch64::FPR128RegClass;
7203 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7204 break;
7206 Opc = AArch64::MLAv4i16;
7207 RC = &AArch64::FPR64RegClass;
7208 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7209 break;
7211 Opc = AArch64::MLAv4i16;
7212 RC = &AArch64::FPR64RegClass;
7213 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7214 break;
7216 Opc = AArch64::MLAv8i16;
7217 RC = &AArch64::FPR128RegClass;
7218 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7219 break;
7221 Opc = AArch64::MLAv8i16;
7222 RC = &AArch64::FPR128RegClass;
7223 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7224 break;
7226 Opc = AArch64::MLAv2i32;
7227 RC = &AArch64::FPR64RegClass;
7228 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7229 break;
7231 Opc = AArch64::MLAv2i32;
7232 RC = &AArch64::FPR64RegClass;
7233 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7234 break;
7236 Opc = AArch64::MLAv4i32;
7237 RC = &AArch64::FPR128RegClass;
7238 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7239 break;
7241 Opc = AArch64::MLAv4i32;
7242 RC = &AArch64::FPR128RegClass;
7243 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7244 break;
7245
7247 Opc = AArch64::MLAv8i8;
7248 RC = &AArch64::FPR64RegClass;
7249 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7250 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7251 RC);
7252 break;
7254 Opc = AArch64::MLSv8i8;
7255 RC = &AArch64::FPR64RegClass;
7256 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7257 break;
7259 Opc = AArch64::MLAv16i8;
7260 RC = &AArch64::FPR128RegClass;
7261 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7262 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7263 RC);
7264 break;
7266 Opc = AArch64::MLSv16i8;
7267 RC = &AArch64::FPR128RegClass;
7268 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7269 break;
7271 Opc = AArch64::MLAv4i16;
7272 RC = &AArch64::FPR64RegClass;
7273 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7274 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7275 RC);
7276 break;
7278 Opc = AArch64::MLSv4i16;
7279 RC = &AArch64::FPR64RegClass;
7280 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7281 break;
7283 Opc = AArch64::MLAv8i16;
7284 RC = &AArch64::FPR128RegClass;
7285 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7286 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7287 RC);
7288 break;
7290 Opc = AArch64::MLSv8i16;
7291 RC = &AArch64::FPR128RegClass;
7292 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7293 break;
7295 Opc = AArch64::MLAv2i32;
7296 RC = &AArch64::FPR64RegClass;
7297 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7298 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7299 RC);
7300 break;
7302 Opc = AArch64::MLSv2i32;
7303 RC = &AArch64::FPR64RegClass;
7304 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7305 break;
7307 Opc = AArch64::MLAv4i32;
7308 RC = &AArch64::FPR128RegClass;
7309 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7310 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7311 RC);
7312 break;
7314 Opc = AArch64::MLSv4i32;
7315 RC = &AArch64::FPR128RegClass;
7316 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7317 break;
7318
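// The _indexed variants below fuse multiplies whose second source is a
// single lane of a vector register, producing the by-element forms of
// MLA/MLS via genFusedMultiplyIdx / genFusedMultiplyIdxNeg.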
7320 Opc = AArch64::MLAv4i16_indexed;
7321 RC = &AArch64::FPR64RegClass;
7322 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7323 break;
7325 Opc = AArch64::MLAv4i16_indexed;
7326 RC = &AArch64::FPR64RegClass;
7327 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7328 break;
7330 Opc = AArch64::MLAv8i16_indexed;
7331 RC = &AArch64::FPR128RegClass;
7332 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7333 break;
7335 Opc = AArch64::MLAv8i16_indexed;
7336 RC = &AArch64::FPR128RegClass;
7337 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7338 break;
7340 Opc = AArch64::MLAv2i32_indexed;
7341 RC = &AArch64::FPR64RegClass;
7342 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7343 break;
7345 Opc = AArch64::MLAv2i32_indexed;
7346 RC = &AArch64::FPR64RegClass;
7347 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7348 break;
7350 Opc = AArch64::MLAv4i32_indexed;
7351 RC = &AArch64::FPR128RegClass;
7352 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7353 break;
7355 Opc = AArch64::MLAv4i32_indexed;
7356 RC = &AArch64::FPR128RegClass;
7357 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7358 break;
7359
7361 Opc = AArch64::MLAv4i16_indexed;
7362 RC = &AArch64::FPR64RegClass;
7363 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7364 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7365 RC);
7366 break;
7368 Opc = AArch64::MLSv4i16_indexed;
7369 RC = &AArch64::FPR64RegClass;
7370 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7371 break;
7373 Opc = AArch64::MLAv8i16_indexed;
7374 RC = &AArch64::FPR128RegClass;
7375 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7376 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7377 RC);
7378 break;
7380 Opc = AArch64::MLSv8i16_indexed;
7381 RC = &AArch64::FPR128RegClass;
7382 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7383 break;
7385 Opc = AArch64::MLAv2i32_indexed;
7386 RC = &AArch64::FPR64RegClass;
7387 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7388 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7389 RC);
7390 break;
7392 Opc = AArch64::MLSv2i32_indexed;
7393 RC = &AArch64::FPR64RegClass;
7394 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7395 break;
7397 Opc = AArch64::MLAv4i32_indexed;
7398 RC = &AArch64::FPR128RegClass;
7399 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7400 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7401 RC);
7402 break;
7404 Opc = AArch64::MLSv4i32_indexed;
7405 RC = &AArch64::FPR128RegClass;
7406 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7407 break;
7408
7409 // Floating Point Support
7411 Opc = AArch64::FMADDHrrr;
7412 RC = &AArch64::FPR16RegClass;
7413 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7414 break;
7416 Opc = AArch64::FMADDSrrr;
7417 RC = &AArch64::FPR32RegClass;
7418 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7419 break;
7421 Opc = AArch64::FMADDDrrr;
7422 RC = &AArch64::FPR64RegClass;
7423 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7424 break;
7425
7427 Opc = AArch64::FMADDHrrr;
7428 RC = &AArch64::FPR16RegClass;
7429 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7430 break;
7432 Opc = AArch64::FMADDSrrr;
7433 RC = &AArch64::FPR32RegClass;
7434 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7435 break;
7437 Opc = AArch64::FMADDDrrr;
7438 RC = &AArch64::FPR64RegClass;
7439 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7440 break;
7441
7443 Opc = AArch64::FMLAv1i32_indexed;
7444 RC = &AArch64::FPR32RegClass;
7445 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7446 FMAInstKind::Indexed);
7447 break;
7449 Opc = AArch64::FMLAv1i32_indexed;
7450 RC = &AArch64::FPR32RegClass;
7451 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7452 FMAInstKind::Indexed);
7453 break;
7454
7456 Opc = AArch64::FMLAv1i64_indexed;
7457 RC = &AArch64::FPR64RegClass;
7458 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7459 FMAInstKind::Indexed);
7460 break;
7462 Opc = AArch64::FMLAv1i64_indexed;
7463 RC = &AArch64::FPR64RegClass;
7464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7465 FMAInstKind::Indexed);
7466 break;
7467
7469 RC = &AArch64::FPR64RegClass;
7470 Opc = AArch64::FMLAv4i16_indexed;
7471 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7472 FMAInstKind::Indexed);
7473 break;
7475 RC = &AArch64::FPR64RegClass;
7476 Opc = AArch64::FMLAv4f16;
7477 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7478 FMAInstKind::Accumulator);
7479 break;
7481 RC = &AArch64::FPR64RegClass;
7482 Opc = AArch64::FMLAv4i16_indexed;
7483 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7484 FMAInstKind::Indexed);
7485 break;
7487 RC = &AArch64::FPR64RegClass;
7488 Opc = AArch64::FMLAv4f16;
7489 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7490 FMAInstKind::Accumulator);
7491 break;
7492
7495 RC = &AArch64::FPR64RegClass;
7497 Opc = AArch64::FMLAv2i32_indexed;
7498 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7499 FMAInstKind::Indexed);
7500 } else {
7501 Opc = AArch64::FMLAv2f32;
7502 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7503 FMAInstKind::Accumulator);
7504 }
7505 break;
7508 RC = &AArch64::FPR64RegClass;
7510 Opc = AArch64::FMLAv2i32_indexed;
7511 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7512 FMAInstKind::Indexed);
7513 } else {
7514 Opc = AArch64::FMLAv2f32;
7515 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7516 FMAInstKind::Accumulator);
7517 }
7518 break;
7519
7521 RC = &AArch64::FPR128RegClass;
7522 Opc = AArch64::FMLAv8i16_indexed;
7523 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7524 FMAInstKind::Indexed);
7525 break;
7527 RC = &AArch64::FPR128RegClass;
7528 Opc = AArch64::FMLAv8f16;
7529 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7530 FMAInstKind::Accumulator);
7531 break;
7533 RC = &AArch64::FPR128RegClass;
7534 Opc = AArch64::FMLAv8i16_indexed;
7535 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7536 FMAInstKind::Indexed);
7537 break;
7539 RC = &AArch64::FPR128RegClass;
7540 Opc = AArch64::FMLAv8f16;
7541 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7542 FMAInstKind::Accumulator);
7543 break;
7544
7547 RC = &AArch64::FPR128RegClass;
7549 Opc = AArch64::FMLAv2i64_indexed;
7550 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7551 FMAInstKind::Indexed);
7552 } else {
7553 Opc = AArch64::FMLAv2f64;
7554 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7555 FMAInstKind::Accumulator);
7556 }
7557 break;
7560 RC = &AArch64::FPR128RegClass;
7562 Opc = AArch64::FMLAv2i64_indexed;
7563 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7564 FMAInstKind::Indexed);
7565 } else {
7566 Opc = AArch64::FMLAv2f64;
7567 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7568 FMAInstKind::Accumulator);
7569 }
7570 break;
7571
7574 RC = &AArch64::FPR128RegClass;
7576 Opc = AArch64::FMLAv4i32_indexed;
7577 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7578 FMAInstKind::Indexed);
7579 } else {
7580 Opc = AArch64::FMLAv4f32;
7581 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7582 FMAInstKind::Accumulator);
7583 }
7584 break;
7585
7588 RC = &AArch64::FPR128RegClass;
7590 Opc = AArch64::FMLAv4i32_indexed;
7591 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7592 FMAInstKind::Indexed);
7593 } else {
7594 Opc = AArch64::FMLAv4f32;
7595 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7596 FMAInstKind::Accumulator);
7597 }
7598 break;
7599
7601 Opc = AArch64::FNMSUBHrrr;
7602 RC = &AArch64::FPR16RegClass;
7603 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7604 break;
7606 Opc = AArch64::FNMSUBSrrr;
7607 RC = &AArch64::FPR32RegClass;
7608 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7609 break;
7611 Opc = AArch64::FNMSUBDrrr;
7612 RC = &AArch64::FPR64RegClass;
7613 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7614 break;
7615
7617 Opc = AArch64::FNMADDHrrr;
7618 RC = &AArch64::FPR16RegClass;
7619 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7620 break;
7622 Opc = AArch64::FNMADDSrrr;
7623 RC = &AArch64::FPR32RegClass;
7624 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7625 break;
7627 Opc = AArch64::FNMADDDrrr;
7628 RC = &AArch64::FPR64RegClass;
7629 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7630 break;
7631
7633 Opc = AArch64::FMSUBHrrr;
7634 RC = &AArch64::FPR16RegClass;
7635 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7636 break;
7638 Opc = AArch64::FMSUBSrrr;
7639 RC = &AArch64::FPR32RegClass;
7640 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7641 break;
7643 Opc = AArch64::FMSUBDrrr;
7644 RC = &AArch64::FPR64RegClass;
7645 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7646 break;
7647
7649 Opc = AArch64::FMLSv1i32_indexed;
7650 RC = &AArch64::FPR32RegClass;
7651 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7652 FMAInstKind::Indexed);
7653 break;
7654
7656 Opc = AArch64::FMLSv1i64_indexed;
7657 RC = &AArch64::FPR64RegClass;
7658 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7659 FMAInstKind::Indexed);
7660 break;
7661
7664 RC = &AArch64::FPR64RegClass;
7665 Register NewVR = MRI.createVirtualRegister(RC);
7666 MachineInstrBuilder MIB1 =
7667 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7668 .add(Root.getOperand(2));
7669 InsInstrs.push_back(MIB1);
7670 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7672 Opc = AArch64::FMLAv4f16;
7673 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7674 FMAInstKind::Accumulator, &NewVR);
7675 } else {
7676 Opc = AArch64::FMLAv4i16_indexed;
7677 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7678 FMAInstKind::Indexed, &NewVR);
7679 }
7680 break;
7681 }
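// As in the case above, when the multiply feeds the first operand of the
// FSUB, the subtracted operand is negated with an FNEG and an FMLA
// accumulates into the negated value; when the multiply feeds the second
// operand, the pattern lowers directly to an FMLS (cases below).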
7683 RC = &AArch64::FPR64RegClass;
7684 Opc = AArch64::FMLSv4f16;
7685 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7686 FMAInstKind::Accumulator);
7687 break;
7689 RC = &AArch64::FPR64RegClass;
7690 Opc = AArch64::FMLSv4i16_indexed;
7691 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7692 FMAInstKind::Indexed);
7693 break;
7694
7697 RC = &AArch64::FPR64RegClass;
7699 Opc = AArch64::FMLSv2i32_indexed;
7700 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7701 FMAInstKind::Indexed);
7702 } else {
7703 Opc = AArch64::FMLSv2f32;
7704 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7705 FMAInstKind::Accumulator);
7706 }
7707 break;
7708
7711 RC = &AArch64::FPR128RegClass;
7712 Register NewVR = MRI.createVirtualRegister(RC);
7713 MachineInstrBuilder MIB1 =
7714 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7715 .add(Root.getOperand(2));
7716 InsInstrs.push_back(MIB1);
7717 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7719 Opc = AArch64::FMLAv8f16;
7720 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7721 FMAInstKind::Accumulator, &NewVR);
7722 } else {
7723 Opc = AArch64::FMLAv8i16_indexed;
7724 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7725 FMAInstKind::Indexed, &NewVR);
7726 }
7727 break;
7728 }
7730 RC = &AArch64::FPR128RegClass;
7731 Opc = AArch64::FMLSv8f16;
7732 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7733 FMAInstKind::Accumulator);
7734 break;
7736 RC = &AArch64::FPR128RegClass;
7737 Opc = AArch64::FMLSv8i16_indexed;
7738 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7739 FMAInstKind::Indexed);
7740 break;
7741
7744 RC = &AArch64::FPR128RegClass;
7746 Opc = AArch64::FMLSv2i64_indexed;
7747 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7748 FMAInstKind::Indexed);
7749 } else {
7750 Opc = AArch64::FMLSv2f64;
7751 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7752 FMAInstKind::Accumulator);
7753 }
7754 break;
7755
7758 RC = &AArch64::FPR128RegClass;
7760 Opc = AArch64::FMLSv4i32_indexed;
7761 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7762 FMAInstKind::Indexed);
7763 } else {
7764 Opc = AArch64::FMLSv4f32;
7765 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7766 FMAInstKind::Accumulator);
7767 }
7768 break;
7771 RC = &AArch64::FPR64RegClass;
7772 Register NewVR = MRI.createVirtualRegister(RC);
7773 MachineInstrBuilder MIB1 =
7774 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7775 .add(Root.getOperand(2));
7776 InsInstrs.push_back(MIB1);
7777 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7779 Opc = AArch64::FMLAv2i32_indexed;
7780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7781 FMAInstKind::Indexed, &NewVR);
7782 } else {
7783 Opc = AArch64::FMLAv2f32;
7784 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7785 FMAInstKind::Accumulator, &NewVR);
7786 }
7787 break;
7788 }
7791 RC = &AArch64::FPR128RegClass;
7792 Register NewVR = MRI.createVirtualRegister(RC);
7793 MachineInstrBuilder MIB1 =
7794 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7795 .add(Root.getOperand(2));
7796 InsInstrs.push_back(MIB1);
7797 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7799 Opc = AArch64::FMLAv4i32_indexed;
7800 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7801 FMAInstKind::Indexed, &NewVR);
7802 } else {
7803 Opc = AArch64::FMLAv4f32;
7804 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7805 FMAInstKind::Accumulator, &NewVR);
7806 }
7807 break;
7808 }
7811 RC = &AArch64::FPR128RegClass;
7812 Register NewVR = MRI.createVirtualRegister(RC);
7813 MachineInstrBuilder MIB1 =
7814 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7815 .add(Root.getOperand(2));
7816 InsInstrs.push_back(MIB1);
7817 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7819 Opc = AArch64::FMLAv2i64_indexed;
7820 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7821 FMAInstKind::Indexed, &NewVR);
7822 } else {
7823 Opc = AArch64::FMLAv2f64;
7824 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7825 FMAInstKind::Accumulator, &NewVR);
7826 }
7827 break;
7828 }
7831 unsigned IdxDupOp =
7833 : 2;
7834 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7835 &AArch64::FPR128RegClass, MRI);
7836 break;
7837 }
7840 unsigned IdxDupOp =
7842 : 2;
7843 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7844 &AArch64::FPR128RegClass, MRI);
7845 break;
7846 }
7849 unsigned IdxDupOp =
7851 : 2;
7852 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7853 &AArch64::FPR128_loRegClass, MRI);
7854 break;
7855 }
7858 unsigned IdxDupOp =
7860 : 2;
7861 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7862 &AArch64::FPR128RegClass, MRI);
7863 break;
7864 }
7867 unsigned IdxDupOp =
7869 : 2;
7870 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7871 &AArch64::FPR128_loRegClass, MRI);
7872 break;
7873 }
7875 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7876 break;
7877 }
7878
7879 } // end switch (Pattern)
7880 // Record MUL and ADD/SUB for deletion
7881 if (MUL)
7882 DelInstrs.push_back(MUL);
7883 DelInstrs.push_back(&Root);
7884
7885 // Set the flags on the inserted instructions to be the merged flags of the
7886 // instructions that we have combined.
7887 uint32_t Flags = Root.getFlags();
7888 if (MUL)
7889 Flags = Root.mergeFlagsWith(*MUL);
7890 for (auto *MI : InsInstrs)
7891 MI->setFlags(Flags);
7892}
7893
7894/// Replace csincr-branch sequence by simple conditional branch
7895///
7896/// Examples:
7897/// 1. \code
7898/// csinc w9, wzr, wzr, <condition code>
7899/// tbnz w9, #0, 0x44
7900/// \endcode
7901/// to
7902/// \code
7903/// b.<inverted condition code>
7904/// \endcode
7905///
7906/// 2. \code
7907/// csinc w9, wzr, wzr, <condition code>
7908/// tbz w9, #0, 0x44
7909/// \endcode
7910/// to
7911/// \code
7912/// b.<condition code>
7913/// \endcode
7914///
7915/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7916/// compare's constant operand is power of 2.
7917///
7918/// Examples:
7919/// \code
7920/// and w8, w8, #0x400
7921/// cbnz w8, L1
7922/// \endcode
7923/// to
7924/// \code
7925/// tbnz w8, #10, L1
7926/// \endcode
7927///
7928/// \param MI Conditional Branch
7929/// \return True when the simple conditional branch is generated
7930///
7931bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
7932 bool IsNegativeBranch = false;
7933 bool IsTestAndBranch = false;
7934 unsigned TargetBBInMI = 0;
7935 switch (MI.getOpcode()) {
7936 default:
7937 llvm_unreachable("Unknown branch instruction?");
7938 case AArch64::Bcc:
7939 return false;
7940 case AArch64::CBZW:
7941 case AArch64::CBZX:
7942 TargetBBInMI = 1;
7943 break;
7944 case AArch64::CBNZW:
7945 case AArch64::CBNZX:
7946 TargetBBInMI = 1;
7947 IsNegativeBranch = true;
7948 break;
7949 case AArch64::TBZW:
7950 case AArch64::TBZX:
7951 TargetBBInMI = 2;
7952 IsTestAndBranch = true;
7953 break;
7954 case AArch64::TBNZW:
7955 case AArch64::TBNZX:
7956 TargetBBInMI = 2;
7957 IsNegativeBranch = true;
7958 IsTestAndBranch = true;
7959 break;
7960 }
7961 // So we increment a zero register and test for bits other
7962 // than bit 0? Conservatively bail out in case the verifier
7963 // missed this case.
7964 if (IsTestAndBranch && MI.getOperand(1).getImm())
7965 return false;
7966
7967 // Find Definition.
7968 assert(MI.getParent() && "Incomplete machine instruction");
7969 MachineBasicBlock *MBB = MI.getParent();
7970 MachineFunction *MF = MBB->getParent();
7971 MachineRegisterInfo *MRI = &MF->getRegInfo();
7972 Register VReg = MI.getOperand(0).getReg();
7973 if (!VReg.isVirtual())
7974 return false;
7975
7976 MachineInstr *DefMI = MRI->getVRegDef(VReg);
7977
7978 // Look through COPY instructions to find definition.
7979 while (DefMI->isCopy()) {
7980 Register CopyVReg = DefMI->getOperand(1).getReg();
7981 if (!MRI->hasOneNonDBGUse(CopyVReg))
7982 return false;
7983 if (!MRI->hasOneDef(CopyVReg))
7984 return false;
7985 DefMI = MRI->getVRegDef(CopyVReg);
7986 }
7987
7988 switch (DefMI->getOpcode()) {
7989 default:
7990 return false;
7991 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
7992 case AArch64::ANDWri:
7993 case AArch64::ANDXri: {
7994 if (IsTestAndBranch)
7995 return false;
7996 if (DefMI->getParent() != MBB)
7997 return false;
7998 if (!MRI->hasOneNonDBGUse(VReg))
7999 return false;
8000
8001 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
8002 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
8003 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8004 if (!isPowerOf2_64(Mask))
8005 return false;
8006
8007 MachineOperand &MO = DefMI->getOperand(1);
8008 Register NewReg = MO.getReg();
8009 if (!NewReg.isVirtual())
8010 return false;
8011
8012 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8013
8014 MachineBasicBlock &RefToMBB = *MBB;
8015 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8016 DebugLoc DL = MI.getDebugLoc();
8017 unsigned Imm = Log2_64(Mask);
8018 unsigned Opc = (Imm < 32)
8019 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8020 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8021 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8022 .addReg(NewReg)
8023 .addImm(Imm)
8024 .addMBB(TBB);
8025 // Register lives on to the new TBZ/TBNZ now.
8026 MO.setIsKill(false);
8027
8028 // For immediates smaller than 32, we need to use the 32-bit
8029 // variant (W) in all cases. Indeed, the 64-bit variant cannot
8030 // encode them.
8031 // Therefore, if the input register is 64-bit, we need to take
8032 // its 32-bit sub-register.
8033 if (!Is32Bit && Imm < 32)
8034 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8035 MI.eraseFromParent();
8036 return true;
8037 }
8038 // Look for CSINC
8039 case AArch64::CSINCWr:
8040 case AArch64::CSINCXr: {
8041 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8042 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8043 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8044 DefMI->getOperand(2).getReg() == AArch64::XZR))
8045 return false;
8046
8047 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8048 true) != -1)
8049 return false;
8050
8051 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
8052 // Convert only when the condition code is not modified between
8053 // the CSINC and the branch. The CC may be used by other
8054 // instructions in between.
8055 if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
8056 return false;
8057 MachineBasicBlock &RefToMBB = *MBB;
8058 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8059 DebugLoc DL = MI.getDebugLoc();
8060 if (IsNegativeBranch)
8061 CC = AArch64CC::getInvertedCondCode(CC);
8062 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8063 MI.eraseFromParent();
8064 return true;
8065 }
8066 }
8067}
8068
8069std::pair<unsigned, unsigned>
8070AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
8071 const unsigned Mask = AArch64II::MO_FRAGMENT;
8072 return std::make_pair(TF & Mask, TF & ~Mask);
8073}
8074
8075ArrayRef<std::pair<unsigned, const char *>>
8076AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
8077 using namespace AArch64II;
8078
8079 static const std::pair<unsigned, const char *> TargetFlags[] = {
8080 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8081 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8082 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8083 {MO_HI12, "aarch64-hi12"}};
8084 return ArrayRef(TargetFlags);
8085}
8086
8087ArrayRef<std::pair<unsigned, const char *>>
8088AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
8089 using namespace AArch64II;
8090
8091 static const std::pair<unsigned, const char *> TargetFlags[] = {
8092 {MO_COFFSTUB, "aarch64-coffstub"},
8093 {MO_GOT, "aarch64-got"},
8094 {MO_NC, "aarch64-nc"},
8095 {MO_S, "aarch64-s"},
8096 {MO_TLS, "aarch64-tls"},
8097 {MO_DLLIMPORT, "aarch64-dllimport"},
8098 {MO_PREL, "aarch64-prel"},
8099 {MO_TAGGED, "aarch64-tagged"},
8100 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8101 };
8102 return ArrayRef(TargetFlags);
8103}
8104
8105ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
8106AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
8107 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8108 {{MOSuppressPair, "aarch64-suppress-pair"},
8109 {MOStridedAccess, "aarch64-strided-access"}};
8110 return ArrayRef(TargetFlags);
8111}
8112
8113/// Constants defining how certain sequences should be outlined.
8114/// This encompasses how an outlined function should be called, and what kind of
8115/// frame should be emitted for that outlined function.
8116///
8117/// \p MachineOutlinerDefault implies that the function should be called with
8118/// a save and restore of LR to the stack.
8119///
8120/// That is,
8121///
8122/// I1 Save LR OUTLINED_FUNCTION:
8123/// I2 --> BL OUTLINED_FUNCTION I1
8124/// I3 Restore LR I2
8125/// I3
8126/// RET
8127///
8128/// * Call construction overhead: 3 (save + BL + restore)
8129/// * Frame construction overhead: 1 (ret)
8130/// * Requires stack fixups? Yes
8131///
8132/// \p MachineOutlinerTailCall implies that the function is being created from
8133/// a sequence of instructions ending in a return.
8134///
8135/// That is,
8136///
8137/// I1 OUTLINED_FUNCTION:
8138/// I2 --> B OUTLINED_FUNCTION I1
8139/// RET I2
8140/// RET
8141///
8142/// * Call construction overhead: 1 (B)
8143/// * Frame construction overhead: 0 (Return included in sequence)
8144/// * Requires stack fixups? No
8145///
8146/// \p MachineOutlinerNoLRSave implies that the function should be called using
8147/// a BL instruction, but doesn't require LR to be saved and restored. This
8148/// happens when LR is known to be dead.
8149///
8150/// That is,
8151///
8152/// I1 OUTLINED_FUNCTION:
8153/// I2 --> BL OUTLINED_FUNCTION I1
8154/// I3 I2
8155/// I3
8156/// RET
8157///
8158/// * Call construction overhead: 1 (BL)
8159/// * Frame construction overhead: 1 (RET)
8160/// * Requires stack fixups? No
8161///
8162/// \p MachineOutlinerThunk implies that the function is being created from
8163/// a sequence of instructions ending in a call. The outlined function is
8164/// called with a BL instruction, and the outlined function tail-calls the
8165/// original call destination.
8166///
8167/// That is,
8168///
8169/// I1 OUTLINED_FUNCTION:
8170/// I2 --> BL OUTLINED_FUNCTION I1
8171/// BL f I2
8172/// B f
8173/// * Call construction overhead: 1 (BL)
8174/// * Frame construction overhead: 0
8175/// * Requires stack fixups? No
8176///
8177/// \p MachineOutlinerRegSave implies that the function should be called with a
8178/// save and restore of LR to an available register. This allows us to avoid
8179/// stack fixups. Note that this outlining variant is compatible with the
8180/// NoLRSave case.
8181///
8182/// That is,
8183///
8184/// I1 Save LR OUTLINED_FUNCTION:
8185/// I2 --> BL OUTLINED_FUNCTION I1
8186/// I3 Restore LR I2
8187/// I3
8188/// RET
8189///
8190/// * Call construction overhead: 3 (save + BL + restore)
8191/// * Frame construction overhead: 1 (ret)
8192/// * Requires stack fixups? No
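///
/// A sketch of a RegSave call site (the scratch register shown is only
/// illustrative; any non-reserved GPR other than LR, X16 and X17 may be
/// chosen):
/// \code
/// mov x20, lr
/// bl OUTLINED_FUNCTION
/// mov lr, x20
/// \endcode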
8193enum MachineOutlinerClass {
8194 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8195 MachineOutlinerTailCall, /// Only emit a branch.
8196 MachineOutlinerNoLRSave, /// Emit a call and return.
8197 MachineOutlinerThunk, /// Emit a call and tail-call.
8198 MachineOutlinerRegSave /// Same as default, but save to a register.
8199};
8200
8201enum MachineOutlinerMBBFlags {
8202 LRUnavailableSomewhere = 0x2,
8203 HasCalls = 0x4,
8204 UnsafeRegsDead = 0x8
8205};
8206
8207Register
8208AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
8209 MachineFunction *MF = C.getMF();
8210 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
8211 const AArch64RegisterInfo *ARI =
8212 static_cast<const AArch64RegisterInfo *>(&TRI);
8213 // Check if there is an available register across the sequence that we can
8214 // use.
8215 for (unsigned Reg : AArch64::GPR64RegClass) {
8216 if (!ARI->isReservedReg(*MF, Reg) &&
8217 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8218 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8219 Reg != AArch64::X17 && // Ditto for X17.
8220 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8221 C.isAvailableInsideSeq(Reg, TRI))
8222 return Reg;
8223 }
8224 return Register();
8225}
8226
8227static bool
8228outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
8229 const outliner::Candidate &b) {
8230 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8231 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8232
8233 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8234 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8235}
8236
8237static bool
8238outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
8239 const outliner::Candidate &b) {
8240 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8241 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8242
8243 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8244}
8245
8246static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
8247 const outliner::Candidate &b) {
8248 const AArch64Subtarget &SubtargetA =
8249 a.getMF()->getSubtarget<AArch64Subtarget>();
8250 const AArch64Subtarget &SubtargetB =
8251 b.getMF()->getSubtarget<AArch64Subtarget>();
8252 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8253}
8254
8255std::optional<outliner::OutlinedFunction>
8256AArch64InstrInfo::getOutliningCandidateInfo(
8257 std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8258 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8259
8260 unsigned SequenceSize = 0;
8261 for (auto &MI : FirstCand)
8262 SequenceSize += getInstSizeInBytes(MI);
8263
8264 unsigned NumBytesToCreateFrame = 0;
8265
8266 // We only allow outlining for functions having exactly matching return
8267 // address signing attributes, i.e., all share the same value for the
8268 // attribute "sign-return-address" and all share the same type of key they
8269 // are signed with.
8270 // Additionally we require all functions to simultaneously either support
8271 // v8.3a features or not. Otherwise an outlined function could get signed
8272 // using dedicated v8.3 instructions and a call from a function that doesn't
8273 // support v8.3 instructions would therefore be invalid.
8274 if (std::adjacent_find(
8275 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8276 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8277 // Return true if a and b are non-equal w.r.t. return address
8278 // signing or support of v8.3a features
8279 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8280 outliningCandidatesSigningKeyConsensus(a, b) &&
8281 outliningCandidatesV8_3OpsConsensus(a, b)) {
8282 return false;
8283 }
8284 return true;
8285 }) != RepeatedSequenceLocs.end()) {
8286 return std::nullopt;
8287 }
8288
8289 // Since at this point all candidates agree on their return address signing
8290 // picking just one is fine. If the candidate functions potentially sign their
8291 // return addresses, the outlined function should do the same. Note that in
8292 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
8293 // not certainly true that the outlined function will have to sign its return
8294 // address but this decision is made later, when the decision to outline
8295 // has already been made.
8296 // The same holds for the number of additional instructions we need: On
8297 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8298 // necessary. However, at this point we don't know if the outlined function
8299 // will have a RET instruction so we assume the worst.
8300 const TargetRegisterInfo &TRI = getRegisterInfo();
8301 // Performing a tail call may require extra checks when PAuth is enabled.
8302 // If PAuth is disabled, set it to zero for uniformity.
8303 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8304 if (FirstCand.getMF()
8305 ->getInfo<AArch64FunctionInfo>()
8306 ->shouldSignReturnAddress(true)) {
8307 // One PAC and one AUT instructions
8308 NumBytesToCreateFrame += 8;
8309
8310 // PAuth is enabled - set extra tail call cost, if any.
8311 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8312 NumBytesToCheckLRInTCEpilogue =
8313 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
8314 // Checking the authenticated LR value may significantly impact
8315 // SequenceSize, so account for it for more precise results.
8316 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8317 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8318
8319 // We have to check if sp modifying instructions would get outlined.
8320 // If so we only allow outlining if sp is unchanged overall, so matching
8321 // sub and add instructions are okay to outline, all other sp modifications
8322 // are not
8323 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8324 int SPValue = 0;
8325 for (auto &MI : C) {
8326 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8327 switch (MI.getOpcode()) {
8328 case AArch64::ADDXri:
8329 case AArch64::ADDWri:
8330 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8331 assert(MI.getOperand(2).isImm() &&
8332 "Expected operand to be immediate");
8333 assert(MI.getOperand(1).isReg() &&
8334 "Expected operand to be a register");
8335 // Check if the add just increments sp. If so, we search for
8336 // matching sub instructions that decrement sp. If not, the
8337 // modification is illegal
8338 if (MI.getOperand(1).getReg() == AArch64::SP)
8339 SPValue += MI.getOperand(2).getImm();
8340 else
8341 return true;
8342 break;
8343 case AArch64::SUBXri:
8344 case AArch64::SUBWri:
8345 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8346 assert(MI.getOperand(2).isImm() &&
8347 "Expected operand to be immediate");
8348 assert(MI.getOperand(1).isReg() &&
8349 "Expected operand to be a register");
8350 // Check if the sub just decrements sp. If so, we search for
8351 // matching add instructions that increment sp. If not, the
8352 // modification is illegal
8353 if (MI.getOperand(1).getReg() == AArch64::SP)
8354 SPValue -= MI.getOperand(2).getImm();
8355 else
8356 return true;
8357 break;
8358 default:
8359 return true;
8360 }
8361 }
8362 }
8363 if (SPValue)
8364 return true;
8365 return false;
8366 };
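// For example, a matched "sub sp, sp, #16 ... add sp, sp, #16" pair nets
// SPValue back to zero and may be outlined, whereas an unmatched SP update
// (or any other write to SP) rejects the candidate.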
8367 // Remove candidates with illegal stack modifying instructions
8368 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8369
8370 // If the sequence doesn't have enough candidates left, then we're done.
8371 if (RepeatedSequenceLocs.size() < 2)
8372 return std::nullopt;
8373 }
8374
8375 // Properties about candidate MBBs that hold for all of them.
8376 unsigned FlagsSetInAll = 0xF;
8377
8378 // Compute liveness information for each candidate, and set FlagsSetInAll.
8379 for (outliner::Candidate &C : RepeatedSequenceLocs)
8380 FlagsSetInAll &= C.Flags;
8381
8382 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8383
8384 // Helper lambda which sets call information for every candidate.
8385 auto SetCandidateCallInfo =
8386 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8387 for (outliner::Candidate &C : RepeatedSequenceLocs)
8388 C.setCallInfo(CallID, NumBytesForCall);
8389 };
8390
8391 unsigned FrameID = MachineOutlinerDefault;
8392 NumBytesToCreateFrame += 4;
8393
8394 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8395 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8396 });
8397
8398 // We check to see if CFI Instructions are present, and if they are
8399 // we find the number of CFI Instructions in the candidates.
8400 unsigned CFICount = 0;
8401 for (auto &I : RepeatedSequenceLocs[0]) {
8402 if (I.isCFIInstruction())
8403 CFICount++;
8404 }
8405
8406 // We compare the number of found CFI Instructions to the number of CFI
8407 // instructions in the parent function for each candidate. We must check this
8408 // since if we outline one of the CFI instructions in a function, we have to
8409 // outline them all for correctness. If we do not, the address offsets will be
8410 // incorrect between the two sections of the program.
8411 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8412 std::vector<MCCFIInstruction> CFIInstructions =
8413 C.getMF()->getFrameInstructions();
8414
8415 if (CFICount > 0 && CFICount != CFIInstructions.size())
8416 return std::nullopt;
8417 }
8418
8419 // Returns true if an instruction is safe to fix up, false otherwise.
8420 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8421 if (MI.isCall())
8422 return true;
8423
8424 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8425 !MI.readsRegister(AArch64::SP, &TRI))
8426 return true;
8427
8428 // Any modification of SP will break our code to save/restore LR.
8429 // FIXME: We could handle some instructions which add a constant
8430 // offset to SP, with a bit more work.
8431 if (MI.modifiesRegister(AArch64::SP, &TRI))
8432 return false;
8433
8434 // At this point, we have a stack instruction that we might need to
8435 // fix up. We'll handle it if it's a load or store.
8436 if (MI.mayLoadOrStore()) {
8437 const MachineOperand *Base; // Filled with the base operand of MI.
8438 int64_t Offset; // Filled with the offset of MI.
8439 bool OffsetIsScalable;
8440
8441 // Does it allow us to offset the base operand and is the base the
8442 // register SP?
8443 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8444 !Base->isReg() || Base->getReg() != AArch64::SP)
8445 return false;
8446
8447 // Fix-up code below assumes bytes.
8448 if (OffsetIsScalable)
8449 return false;
8450
8451 // Find the minimum/maximum offset for this instruction and check
8452 // if fixing it up would be in range.
8453 int64_t MinOffset,
8454 MaxOffset; // Unscaled offsets for the instruction.
8455 // The scale to multiply the offsets by.
8456 TypeSize Scale(0U, false), DummyWidth(0U, false);
8457 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8458
8459 Offset += 16; // Update the offset to what it would be if we outlined.
8460 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8461 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8462 return false;
8463
8464 // It's in range, so we can outline it.
8465 return true;
8466 }
8467
8468 // FIXME: Add handling for instructions like "add x0, sp, #8".
8469
8470 // We can't fix it up, so don't outline it.
8471 return false;
8472 };
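// Example of the range check above: saving LR in the outlined function moves
// SP down by 16 bytes, so e.g. "ldr x0, [sp, #8]" in the candidate becomes
// "ldr x0, [sp, #24]" once outlined; the check verifies that the adjusted
// offset still fits the instruction's immediate range.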
8473
8474 // True if it's possible to fix up each stack instruction in this sequence.
8475 // Important for frames/call variants that modify the stack.
8476 bool AllStackInstrsSafe = llvm::all_of(FirstCand, IsSafeToFixup);
8477
8478 // If the last instruction in any candidate is a terminator, then we should
8479 // tail call all of the candidates.
8480 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8481 FrameID = MachineOutlinerTailCall;
8482 NumBytesToCreateFrame = 0;
8483 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8484 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8485 }
8486
8487 else if (LastInstrOpcode == AArch64::BL ||
8488 ((LastInstrOpcode == AArch64::BLR ||
8489 LastInstrOpcode == AArch64::BLRNoIP) &&
8490 !HasBTI)) {
8491 // FIXME: Do we need to check if the code after this uses the value of LR?
8492 FrameID = MachineOutlinerThunk;
8493 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8494 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8495 }
8496
8497 else {
8498 // We need to decide how to emit calls + frames. We can always emit the same
8499 // frame if we don't need to save to the stack. If we have to save to the
8500 // stack, then we need a different frame.
8501 unsigned NumBytesNoStackCalls = 0;
8502 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8503
8504 // Check if we have to save LR.
8505 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8506 bool LRAvailable =
8507 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8508 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8509 : true;
8510 // If we have a noreturn caller, then we're going to be conservative and
8511 // say that we have to save LR. If we don't have a ret at the end of the
8512 // block, then we can't reason about liveness accurately.
8513 //
8514 // FIXME: We can probably do better than always disabling this in
8515 // noreturn functions by fixing up the liveness info.
8516 bool IsNoReturn =
8517 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8518
8519 // Is LR available? If so, we don't need a save.
8520 if (LRAvailable && !IsNoReturn) {
8521 NumBytesNoStackCalls += 4;
8522 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8523 CandidatesWithoutStackFixups.push_back(C);
8524 }
8525
8526 // Is an unused register available? If so, we won't modify the stack, so
8527 // we can outline with the same frame type as those that don't save LR.
8528 else if (findRegisterToSaveLRTo(C)) {
8529 NumBytesNoStackCalls += 12;
8530 C.setCallInfo(MachineOutlinerRegSave, 12);
8531 CandidatesWithoutStackFixups.push_back(C);
8532 }
8533
8534 // Is SP used in the sequence at all? If not, we don't have to modify
8535 // the stack, so we are guaranteed to get the same frame.
8536 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8537 NumBytesNoStackCalls += 12;
8538 C.setCallInfo(MachineOutlinerDefault, 12);
8539 CandidatesWithoutStackFixups.push_back(C);
8540 }
8541
8542 // If we outline this, we need to modify the stack. Pretend we don't
8543 // outline this by saving all of its bytes.
8544 else {
8545 NumBytesNoStackCalls += SequenceSize;
8546 }
8547 }
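// At this point NumBytesNoStackCalls holds the cheapest call form for each
// candidate: 4 bytes for a bare BL when LR is free, 12 bytes (save + BL +
// restore) when LR is kept in a register or when SP is untouched inside the
// sequence, or the full SequenceSize when stack fixups cannot be avoided.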
8548
8549 // If there are no places where we have to save LR, then note that we
8550 // don't have to update the stack. Otherwise, give every candidate the
8551 // default call type, as long as it's safe to do so.
8552 if (!AllStackInstrsSafe ||
8553 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8554 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8555 FrameID = MachineOutlinerNoLRSave;
8556 } else {
8557 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8558
8559 // Bugzilla ID: 46767
8560 // TODO: Check if fixing up the stack more than once is safe so we can
8561 // outline these.
8562 //
8563 // An outline resulting in a caller that requires stack fixups at the
8564 // callsite to a callee that also requires stack fixups can happen when
8565 // there are no available registers at the candidate callsite for a
8566 // candidate that itself also has calls.
8567 //
8568 // In other words if function_containing_sequence in the following pseudo
8569 // assembly requires that we save LR at the point of the call, but there
8570 // are no available registers: in this case we save using SP and as a
8571 // result the SP offsets require stack fixups by multiples of 16.
8572 //
8573 // function_containing_sequence:
8574 // ...
8575 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8576 // call OUTLINED_FUNCTION_N
8577 // restore LR from SP
8578 // ...
8579 //
8580 // OUTLINED_FUNCTION_N:
8581 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8582 // ...
8583 // bl foo
8584 // restore LR from SP
8585 // ret
8586 //
8587 // Because the code to handle more than one stack fixup does not
8588 // currently have the proper checks for legality, these cases will assert
8589 // in the AArch64 MachineOutliner. This is because the code to do this
8590 // needs more hardening, testing, better checks that generated code is
8591 // legal, etc and because it is only verified to handle a single pass of
8592 // stack fixup.
8593 //
8594 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8595 // these cases until they are known to be handled. Bugzilla 46767 is
8596 // referenced in comments at the assert site.
8597 //
8598 // To avoid asserting (or generating non-legal code on noassert builds)
8599 // we remove all candidates which would need more than one stack fixup by
8600 // pruning the cases where the candidate has calls while also having no
8601 // available LR and having no available general purpose registers to copy
8602 // LR to (ie one extra stack save/restore).
8603 //
8604 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8605 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8606 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8607 return (llvm::any_of(C, IsCall)) &&
8608 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8609 !findRegisterToSaveLRTo(C));
8610 });
8611 }
8612 }
8613
8614 // If we dropped all of the candidates, bail out here.
8615 if (RepeatedSequenceLocs.size() < 2) {
8616 RepeatedSequenceLocs.clear();
8617 return std::nullopt;
8618 }
8619 }
8620
8621 // Does every candidate's MBB contain a call? If so, then we might have a call
8622 // in the range.
8623 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8624 // Check if the range contains a call. These require a save + restore of the
8625 // link register.
8626 bool ModStackToSaveLR = false;
8627 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8628 [](const MachineInstr &MI) { return MI.isCall(); }))
8629 ModStackToSaveLR = true;
8630
8631 // Handle the last instruction separately. If this is a tail call, then the
8632 // last instruction is a call. We don't want to save + restore in this case.
8633 // However, it could be possible that the last instruction is a call without
8634 // it being valid to tail call this sequence. We should consider this as
8635 // well.
8636 else if (FrameID != MachineOutlinerThunk &&
8637 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8638 ModStackToSaveLR = true;
8639
8640 if (ModStackToSaveLR) {
8641 // We can't fix up the stack. Bail out.
8642 if (!AllStackInstrsSafe) {
8643 RepeatedSequenceLocs.clear();
8644 return std::nullopt;
8645 }
8646
8647 // Save + restore LR.
8648 NumBytesToCreateFrame += 8;
8649 }
8650 }
8651
8652 // If we have CFI instructions, we can only outline if the outlined section
8653 // can be a tail call
8654 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8655 return std::nullopt;
8656
8657 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8658 NumBytesToCreateFrame, FrameID);
8659}
8660
8661void AArch64InstrInfo::mergeOutliningCandidateAttributes(
8662 Function &F, std::vector<outliner::Candidate> &Candidates) const {
8663 // If a bunch of candidates reach this point they must agree on their return
8664 // address signing. It is therefore enough to just consider the signing
8665 // behaviour of one of them
8666 const auto &CFn = Candidates.front().getMF()->getFunction();
8667
8668 // Since all candidates belong to the same module, just copy the
8669 // function-level attributes of an arbitrary function.
8670 if (CFn.hasFnAttribute("sign-return-address"))
8671 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8672 if (CFn.hasFnAttribute("sign-return-address-key"))
8673 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8674
8675 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8676}
8677
8678bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
8679 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8680 const Function &F = MF.getFunction();
8681
8682 // Can F be deduplicated by the linker? If it can, don't outline from it.
8683 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8684 return false;
8685
8686 // Don't outline from functions with section markings; the program could
8687 // expect that all the code is in the named section.
8688 // FIXME: Allow outlining from multiple functions with the same section
8689 // marking.
8690 if (F.hasSection())
8691 return false;
8692
8693 // Outlining from functions with redzones is unsafe since the outliner may
8694 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8695 // outline from it.
8696 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
8697 if (!AFI || AFI->hasRedZone().value_or(true))
8698 return false;
8699
8700 // FIXME: Teach the outliner to generate/handle Windows unwind info.
8701 if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
8702 return false;
8703
8704 // It's safe to outline from MF.
8705 return true;
8706}
8707
8708SmallVector<std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8709AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
8710 unsigned &Flags) const {
8712 "Must track liveness!");
8713 SmallVector<
8714 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8715 Ranges;
8716 // According to the AArch64 Procedure Call Standard, the following are
8717 // undefined on entry/exit from a function call:
8718 //
8719 // * Registers x16, x17, (and thus w16, w17)
8720 // * Condition codes (and thus the NZCV register)
8721 //
8722 // If any of these registers are used inside or live across an outlined
8723 // function, then they may be modified later, either by the compiler or
8724 // some other tool (like the linker).
8725 //
8726 // To avoid outlining in these situations, partition each block into ranges
8727 // where these registers are dead. We will only outline from those ranges.
8728 LiveRegUnits LRU(getRegisterInfo());
8729 auto AreAllUnsafeRegsDead = [&LRU]() {
8730 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8731 LRU.available(AArch64::NZCV);
8732 };
8733
8734 // We need to know if LR is live across an outlining boundary later on in
8735 // order to decide how we'll create the outlined call, frame, etc.
8736 //
8737 // It's pretty expensive to check this for *every candidate* within a block.
8738 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8739 // to compute liveness from the end of the block for O(n) candidates within
8740 // the block.
8741 //
8742 // So, to improve the average case, let's keep track of liveness from the end
8743 // of the block to the beginning of *every outlinable range*. If we know that
8744 // LR is available in every range we could outline from, then we know that
8745 // we don't need to check liveness for any candidate within that range.
8746 bool LRAvailableEverywhere = true;
8747 // Compute liveness bottom-up.
8748 LRU.addLiveOuts(MBB);
8749 // Update flags that require info about the entire MBB.
8750 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8751 if (MI.isCall() && !MI.isTerminator())
8752 Flags |= MachineOutlinerMBBFlags::HasCalls;
8753 };
8754 // Range: [RangeBegin, RangeEnd)
8755 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8756 unsigned RangeLen;
8757 auto CreateNewRangeStartingAt =
8758 [&RangeBegin, &RangeEnd,
8759 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8760 RangeBegin = NewBegin;
8761 RangeEnd = std::next(RangeBegin);
8762 RangeLen = 0;
8763 };
8764 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8765 // At least one unsafe register is not dead. We do not want to outline at
8766 // this point. If it is long enough to outline from, save the range
8767 // [RangeBegin, RangeEnd).
8768 if (RangeLen > 1)
8769 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8770 };
8771 // Find the first point where all unsafe registers are dead.
8772 // FIND: <safe instr> <-- end of first potential range
8773 // SKIP: <unsafe def>
8774 // SKIP: ... everything between ...
8775 // SKIP: <unsafe use>
8776 auto FirstPossibleEndPt = MBB.instr_rbegin();
8777 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8778 LRU.stepBackward(*FirstPossibleEndPt);
8779 // Update flags that impact how we outline across the entire block,
8780 // regardless of safety.
8781 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8782 if (AreAllUnsafeRegsDead())
8783 break;
8784 }
8785 // If we exhausted the entire block, we have no safe ranges to outline.
8786 if (FirstPossibleEndPt == MBB.instr_rend())
8787 return Ranges;
8788 // Current range.
8789 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8790 // StartPt points to the first place where all unsafe registers
8791 // are dead (if there is any such point). Begin partitioning the MBB into
8792 // ranges.
8793 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8794 LRU.stepBackward(MI);
8795 UpdateWholeMBBFlags(MI);
8796 if (!AreAllUnsafeRegsDead()) {
8797 SaveRangeIfNonEmpty();
8798 CreateNewRangeStartingAt(MI.getIterator());
8799 continue;
8800 }
8801 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8802 RangeBegin = MI.getIterator();
8803 ++RangeLen;
8804 }
8805 // Above loop misses the last (or only) range. If we are still safe, then
8806 // let's save the range.
8807 if (AreAllUnsafeRegsDead())
8808 SaveRangeIfNonEmpty();
8809 if (Ranges.empty())
8810 return Ranges;
8811 // We found the ranges bottom-up. Mapping expects the top-down. Reverse
8812 // the order.
8813 std::reverse(Ranges.begin(), Ranges.end());
8814 // If there is at least one outlinable range where LR is unavailable
8815 // somewhere, remember that.
8816 if (!LRAvailableEverywhere)
8817 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8818 return Ranges;
8819}
8820
8821outliner::InstrType
8822AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
8823 unsigned Flags) const {
8824 MachineInstr &MI = *MIT;
8825 MachineBasicBlock *MBB = MI.getParent();
8826 MachineFunction *MF = MBB->getParent();
8827 AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
8828
8829 // Don't outline anything used for return address signing. The outlined
8830 // function will get signed later if needed
8831 switch (MI.getOpcode()) {
8832 case AArch64::PACM:
8833 case AArch64::PACIASP:
8834 case AArch64::PACIBSP:
8835 case AArch64::PACIASPPC:
8836 case AArch64::PACIBSPPC:
8837 case AArch64::AUTIASP:
8838 case AArch64::AUTIBSP:
8839 case AArch64::AUTIASPPCi:
8840 case AArch64::AUTIASPPCr:
8841 case AArch64::AUTIBSPPCi:
8842 case AArch64::AUTIBSPPCr:
8843 case AArch64::RETAA:
8844 case AArch64::RETAB:
8845 case AArch64::RETAASPPCi:
8846 case AArch64::RETAASPPCr:
8847 case AArch64::RETABSPPCi:
8848 case AArch64::RETABSPPCr:
8849 case AArch64::EMITBKEY:
8850 case AArch64::PAUTH_PROLOGUE:
8851 case AArch64::PAUTH_EPILOGUE:
8852 return outliner::InstrType::Illegal;
8853 }
8854
8855 // Don't outline LOHs.
8856 if (FuncInfo->getLOHRelated().count(&MI))
8857 return outliner::InstrType::Illegal;
8858
8859 // We can only outline these if we will tail call the outlined function, or
8860 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8861 // in a tail call.
8862 //
8863 // FIXME: If the proper fixups for the offset are implemented, this should be
8864 // possible.
8865 if (MI.isCFIInstruction())
8866 return outliner::InstrType::Legal;
8867
8868 // Is this a terminator for a basic block?
8869 if (MI.isTerminator())
8870 // TargetInstrInfo::getOutliningType has already filtered out anything
8871 // that would break this, so we can allow it here.
8872 return outliner::InstrType::Legal;
8873
8874 // Make sure none of the operands are un-outlinable.
8875 for (const MachineOperand &MOP : MI.operands()) {
8876 // A check preventing CFI indices was here before, but only CFI
8877 // instructions should have those.
8878 assert(!MOP.isCFIIndex());
8879
8880 // If it uses LR or W30 explicitly, then don't touch it.
8881 if (MOP.isReg() && !MOP.isImplicit() &&
8882 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
8883 return outliner::InstrType::Illegal;
8884 }
8885
8886 // Special cases for instructions that can always be outlined, but will fail
8887 // the later tests, e.g. ADRPs, which are PC-relative and may use LR, but can
8888 // always be outlined because they don't require a *specific* value to be in LR.
8889 if (MI.getOpcode() == AArch64::ADRP)
8890 return outliner::InstrType::Legal;
8891
8892 // If MI is a call we might be able to outline it. We don't want to outline
8893 // any calls that rely on the position of items on the stack. When we outline
8894 // something containing a call, we have to emit a save and restore of LR in
8895 // the outlined function. Currently, this always happens by saving LR to the
8896 // stack. Thus, if we outline, say, half the parameters for a function call
8897 // plus the call, then we'll break the callee's expectations for the layout
8898 // of the stack.
8899 //
8900 // FIXME: Allow calls to functions which construct a stack frame, as long
8901 // as they don't access arguments on the stack.
8902 // FIXME: Figure out some way to analyze functions defined in other modules.
8903 // We should be able to compute the memory usage based on the IR calling
8904 // convention, even if we can't see the definition.
8905 if (MI.isCall()) {
8906 // Get the function associated with the call. Look at each operand and find
8907 // the one that represents the callee and get its name.
8908 const Function *Callee = nullptr;
8909 for (const MachineOperand &MOP : MI.operands()) {
8910 if (MOP.isGlobal()) {
8911 Callee = dyn_cast<Function>(MOP.getGlobal());
8912 break;
8913 }
8914 }
8915
8916 // Never outline calls to mcount. There isn't any rule that would require
8917 // this, but the Linux kernel's "ftrace" feature depends on it.
8918 if (Callee && Callee->getName() == "\01_mcount")
8919 return outliner::InstrType::Illegal;
8920
8921 // If we don't know anything about the callee, assume it depends on the
8922 // stack layout of the caller. In that case, it's only legal to outline
8923 // as a tail-call. Explicitly list the call instructions we know about so we
8924 // don't get unexpected results with call pseudo-instructions.
8925 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8926 if (MI.getOpcode() == AArch64::BLR ||
8927 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8928 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8929
8930 if (!Callee)
8931 return UnknownCallOutlineType;
8932
8933 // We have a function we have information about. Check if it's something we
8934 // can safely outline.
8935 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
8936
8937 // We don't know what's going on with the callee at all. Don't touch it.
8938 if (!CalleeMF)
8939 return UnknownCallOutlineType;
8940
8941 // Check if we know anything about the callee saves on the function. If we
8942 // don't, then don't touch it, since that implies that we haven't
8943 // computed anything about its stack frame yet.
8944 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8945 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8946 MFI.getNumObjects() > 0)
8947 return UnknownCallOutlineType;
8948
8949 // At this point, we can say that CalleeMF ought to not pass anything on the
8950     // stack. Therefore, we can outline it.
8951     return outliner::InstrType::Legal;
8952 }
8953
8954 // Don't touch the link register or W30.
8955 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8956       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8957     return outliner::InstrType::Illegal;
8958
8959 // Don't outline BTI instructions, because that will prevent the outlining
8960 // site from being indirectly callable.
8961   if (hasBTISemantics(MI))
8962     return outliner::InstrType::Illegal;
8963
8964   return outliner::InstrType::Legal;
8965 }
8966
8967void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8968 for (MachineInstr &MI : MBB) {
8969 const MachineOperand *Base;
8970 TypeSize Width(0, false);
8971 int64_t Offset;
8972 bool OffsetIsScalable;
8973
8974 // Is this a load or store with an immediate offset with SP as the base?
8975 if (!MI.mayLoadOrStore() ||
8976 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
8977 &RI) ||
8978 (Base->isReg() && Base->getReg() != AArch64::SP))
8979 continue;
8980
8981 // It is, so we have to fix it up.
8982 TypeSize Scale(0U, false);
8983 int64_t Dummy1, Dummy2;
8984
8985     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
8986     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
8987 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
8988 assert(Scale != 0 && "Unexpected opcode!");
8989 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
8990
8991 // We've pushed the return address to the stack, so add 16 to the offset.
8992 // This is safe, since we already checked if it would overflow when we
8993 // checked if this instruction was legal to outline.
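    // For example, an 8-byte store at [sp, #8] (Offset = 8, Scale = 8) becomes
    // (8 + 16) / 8 = 3, i.e. [sp, #24] in the scaled-immediate encoding.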
8994 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
8995 StackOffsetOperand.setImm(NewImm);
8996 }
8997}
8998
8999 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9000                                  const AArch64InstrInfo *TII,
9001                                  bool ShouldSignReturnAddr) {
9002 if (!ShouldSignReturnAddr)
9003 return;
9004
9005   BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9006       .setMIFlag(MachineInstr::FrameSetup);
9007   BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
9008           TII->get(AArch64::PAUTH_EPILOGUE))
9009       .setMIFlag(MachineInstr::FrameDestroy);
9010 }
9011
9012 void AArch64InstrInfo::buildOutlinedFrame(
9013     MachineBasicBlock &MBB, MachineFunction &MF,
9014     const outliner::OutlinedFunction &OF) const {
9015
9016   AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9017
9018   if (OF.FrameConstructionID == MachineOutlinerTailCall)
9019     FI->setOutliningStyle("Tail Call");
9020   else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9021     // For thunk outlining, rewrite the last instruction from a call to a
9022 // tail-call.
9023 MachineInstr *Call = &*--MBB.instr_end();
9024 unsigned TailOpcode;
9025 if (Call->getOpcode() == AArch64::BL) {
9026 TailOpcode = AArch64::TCRETURNdi;
9027 } else {
9028 assert(Call->getOpcode() == AArch64::BLR ||
9029 Call->getOpcode() == AArch64::BLRNoIP);
9030 TailOpcode = AArch64::TCRETURNriALL;
9031 }
9032 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9033 .add(Call->getOperand(0))
9034 .addImm(0);
9035 MBB.insert(MBB.end(), TC);
9036 Call->eraseFromParent();
9037
9038 FI->setOutliningStyle("Thunk");
9039 }
9040
9041 bool IsLeafFunction = true;
9042
9043 // Is there a call in the outlined range?
9044 auto IsNonTailCall = [](const MachineInstr &MI) {
9045 return MI.isCall() && !MI.isReturn();
9046 };
9047
9048 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9049 // Fix up the instructions in the range, since we're going to modify the
9050 // stack.
9051
9052     // Bugzilla ID: 46767
9053     // TODO: Check if fixing up twice is safe so we can outline these.
9054     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9055            "Can only fix up stack references once");
9056 fixupPostOutline(MBB);
9057
9058 IsLeafFunction = false;
9059
9060 // LR has to be a live in so that we can save it.
9061 if (!MBB.isLiveIn(AArch64::LR))
9062 MBB.addLiveIn(AArch64::LR);
9063
9064     MachineBasicBlock::iterator It = MBB.begin();
9065     MachineBasicBlock::iterator Et = MBB.end();
9066
9067     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9068         OF.FrameConstructionID == MachineOutlinerThunk)
9069       Et = std::prev(MBB.end());
9070
9071 // Insert a save before the outlined region
9072 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9073 .addReg(AArch64::SP, RegState::Define)
9074 .addReg(AArch64::LR)
9075 .addReg(AArch64::SP)
9076 .addImm(-16);
9077 It = MBB.insert(It, STRXpre);
9078
9079     if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9080       const TargetSubtargetInfo &STI = MF.getSubtarget();
9081 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9082 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9083
9084 // Add a CFI saying the stack was moved 16 B down.
9085       int64_t StackPosEntry =
9086           MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9087       BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9088           .addCFIIndex(StackPosEntry)
9089           .setMIFlags(MachineInstr::FrameSetup);
9090
9091 // Add a CFI saying that the LR that we want to find is now 16 B higher
9092 // than before.
9093 int64_t LRPosEntry = MF.addFrameInst(
9094 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9095 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9096           .addCFIIndex(LRPosEntry)
9097           .setMIFlags(MachineInstr::FrameSetup);
9098 }
9099
9100 // Insert a restore before the terminator for the function.
9101 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9102 .addReg(AArch64::SP, RegState::Define)
9103 .addReg(AArch64::LR, RegState::Define)
9104 .addReg(AArch64::SP)
9105 .addImm(16);
9106 Et = MBB.insert(Et, LDRXpost);
9107 }
9108
9109 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9110
9111   // If this is a tail call outlined function, then there's already a return.
9112   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9113       OF.FrameConstructionID == MachineOutlinerThunk) {
9114 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9115 return;
9116 }
9117
9118 // It's not a tail call, so we have to insert the return ourselves.
9119
9120 // LR has to be a live in so that we can return to it.
9121 if (!MBB.isLiveIn(AArch64::LR))
9122 MBB.addLiveIn(AArch64::LR);
9123
9124 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9125 .addReg(AArch64::LR);
9126 MBB.insert(MBB.end(), ret);
9127
9128 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9129
9130 FI->setOutliningStyle("Function");
9131
9132   // Did we have to modify the stack by saving the link register?
9133   if (OF.FrameConstructionID != MachineOutlinerDefault)
9134 return;
9135
9136 // We modified the stack.
9137 // Walk over the basic block and fix up all the stack accesses.
9138 fixupPostOutline(MBB);
9139}
9140
9141 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9142     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9143     MachineFunction &MF, outliner::Candidate &C) const {
9144
9145 // Are we tail calling?
9146 if (C.CallConstructionID == MachineOutlinerTailCall) {
9147 // If yes, then we can just branch to the label.
9148 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9149 .addGlobalAddress(M.getNamedValue(MF.getName()))
9150 .addImm(0));
9151 return It;
9152 }
9153
9154 // Are we saving the link register?
9155 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9156 C.CallConstructionID == MachineOutlinerThunk) {
9157 // No, so just insert the call.
9158 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9159 .addGlobalAddress(M.getNamedValue(MF.getName())));
9160 return It;
9161 }
9162
9163   // We want to return the spot where we inserted the call.
9164   MachineBasicBlock::iterator CallPt;
9165
9166 // Instructions for saving and restoring LR around the call instruction we're
9167 // going to insert.
9168 MachineInstr *Save;
9169 MachineInstr *Restore;
9170 // Can we save to a register?
9171 if (C.CallConstructionID == MachineOutlinerRegSave) {
9172 // FIXME: This logic should be sunk into a target-specific interface so that
9173 // we don't have to recompute the register.
9174 Register Reg = findRegisterToSaveLRTo(C);
9175 assert(Reg && "No callee-saved register available?");
9176
9177 // LR has to be a live in so that we can save it.
9178 if (!MBB.isLiveIn(AArch64::LR))
9179 MBB.addLiveIn(AArch64::LR);
9180
9181 // Save and restore LR from Reg.
9182 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9183 .addReg(AArch64::XZR)
9184 .addReg(AArch64::LR)
9185 .addImm(0);
9186 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9187 .addReg(AArch64::XZR)
9188 .addReg(Reg)
9189 .addImm(0);
9190 } else {
9191 // We have the default case. Save and restore from SP.
9192 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9193 .addReg(AArch64::SP, RegState::Define)
9194 .addReg(AArch64::LR)
9195 .addReg(AArch64::SP)
9196 .addImm(-16);
9197 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9198 .addReg(AArch64::SP, RegState::Define)
9199 .addReg(AArch64::LR, RegState::Define)
9200 .addReg(AArch64::SP)
9201 .addImm(16);
9202 }
9203
9204 It = MBB.insert(It, Save);
9205 It++;
9206
9207 // Insert the call.
9208 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9209 .addGlobalAddress(M.getNamedValue(MF.getName())));
9210 CallPt = It;
9211 It++;
9212
9213 It = MBB.insert(It, Restore);
9214 return CallPt;
9215}
9216
9217 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
9218     MachineFunction &MF) const {
9219 return MF.getFunction().hasMinSize();
9220}
9221
9224 DebugLoc &DL,
9225 bool AllowSideEffects) const {
9226   const MachineFunction &MF = *MBB.getParent();
9227   const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9228 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9229
9230 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9231 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9232 } else if (STI.hasSVE()) {
9233 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9234 .addImm(0)
9235 .addImm(0);
9236 } else {
9237 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9238 .addImm(0);
9239 }
9240}
9241
9242 std::optional<DestSourcePair>
9243 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9244
9245 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9246 // and zero immediate operands used as an alias for mov instruction.
9247 if (MI.getOpcode() == AArch64::ORRWrs &&
9248 MI.getOperand(1).getReg() == AArch64::WZR &&
9249 MI.getOperand(3).getImm() == 0x0 &&
9250 // Check that the w->w move is not a zero-extending w->x mov.
9251 (!MI.getOperand(0).getReg().isVirtual() ||
9252 MI.getOperand(0).getSubReg() == 0) &&
9253 (!MI.getOperand(0).getReg().isPhysical() ||
9254 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9255 AArch64::X0,
9256 /*TRI=*/nullptr) == -1))
9257 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9258
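  // An ORRXrs with XZR and a zero shift amount is likewise a plain 64-bit
  // register move; the X-register form has no zero-extension subtleties.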
9259 if (MI.getOpcode() == AArch64::ORRXrs &&
9260 MI.getOperand(1).getReg() == AArch64::XZR &&
9261 MI.getOperand(3).getImm() == 0x0)
9262 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9263
9264 return std::nullopt;
9265}
9266
9267 std::optional<DestSourcePair>
9268 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9269 if (MI.getOpcode() == AArch64::ORRWrs &&
9270 MI.getOperand(1).getReg() == AArch64::WZR &&
9271 MI.getOperand(3).getImm() == 0x0)
9272 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9273 return std::nullopt;
9274}
9275
9276 std::optional<RegImmPair>
9277 AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9278 int Sign = 1;
9279 int64_t Offset = 0;
9280
9281 // TODO: Handle cases where Reg is a super- or sub-register of the
9282 // destination register.
9283 const MachineOperand &Op0 = MI.getOperand(0);
9284 if (!Op0.isReg() || Reg != Op0.getReg())
9285 return std::nullopt;
9286
9287 switch (MI.getOpcode()) {
9288 default:
9289 return std::nullopt;
9290 case AArch64::SUBWri:
9291 case AArch64::SUBXri:
9292 case AArch64::SUBSWri:
9293 case AArch64::SUBSXri:
9294 Sign *= -1;
9295 [[fallthrough]];
9296 case AArch64::ADDSWri:
9297 case AArch64::ADDSXri:
9298 case AArch64::ADDWri:
9299 case AArch64::ADDXri: {
9300 // TODO: Third operand can be global address (usually some string).
9301 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9302 !MI.getOperand(2).isImm())
9303 return std::nullopt;
9304 int Shift = MI.getOperand(3).getImm();
9305 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
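    // For example, ADD X0, X1, #1, lsl #12 is described as X1 plus 4096.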
9306 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9307 }
9308 }
9309 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9310}
9311
9312/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9313/// the destination register then, if possible, describe the value in terms of
9314/// the source register.
9315static std::optional<ParamLoadedValue>
9317 const TargetInstrInfo *TII,
9318 const TargetRegisterInfo *TRI) {
9319 auto DestSrc = TII->isCopyLikeInstr(MI);
9320 if (!DestSrc)
9321 return std::nullopt;
9322
9323 Register DestReg = DestSrc->Destination->getReg();
9324 Register SrcReg = DestSrc->Source->getReg();
9325
9326 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9327
9328 // If the described register is the destination, just return the source.
9329 if (DestReg == DescribedReg)
9330 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9331
9332 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9333 if (MI.getOpcode() == AArch64::ORRWrs &&
9334 TRI->isSuperRegister(DestReg, DescribedReg))
9335 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9336
9337 // We may need to describe the lower part of a ORRXrs move.
9338 if (MI.getOpcode() == AArch64::ORRXrs &&
9339 TRI->isSubRegister(DestReg, DescribedReg)) {
9340 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9341 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9342 }
9343
9344 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9345 "Unhandled ORR[XW]rs copy case");
9346
9347 return std::nullopt;
9348}
9349
9350 bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9351   // Functions cannot be split to different sections on AArch64 if they have
9352 // a red zone. This is because relaxing a cross-section branch may require
9353 // incrementing the stack pointer to spill a register, which would overwrite
9354 // the red zone.
9355 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9356 return false;
9357
9358   return TargetInstrInfo::isFunctionSafeToSplit(MF);
9359 }
9360
9361 bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9362     const MachineBasicBlock &MBB) const {
9363 // Asm Goto blocks can contain conditional branches to goto labels, which can
9364 // get moved out of range of the branch instruction.
9365 auto isAsmGoto = [](const MachineInstr &MI) {
9366 return MI.getOpcode() == AArch64::INLINEASM_BR;
9367 };
9368 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9369 return false;
9370
9371 // Because jump tables are label-relative instead of table-relative, they all
9372 // must be in the same section or relocation fixup handling will fail.
9373
9374 // Check if MBB is a jump table target
9376 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9377 return llvm::is_contained(JTE.MBBs, &MBB);
9378 };
9379 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9380 return false;
9381
9382 // Check if MBB contains a jump table lookup
9383 for (const MachineInstr &MI : MBB) {
9384 switch (MI.getOpcode()) {
9385 case TargetOpcode::G_BRJT:
9386 case AArch64::JumpTableDest32:
9387 case AArch64::JumpTableDest16:
9388 case AArch64::JumpTableDest8:
9389 return false;
9390 default:
9391 continue;
9392 }
9393 }
9394
9395 // MBB isn't a special case, so it's safe to be split to the cold section.
9396 return true;
9397}
9398
9399std::optional<ParamLoadedValue>
9401 Register Reg) const {
9402   const MachineFunction *MF = MI.getMF();
9403   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9404 switch (MI.getOpcode()) {
9405 case AArch64::MOVZWi:
9406 case AArch64::MOVZXi: {
9407 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9408 // 64-bit parameters, so we need to consider super-registers.
9409 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9410 return std::nullopt;
9411
9412 if (!MI.getOperand(1).isImm())
9413 return std::nullopt;
9414 int64_t Immediate = MI.getOperand(1).getImm();
9415 int Shift = MI.getOperand(2).getImm();
9416 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9417 nullptr);
9418 }
9419 case AArch64::ORRWrs:
9420 case AArch64::ORRXrs:
9421 return describeORRLoadedValue(MI, Reg, this, TRI);
9422 }
9423
9424   return TargetInstrInfo::describeLoadedValue(MI, Reg);
9425 }
9426
9427 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9428     MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9429 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9430 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9431 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9432
9433 // Anyexts are nops.
9434 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9435 return true;
9436
9437 Register DefReg = ExtMI.getOperand(0).getReg();
9438 if (!MRI.hasOneNonDBGUse(DefReg))
9439 return false;
9440
9441 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9442 // addressing mode.
9443 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9444 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9445}
9446
9447 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9448   return get(Opc).TSFlags & AArch64::ElementSizeMask;
9449}
9450
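// True if Opc is an SVE instruction that sets NZCV as if its result had been
// fed to a PTEST with the same governing predicate.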
9451bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9452 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9453}
9454
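// True if Opc is one of the SVE WHILE## instructions (WHILELO, WHILELT, ...).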
9455bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9456 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9457}
9458
9459unsigned int
9460 AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9461   return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9462}
9463
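// Accept either a pure immediate offset (9-bit signed unscaled, or 12-bit
// unsigned scaled by the access size) or a pure register offset (unscaled or
// scaled by the access size), but never an immediate and a scale together.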
9464bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9465 unsigned Scale) const {
9466 if (Offset && Scale)
9467 return false;
9468
9469 // Check Reg + Imm
9470 if (!Scale) {
9471 // 9-bit signed offset
9472 if (isInt<9>(Offset))
9473 return true;
9474
9475 // 12-bit unsigned offset
9476 unsigned Shift = Log2_64(NumBytes);
9477 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9478 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9479 (Offset >> Shift) << Shift == Offset)
9480 return true;
9481 return false;
9482 }
9483
9484 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9485 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9486}
9487
9488 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9489   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9490 return AArch64::BLRNoIP;
9491 else
9492 return AArch64::BLR;
9493}
9494
9495 MachineBasicBlock::iterator
9496 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9497                                    Register TargetReg, bool FrameSetup) const {
9498   assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9499
9500   MachineBasicBlock &MBB = *MBBI->getParent();
9501   MachineFunction &MF = *MBB.getParent();
9501 MachineFunction &MF = *MBB.getParent();
9502 const AArch64InstrInfo *TII =
9503 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9504   int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9505   DebugLoc DL = MBB.findDebugLoc(MBBI);
9506
9507 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9508   MachineBasicBlock *LoopTestMBB =
9509       MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9510   MF.insert(MBBInsertPoint, LoopTestMBB);
9511   MachineBasicBlock *LoopBodyMBB =
9512       MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9513   MF.insert(MBBInsertPoint, LoopBodyMBB);
9514   MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9515   MF.insert(MBBInsertPoint, ExitMBB);
9516   MachineInstr::MIFlag Flags =
9517       FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9518
9519 // LoopTest:
9520 // SUB SP, SP, #ProbeSize
9521 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9522 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9523
9524 // CMP SP, TargetReg
9525 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9526 AArch64::XZR)
9527 .addReg(AArch64::SP)
9528       .addReg(TargetReg)
9529       .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9530       .setMIFlags(Flags);
9531
9532 // B.<Cond> LoopExit
9533   BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9534       .addImm(AArch64CC::LE)
9535       .addMBB(ExitMBB)
9536 .setMIFlags(Flags);
9537
9538 // STR XZR, [SP]
9539 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9540 .addReg(AArch64::XZR)
9541 .addReg(AArch64::SP)
9542 .addImm(0)
9543 .setMIFlags(Flags);
9544
9545 // B loop
9546 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9547 .addMBB(LoopTestMBB)
9548 .setMIFlags(Flags);
9549
9550 // LoopExit:
9551 // MOV SP, TargetReg
9552 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9553 .addReg(TargetReg)
9554       .addImm(0)
9555       .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9556       .setMIFlags(Flags);
9557
9558 // LDR XZR, [SP]
9559 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9560 .addReg(AArch64::XZR, RegState::Define)
9561 .addReg(AArch64::SP)
9562 .addImm(0)
9563 .setMIFlags(Flags);
9564
9565   ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9566   ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9567
9568 LoopTestMBB->addSuccessor(ExitMBB);
9569 LoopTestMBB->addSuccessor(LoopBodyMBB);
9570 LoopBodyMBB->addSuccessor(LoopTestMBB);
9571 MBB.addSuccessor(LoopTestMBB);
9572
9573 // Update liveins.
9574 if (MF.getRegInfo().reservedRegsFrozen())
9575 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
9576
9577 return ExitMBB->begin();
9578}
9579
9580namespace {
9581class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9582   MachineInstr *PredBranch;
9583   SmallVector<MachineOperand, 4> Cond;
9584
9585 public:
9586   AArch64PipelinerLoopInfo(MachineInstr *PredBranch,
9587                            const SmallVectorImpl<MachineOperand> &Cond)
9588       : PredBranch(PredBranch), Cond(Cond.begin(), Cond.end()) {}
9589
9590 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9591 // Make the instructions for loop control be placed in stage 0.
9592 // The predecessors of PredBranch are considered by the caller.
9593 return MI == PredBranch;
9594 }
9595
9596 std::optional<bool> createTripCountGreaterCondition(
9597 int TC, MachineBasicBlock &MBB,
9598 SmallVectorImpl<MachineOperand> &CondParam) override {
9599 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9600 // Cond is normalized for such use.
9601 // The predecessors of the branch are assumed to have already been inserted.
9602 CondParam = Cond;
9603 return {};
9604 }
9605
9606 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9607
9608 void adjustTripCount(int TripCountAdjust) override {}
9609
9610 void disposed() override {}
9611};
9612} // namespace
9613
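// True for the CBZ/CBNZ/TBZ/TBNZ family, where the compare (or bit test) is
// fused into the branch instruction itself.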
9614static bool isCompareAndBranch(unsigned Opcode) {
9615 switch (Opcode) {
9616 case AArch64::CBZW:
9617 case AArch64::CBZX:
9618 case AArch64::CBNZW:
9619 case AArch64::CBNZX:
9620 case AArch64::TBZW:
9621 case AArch64::TBZX:
9622 case AArch64::TBNZW:
9623 case AArch64::TBNZX:
9624 return true;
9625 }
9626 return false;
9627}
9628
9629 std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9630 AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
9631   MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9632   SmallVector<MachineOperand, 4> Cond;
9633   if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9634 return nullptr;
9635
9636 // Infinite loops are not supported
9637 if (TBB == LoopBB && FBB == LoopBB)
9638 return nullptr;
9639
9640 // Must be conditional branch
9641 if (FBB == nullptr)
9642 return nullptr;
9643
9644 assert((TBB == LoopBB || FBB == LoopBB) &&
9645 "The Loop must be a single-basic-block loop");
9646
9647 // Normalization for createTripCountGreaterCondition()
9648   if (TBB == LoopBB)
9649     reverseBranchCondition(Cond);
9650
9651   MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9652   const TargetRegisterInfo &TRI = getRegisterInfo();
9653
9654 // Find the immediate predecessor of the conditional branch
9655 MachineInstr *PredBranch = nullptr;
9656 if (CondBranch->getOpcode() == AArch64::Bcc) {
9657 for (MachineInstr &MI : reverse(*LoopBB)) {
9658 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9659 PredBranch = &MI;
9660 break;
9661 }
9662 }
9663 if (!PredBranch)
9664 return nullptr;
9665 } else if (isCompareAndBranch(CondBranch->getOpcode())) {
9666 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9667 Register Reg = CondBranch->getOperand(0).getReg();
9668 if (!Reg.isVirtual())
9669 return nullptr;
9670 PredBranch = MRI.getVRegDef(Reg);
9671
9672 // MachinePipeliner does not expect that the immediate predecessor is a Phi
9673 if (PredBranch->isPHI())
9674 return nullptr;
9675
9676 if (PredBranch->getParent() != LoopBB)
9677 return nullptr;
9678 } else {
9679 return nullptr;
9680 }
9681
9682 return std::make_unique<AArch64PipelinerLoopInfo>(PredBranch, Cond);
9683}
9684
9685#define GET_INSTRINFO_HELPERS
9686#define GET_INSTRMAP_INFO
9687#include "AArch64GenInstrInfo.inc"
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:360
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack offset.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack offset.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
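A small illustrative sketch of how the StackOffset constructors and accessors above compose; the 16-byte fixed and 2-byte scalable amounts are arbitrary example values.

// Illustrative only: 16 fixed bytes plus 2 * vscale scalable bytes.
StackOffset Off = StackOffset::getFixed(16) + StackOffset::getScalable(2);
assert(Off.getFixed() == 16 && Off.getScalable() == 2);
StackOffset Same = StackOffset::get(/*Fixed=*/16, /*Scalable=*/2); // equivalent single-call form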
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:76
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:330
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:333
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:109
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:316
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:660
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
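A hedged round-trip sketch for the encoders above; the shift amount 12 and the mask 0xFF are arbitrary examples, chosen because 0xFF is a valid 64-bit logical immediate.

// Illustrative round-trips through the AArch64_AM helpers.
unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
assert(AArch64_AM::getShiftType(ShiftImm) == AArch64_AM::LSL &&
       AArch64_AM::getShiftValue(ShiftImm) == 12);
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0xFFULL, 64);
assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == 0xFFULL);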
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
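A sketch of how a caller might query the expander above, assuming the helper and ImmInsnModel live in the AArch64_IMM namespace as declared in AArch64ExpandImm.h; the 64-bit constant is an arbitrary example.

// Illustrative: ask how to materialize an arbitrary 64-bit constant; each
// returned ImmInsnModel is assumed to describe one MOVZ/MOVN/MOVK/ORR-style step.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x123456789ABCDEF0ULL, /*BitSize=*/64, Insn);
// A constant with four distinct non-zero half-words needs at most four insns.
assert(!Insn.empty() && Insn.size() <= 4);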
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
constexpr double e
Definition: MathExtras.h:31
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
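A hypothetical sketch of the builder pattern formed by BuildMI and the MachineInstrBuilder add* methods listed above; MBB, MBBI, DL, TII, DestReg and SrcReg are assumed to be in scope, and the ADDXri operand order (Rd, Rn, imm12, shift) follows the AArch64 instruction definitions.

// Build "add Xd, Xn, #16" immediately before MBBI.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), DestReg)
    .addReg(SrcReg, getKillRegState(/*IsKill=*/true))
    .addImm(16)
    .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));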
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
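A sketch of how the status bits above are typically interpreted after calling isAArch64FrameOffsetLegal (listed earlier); MI and Offset are assumed to be in scope.

bool UseUnscaledOp = false;
unsigned UnscaledOp = 0;
int64_t Emittable = 0;
int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                       &UnscaledOp, &Emittable);
if (Status & AArch64FrameOffsetIsLegal) {
  // Offset fits the instruction's immediate field as-is.
} else if (Status & AArch64FrameOffsetCanUpdate) {
  // Only part of Offset can be folded; the remainder must be materialized
  // separately (e.g. via emitFrameOffset).
}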
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:280
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
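A trivial illustrative use of the two helpers above, as a backend might turn a power-of-two scale factor into a shift amount; the value 16 is an arbitrary example.

uint64_t Scale = 16;
if (isPowerOf2_64(Scale)) {
  unsigned ShiftAmt = Log2_64(Scale); // 4
  (void)ShiftAmt;
}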
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
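A hedged sketch of a typical call, adjusting SP in place by a mixed fixed/scalable amount; MBB, MBBI, DL, TII, Bytes and ScalableBytes are assumed to be in scope, and MachineInstr::FrameSetup is just one plausible flag choice.

// Emits as many ADD/SUB (and SVE ADDVL/ADDPL-style) instructions as needed.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::get(Bytes, ScalableBytes), TII,
                MachineInstr::FrameSetup);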
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
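A small sketch of the two stream overloads above, writing into a raw_string_ostream (also listed in this index); the byte values in the comments are the standard LEB128 encodings of the example inputs.

std::string Buf;
raw_string_ostream OS(Buf);
encodeULEB128(300u, OS);  // appends 0xAC 0x02
encodeSLEB128(-2, OS);    // appends 0x7E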
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:465
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe an addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.